Metadata-Version: 2.4
Name: g-gremlin
Version: 0.1.21
Summary: Little AI gremlin in your sheet.
Author: g-gremlin authors
License: PROPRIETARY SOFTWARE LICENSE
        
        Copyright (c) 2026 FoundryOps.
        All rights reserved.
        
        This software and associated documentation files (the "Software") are proprietary
        and confidential. No permission is granted to use, copy, modify, merge, publish,
        distribute, sublicense, sell, or create derivative works of the Software except
        as expressly authorized in a separate written agreement with FoundryOps.
        
        You may not reverse engineer, decompile, disassemble, or otherwise attempt to
        derive source code, trade secrets, or know-how from the Software, except where
        such restriction is prohibited by applicable law.
        
        The Software is provided "AS IS", without warranty of any kind, express or
        implied, including but not limited to warranties of merchantability, fitness
        for a particular purpose, noninfringement, and title.
        
        In no event shall FoundryOps be liable for any claim, damages, or other
        liability, whether in an action of contract, tort, or otherwise, arising from,
        out of, or in connection with the Software or the use or other dealings in the
        Software.
        
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: click>=8.1
Requires-Dist: google-auth>=2.23.0
Requires-Dist: google-auth-oauthlib>=1.2.0
Requires-Dist: google-api-python-client>=2.108.0
Requires-Dist: jinja2>=3.1.0
Requires-Dist: psutil>=5.9.0
Requires-Dist: requests>=2.31.0
Requires-Dist: httpx>=0.25.0
Requires-Dist: nimble_python>=0.10.0
Requires-Dist: pydantic>=2.7.0
Requires-Dist: pyyaml>=6.0.1
Requires-Dist: keyring>=25.0.0
Requires-Dist: asteval>=0.9.0
Requires-Dist: bitarray>=2.9.0
Requires-Dist: duckdb>=1.0.0
Requires-Dist: numpy>=1.26.0
Requires-Dist: openpyxl>=3.1.0
Requires-Dist: pandas>=2.2.0
Requires-Dist: phonenumbers>=8.13.0
Requires-Dist: pyarrow>=15.0.0
Requires-Dist: rapidfuzz>=3.6.0
Provides-Extra: desktop
Requires-Dist: chardet>=5.2.0; extra == "desktop"
Requires-Dist: concurrent-log-handler>=0.9.0; extra == "desktop"
Requires-Dist: publicsuffix2>=2.20191221; extra == "desktop"
Requires-Dist: tldextract>=5.0.0; extra == "desktop"
Requires-Dist: watchdog>=6.0.0; extra == "desktop"
Provides-Extra: query
Requires-Dist: duckdb>=1.0.0; extra == "query"
Provides-Extra: sink-bq
Requires-Dist: google-cloud-bigquery>=3.0; extra == "sink-bq"
Provides-Extra: sink-s3
Requires-Dist: boto3>=1.26; extra == "sink-s3"
Provides-Extra: sink-gcs
Requires-Dist: google-cloud-storage>=2.0; extra == "sink-gcs"
Provides-Extra: sink-parquet
Requires-Dist: pyarrow>=14.0; extra == "sink-parquet"
Provides-Extra: sink-snowflake
Requires-Dist: snowflake-connector-python>=3.0; extra == "sink-snowflake"
Provides-Extra: sink-redshift
Requires-Dist: psycopg2-binary>=2.9; extra == "sink-redshift"
Provides-Extra: sink
Requires-Dist: g-gremlin[sink-bq]; extra == "sink"
Requires-Dist: g-gremlin[sink-s3]; extra == "sink"
Requires-Dist: g-gremlin[sink-gcs]; extra == "sink"
Provides-Extra: sink-all
Requires-Dist: g-gremlin[sink]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-parquet]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-snowflake]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-redshift]; extra == "sink-all"
Provides-Extra: docs-word
Requires-Dist: python-docx>=1.1.0; extra == "docs-word"
Provides-Extra: docs-confluence
Requires-Dist: markdown-it-py>=3.0.0; extra == "docs-confluence"
Requires-Dist: mdit-py-plugins>=0.4.0; extra == "docs-confluence"
Provides-Extra: keychain
Requires-Dist: keyring>=25.0.0; extra == "keychain"
Provides-Extra: tableau
Requires-Dist: tableauserverclient>=0.30; extra == "tableau"
Requires-Dist: pydantic>=2.7.0; extra == "tableau"
Requires-Dist: pandas>=2.2.0; extra == "tableau"
Requires-Dist: lxml>=5.0.0; extra == "tableau"
Provides-Extra: tableau-hyper
Requires-Dist: tableauhyperapi>=0.0.19; extra == "tableau-hyper"
Provides-Extra: visualize
Requires-Dist: plotly>=5.0.0; extra == "visualize"
Requires-Dist: kaleido>=0.2.0; extra == "visualize"
Requires-Dist: pandas>=2.0.0; extra == "visualize"
Provides-Extra: secrets-aws
Requires-Dist: boto3>=1.26; extra == "secrets-aws"
Provides-Extra: secrets-gcp
Requires-Dist: google-cloud-secret-manager>=2.0; extra == "secrets-gcp"
Provides-Extra: autopilot
Requires-Dist: fastapi>=0.104.0; extra == "autopilot"
Requires-Dist: uvicorn[standard]>=0.24.0; extra == "autopilot"
Requires-Dist: pydantic>=2.5.0; extra == "autopilot"
Requires-Dist: cryptography>=41.0.0; extra == "autopilot"
Requires-Dist: PyJWT>=2.10.0; extra == "autopilot"
Requires-Dist: anthropic>=0.40.0; extra == "autopilot"
Requires-Dist: slack-sdk>=3.21.0; extra == "autopilot"
Provides-Extra: mcp
Requires-Dist: mcp>=1.0.0; extra == "mcp"
Provides-Extra: autopilot-dev
Requires-Dist: g-gremlin[autopilot]; extra == "autopilot-dev"
Requires-Dist: pytest>=7.4.0; extra == "autopilot-dev"
Requires-Dist: httpx>=0.25.0; extra == "autopilot-dev"
Provides-Extra: dev
Requires-Dist: pytest>=7.4.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
Requires-Dist: httpx>=0.25.0; extra == "dev"
Requires-Dist: import-linter>=2.0; extra == "dev"
Provides-Extra: test
Requires-Dist: pytest>=7.4.0; extra == "test"
Requires-Dist: pytest-asyncio>=0.23.0; extra == "test"
Requires-Dist: pytest-timeout>=2.3.1; extra == "test"
Requires-Dist: pytest-cov>=5.0.0; extra == "test"
Requires-Dist: httpx>=0.25.0; extra == "test"
Requires-Dist: beautifulsoup4>=4.12.0; extra == "test"
Requires-Dist: langdetect>=1.0.9; extra == "test"
Requires-Dist: fastapi>=0.111.0; extra == "test"
Requires-Dist: pydantic>=2.7.0; extra == "test"
Requires-Dist: pydantic-settings>=2.4.0; extra == "test"
Requires-Dist: python-dotenv>=1.0.0; extra == "test"
Requires-Dist: python-multipart>=0.0.9; extra == "test"
Requires-Dist: sqlmodel>=0.0.18; extra == "test"
Requires-Dist: SQLAlchemy>=2.0; extra == "test"
Requires-Dist: aiosqlite>=0.19.0; extra == "test"
Requires-Dist: pandas>=2.2.0; extra == "test"
Requires-Dist: numpy>=1.26.0; extra == "test"
Requires-Dist: duckdb>=0.10.0; extra == "test"
Requires-Dist: pyarrow>=15.0.0; extra == "test"
Requires-Dist: rapidfuzz>=3.6.0; extra == "test"
Requires-Dist: responses>=0.25.0; extra == "test"
Requires-Dist: prometheus-client>=0.23.0; extra == "test"
Requires-Dist: redis>=5.0.0; extra == "test"
Requires-Dist: rq>=1.15.0; extra == "test"
Requires-Dist: fakeredis>=2.23.0; extra == "test"
Requires-Dist: PyJWT>=2.10.0; extra == "test"
Requires-Dist: email-validator>=2.2.0; extra == "test"
Requires-Dist: apscheduler>=3.10.0; extra == "test"
Requires-Dist: croniter>=6.0.0; extra == "test"
Requires-Dist: asteval>=0.9.0; extra == "test"
Requires-Dist: chardet>=5.2.0; extra == "test"
Requires-Dist: aiofiles>=25.0.0; extra == "test"
Requires-Dist: bitarray>=2.9.0; extra == "test"
Requires-Dist: phonenumbers>=8.13.0; extra == "test"
Requires-Dist: cryptography>=41.0.0; extra == "test"
Requires-Dist: python-jose>=3.3.0; extra == "test"
Requires-Dist: scikit-learn>=1.3.0; extra == "test"
Requires-Dist: publicsuffix2>=2.20191221; extra == "test"
Requires-Dist: boto3>=1.26.0; extra == "test"
Requires-Dist: sse-starlette>=3.0.0; extra == "test"
Requires-Dist: tldextract>=5.0.0; extra == "test"
Requires-Dist: markdown-it-py>=3.0.0; extra == "test"
Requires-Dist: mdit-py-plugins>=0.4.0; extra == "test"
Dynamic: license-file

  # g-gremlin — the little AI gremlin in your sheet

  Local-shadow CLI for Google Sheets that pulls to CSV, diffs, and pushes back with safety rails (concurrency token, formula protection, previews).

  ## Setup
  - Python 3.11+
  - From repo root: `python -m pip install --upgrade pip` then `python -m pip install -e .`
  - Obtain a Google OAuth client (Desktop app) from Google Cloud Console.
    - Set `G_GREMLIN_CREDENTIALS=/path/to/credentials.json` **or** place it at `~/.g_gremlin/credentials.json`.
  - Tokens live at `~/.g_gremlin/token.json`; delete to re-auth.

  ## Usage
  - Pull a range to CSV:
    ```bash
    g-gremlin pull SPREADSHEET_ID --range "Leads!A1:Z" --output leads.csv
    ```
  - Diff two CSVs:
    ```bash
    g-gremlin diff --original leads.csv --modified leads_edited.csv --key-col Id --limit 10
    ```
    - Summary + preview; writes `.g_gremlin/diff.json`.
- Push changes back (ETag/concurrency guarded, formula-aware):
  ```bash
  g-gremlin push --dry-run   # preview updates
  g-gremlin push             # apply updates
  ```
  - Uses `.g_gremlin/state.json` and `.g_gremlin/diff.json`.
  - Requires a concurrency token from the last pull; if missing, re-run pull.
  - Skips cells that contain formulas (warns) and aborts if the remote version changed unless `--force` is provided.
- Validate a CSV against rules:
  ```bash
  g-gremlin validate --csv account_dim.csv --rules rules/account_dim.yaml --output .g_gremlin/validate_result.json
  # or run a formula pre-flight (no rules) with allowed sheet refs:
  g-gremlin validate --csv model.csv --refs "FY26_Assumptions,FY27_Assumptions"
  ```
- Inspect (profile) a sheet without modifying it:
  ```bash
  g-gremlin inspect                # uses state.json from last pull
  g-gremlin inspect -s SPREADSHEET_ID -r "Leads!A1:E200" -o .g_gremlin/profile.json
  ```
    - Reads values and formulas, infers column types/stats, lists formula cells.
    - Captures named ranges and a basic role per column (dimension/measure/derived/unknown) to help AI tools understand inputs vs metrics vs derived outputs.
    - Writes `.g_gremlin/profile.json`; running inspect before heavy workflows helps avoid overwriting formulas during push.
    - Semantic inspect adds `semantic_type` (e.g., `sfdc_account_id`, `date_excel_serial`), optional `validation`, tab `classification`, and `detected_sfdc_patterns` to the JSON profile.
    - Use `--structure` to capture formula dependency graphs (drivers, OFFSET lags, IFERROR wrappers, cross-sheet refs).
    - Use `--lint` to scan formulas for common issues; combine with `--fix` to auto-quote bad sheet references.
- Repair inherited spreadsheets (the "broken model" rescue flow):
  ```bash
  g-gremlin repair analyze -s SPREADSHEET_ID -r "Model!A1:Z100" -o repair_plan.json
  g-gremlin repair fix --plan repair_plan.json --auto -s SPREADSHEET_ID
  g-gremlin repair fix --plan repair_plan.json -s SPREADSHEET_ID   # interactive for remaining items
  ```
  - Use `repair stub-sheets` to create placeholders for missing tabs; `repair patterns` for anomaly-only scans; `repair diff` to compare to a baseline tab/file.
  - `repair diff` accepts `--baseline` (CSV file), `--baseline-tab`, or `--baseline-sheet` and writes `.g_gremlin/repair_diff.json`.
- Send in the gremlin (hero command):
  ```bash
  g-gremlin rescue SPREADSHEET_ID \
    --band "J:M=FY26_Assumptions" \
    --band "N:Q=FY27_Assumptions" \
    --receipt
  ```
  - Scans all tabs, fixes everything it safely can, prompts only when needed (e.g., missing sheet remap vs stub), writes a JSON report to `.g_gremlin/rescue_report.json`, and a receipt to `.g_gremlin/rescues/` when requested. Flags: `--dry-run`, `--auto`, `--output FILE`, `--receipt`, `--backup/--no-backup` (placeholder), `--throttle/--no-throttle` (pacing + quota backoff; default on).
- Style a range (headers/stripes/fonts):
  ```bash
  g-gremlin style -s SPREADSHEET_ID -r "Tab!A1:Q200" --header-fill "#1f4e79" --stripe --font-size 11
  ```
  - Flags: `--header-rows`, `--header-bold/--no-header-bold`, `--header-fill`, `--header-font-color`, `--stripe/--no-stripe`, `--stripe-color`, `--font-size`, `--font-color`.
- Write a CSV directly to a tab (creates it if missing):
  ```bash
  g-gremlin write SPREADSHEET_ID --tab "Cleaned" --csv cleaned.csv --mode overwrite
  g-gremlin write SPREADSHEET_ID --tab "Cleaned" --csv delta.csv --mode append
  # with backup copy before writing:
  g-gremlin write SPREADSHEET_ID --tab "Master_Enriched" --csv master_final.csv --mode overwrite --backup
  ```
- Copy a tab safely (preserves cross-sheet references):
  ```bash
  g-gremlin copy-tab SPREADSHEET_ID --source "MH 2026" --dest "Orion_FY27_Assumptions" --overwrite
  ```
  - Merge multiple CSVs by key with optional source flags:
    ```bash
    g-gremlin merge --sources cw_opps.csv,commure_opps.csv --key "Account ID" --output merged.csv --source-flag-col Source
  ```
  - Fuzzy-match names between two files:
    ```bash
    g-gremlin fuzzy-match --source master_list.csv --source-col "Account Name" --target account_dim.csv --target-col AccountName --output matched.csv --threshold 0.8
    ```
- Quick safety + doctor checks:
  ```bash
  g-gremlin doctor SHEET_ID --live --sfdc --format json --output doctor.json
  g-gremlin safety
  ```
  - Machine output standard: `--format json` (alias `--json`) + `--output PATH` (use `-` for stdout). `--json-summary` remains a shortcut on mutating commands.
- Connect Apollo OAuth for FoundryMatch enrichment:
  ```bash
  g-gremlin apollo setup --show-redirect-urls
  g-gremlin apollo auth --workspace-id YOUR_WORKSPACE_ID
  g-gremlin apollo doctor --dev
  ```
  - Register the redirect URL printed by `g-gremlin apollo setup` in Apollo (single callback for Sheets + CLI).
  - Full setup guide: `docs/APOLLO_OAUTH_SETUP.md`.

## Recipes (agent-friendly)
- Deploy a spreadsheet model: `g-gremlin diff ...` (optional) → `g-gremlin lint ...` → `g-gremlin sync SHEET_ID --config g_gremlin.yaml` (dry-run preview) → `g-gremlin sync ... --apply` (backed up; plan hash + run ID emitted).
- Resolve accounts to SFDC + writeback: `g-gremlin pull ... --range "Accounts!A:Z" --output accounts.csv` → match domains with `g-gremlin fg search`/`fg lookup` or `g-gremlin fuzzy-match` → stage updates in CSV → `g-gremlin sfdc preview-push --csv updates.csv --object Account --id-column Id --fields "Type,Industry" --output preview` → `g-gremlin push --dry-run` → `g-gremlin push`.
- Build contacts from an account universe: export with `g-gremlin pull ... --range "Contacts!A:Z"` → enrich with `g-gremlin enrich --csv contacts.csv --company-col Company --output enriched.csv` → dedupe/merge via `g-gremlin fuzzy-match` or `merge` → link IDs in the sheet with `g-gremlin write`/`sync` (prefer dry-run first).
- Notify a webhook on completion (piped from another command):
  ```bash
  g-gremlin sync SHEET_ID --config sync.yaml --apply --json-summary \
    | g-gremlin notify webhook https://hooks.slack.com/... --apply
  ```
- Sequence contacts (Instantly, Smartlead, Apollo):
  ```bash
  g-gremlin sequencer doctor --provider apollo
  g-gremlin sequencer campaigns list --provider instantly --format json
  g-gremlin sequencer push --provider smartlead --csv leads.csv --campaign-id XYZ --apply
  ```
  - Credentials: set `G_GREMLIN_INSTANTLY_API_KEY`, `G_GREMLIN_SMARTLEAD_API_KEY`, or `G_GREMLIN_APOLLO_API_KEY` (or use `g-gremlin auth set ...`).

## Command map (everything in the CLI)
- `pull` / `write` / `push` — core read/write with concurrency + formula safety.
- `diff` / `merge` / `fuzzy-match` / `transform` — local CSV utilities.
- `inspect` / `inspect-formulas` / `find-refs` / `trace` / `check-bands` / `generate-lag` — analysis/lineage helpers.
- `repair` / `rescue` / `copy-tab` / `style` — sheet fixing and formatting.
- `sync` / `names` / `ranges` / `formula` — config-driven writes, named range, and formula helpers.
- `lint` / `validate` / `schema-validate` — linting and schema validation.
- `enrich` / `fullenrich` / `dedup` / `apify` / `hubspot` / `sfdc` / `zendesk` / `fg` / `outreach` / `gdocs` / `gslides` — enrichment, dedup, and integrations (Apify, HubSpot, Salesforce, Zendesk, FoundryGraph, Outreach, Google Docs, Google Slides).
- `apollo` — Apollo OAuth connect/check/disconnect for FoundryMatch enrichment (server-held tokens).
- `recipe` / `chain` — prebuilt flows and chained pull→enrich→transform→write (with optional sink).
- `query` — DuckDB-backed read-only SQL over CSV/Parquet.
- `sink` — land CSV data to BigQuery, S3, GCS, Snowflake, or Redshift.
- `snapshot` — create/list/diff/restore spreadsheet snapshots with rollback support.
- `notify` — job-level notifications to webhooks (Slack/Discord/custom).
- `sequencer` — push contacts to Instantly, Smartlead, or Apollo sequences.
- `provenance` — sidecar cleanup; `stats` — gremlin achievements.
- `autopilot` — localhost web UI + CLI-runnable plays (workflow templates).
- `docs` / `help` — built-in docs and JSON introspection.

## Autopilot Plays

Pre-built workflow templates that run without AI assistance. They extract data and metadata from connected systems into structured artifacts for documentation and analysis.

### Quick Start

```bash
# List available plays (shows credential status)
g-gremlin autopilot play list

# Validate a play definition
g-gremlin autopilot play validate document_cpq

# Run a play (dry-run preview)
g-gremlin autopilot play run test_echo --dry-run

# Run a play with parameters
g-gremlin autopilot play run document_cpq -p output_format=markdown

# Specify output directory
g-gremlin autopilot play run document_cpq --output-dir ./my_output
```

### Available CRM Documentation Plays

| Play | Description | Credentials |
|------|-------------|-------------|
| `document_cpq` | Document CPQ config (products, pricing, quotes) | salesforce |
| `document_lead_routing` | Document lead assignment and territories | salesforce |
| `document_case_routing` | Document case routing and entitlements | salesforce |
| `document_renewals` | Document renewal process and contracts | salesforce |
| `document_revenue_process` | Document opportunity stages and forecasting | salesforce |
| `test_echo` | Smoke test play (no external dependencies) | none |

### Output Structure

Each play run creates an artifacts directory:
```
artifacts/<play_name>/<timestamp>/
  metadata_inventory.md     # Summary of extracted data
  final_doc.md              # Documentation skeleton
  risks_and_edge_cases.md   # Automated risk flags
  run_summary.json          # Execution details + step results
  logs/                     # Per-step execution logs
  metadata/                 # Retrieved SF metadata (flows, validations)
  *.csv                     # Extracted data files
```

### Operator Prompts

Each CRM play has a matching `.prompt.md` file in `config/plays/` for AI-assisted synthesis:
1. Run the play to extract data
2. Use the operator prompt with an AI assistant to synthesize artifacts into client-ready documentation

### Documentation

- [PLAYBOOKS.md](docs/PLAYBOOKS.md) — Full user guide
- [PLAY_AUTHORING.md](docs/PLAY_AUTHORING.md) — Create custom plays
- [AUTOPILOT_SCOUTS.md](docs/AUTOPILOT_SCOUTS.md) — Scout catalog and configuration

## Gremlin Triage (Sheets add-on)
> One-click answer to “What should I do with this sheet?”

- Endpoint: `POST /api/v2/gremlin/triage` (headers: `X-Sheets-Token`), body: `{ spreadsheet_id, sheet_name?, range? }` (defaults to active sheet, `A1:Z2000`).
- Returns: deterministic summary (row/col counts, headers, SFDC ID detection, B2C/duplicate %, formula density), `suggested_actions` (2-4 routed flows), optional `ai_summary` for Pro+ tiers.
- Client entry points: Sheets add-on sidebar button “🧭 What should I do with this?” and menu: FoundryMatch → Model Intelligence → 🧭 What should I do?
- Actions route to existing flows (scan/fix/explain model, dedupe, match to accounts, SFDC enrich placeholders). Free tiers get deterministic summary; Pro/Scale/Unleashed add AI natural-language summary (Gemini).
- Model building helpers:
  ```bash
  # Pre-flight validate a CSV and allowed sheet refs
  g-gremlin validate --csv model.csv --refs "FY26_Assumptions,FY27_Assumptions"

  # Check column bands reference expected sheets
  g-gremlin check-bands --csv model.csv --band "J:M=FY26_Assumptions" --band "N:Q=FY27_Assumptions"

  # Generate lagged OFFSET formulas across a row
  g-gremlin generate-lag --source "E5:Q5" --lag 2 --output lag_row.csv
  # Copy a tab safely without breaking cross-sheet references
  g-gremlin copy-tab SPREADSHEET_ID --source "Template" --dest "Target" --overwrite
  ```
- Salesforce helpers:
  ```bash
  g-gremlin sfdc connect --org-alias canopy
  g-gremlin sfdc audit --csv account_dim.csv --id-column AccountId --object Account --report .g_gremlin/sfdc_audit.json
  g-gremlin sfdc enrich --csv account_dim.csv --id-column AccountId --object Account --fields Type,Industry --output account_enriched.csv
  # Aliasing fields:
  g-gremlin sfdc enrich --csv account_dim.csv --id-column AccountId --object Account --fields "CS_Scorecard_GPA__c AS 'CS Scorecard GPA'" --output account_enriched.csv
  # Preview SFDC changes from a CSV (does not push):
  g-gremlin sfdc preview-push --csv master_final.csv --object Account --id-column AccountId --fields Type,Industry --output sfdc_preview
  ```
  - `sfdc audit` checks ID format, prefix, duplicates, and SFDC existence; default report path is `.g_gremlin/sfdc_audit.json`.
  - `sfdc preview-push` only compares CSV vs SFDC and writes `<output>.json` and `<output>.md`; it never writes to SFDC.
  - Zendesk:
    ```bash
    g-gremlin zendesk connect --subdomain your_subdomain --email you@domain.com --api-token ****
    ```
  - Recipe: build a simple Account Dim from opp exports:
    ```bash
    g-gremlin recipe build-account-dim --opp-sources cw_opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --output account_dim.csv
    # with optional master list attach
    g-gremlin recipe build-account-dim --opp-sources cw_opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --master-list master.csv --master-name-col "Account Name" --output account_dim.csv
    # optional: specify stage/close/amount columns to compute Has_ClosedWon, ARR_Estimate, latest close/end dates
    g-gremlin recipe build-account-dim --opp-sources cw.csv,commure.csv --key-column "Account ID" --name-column "Account Name" --stage-column "StageName" --close-date-column "CloseDate" --amount-column "Amount" --output account_dim.csv
    ```
  - Recipe: enrich-from-sfdc (recipe wrapper for sfdc enrich):
    ```bash
    g-gremlin recipe enrich-from-sfdc \
      --csv account_dim.csv \
      --id-column AccountId \
      --object Account \
      --fields Type,Industry,BillingCity \
      --output account_enriched.csv
    ```
  - Recipe: resolve-unmatched (surface rows missing IDs with SOQL hints):
    ```bash
    g-gremlin recipe resolve-unmatched \
      --csv account_dim.csv \
      --id-column AccountId \
      --name-column "Original_Master_Name" \
      --output unmatched.csv
    ```
  - Recipe: detect open renewals on accounts:
    ```bash
    g-gremlin recipe detect-open-renewals \
      --csv account_dim.csv \
      --id-column AccountId \
      --renewal-col "Open Renewal?" \
      --renewal-id-col "Open Renewal ID" \
      --output account_dim_with_renewals.csv
    ```
  - Recipe: attach products from OpportunityLineItems per account:
    ```bash
    g-gremlin recipe attach-products \
      --csv account_dim_with_renewals.csv \
      --id-column AccountId \
      --products-column "Direct Products" \
      --stage "7 - Closed Won" \
      --output master_enriched.csv
    ```
  - Recipe: Zendesk user stats by email:
    ```bash
    g-gremlin recipe zendesk-user-stats \
      --csv master_enriched.csv \
      --email-column "Primary Contact Email" \
      --tickets-count-col "ZD Ticket Count" \
      --open-tickets-col "ZD Open Tickets" \
      --last-ticket-col "ZD Last Ticket At" \
      --output master_with_zd.csv
    ```
  - Recipe: Zendesk org stats by org URL/ID:
    ```bash
    g-gremlin recipe zendesk-org-stats \
      --csv master_enriched.csv \
      --org-column "Zendesk_URL" \
      --total-tickets-col "ZD_Total_Tickets" \
      --open-tickets-col "ZD_Open_Tickets" \
      --last-ticket-col "ZD_Last_Ticket_At" \
      --org-created-col "ZD_Created_At" \
      --output master_with_org_stats.csv
    ```
  - Transform a CSV (rename/add/calculated columns):
    ```bash
    g-gremlin transform --input data.csv --output data_out.csv --rename Old=New --add-column CSM_Assigned --calc "Days_To_Renewal=Latest_End_Date-TODAY"
    ```
  - Chain pull → enrich → transforms → write → sink:
    ```bash
  g-gremlin chain --spreadsheet-id SPREADSHEET_ID --pull-range "Sheet!A1:Z" \
    --enrich-object Account --enrich-fields "Type,Industry" --enrich-id-column AccountId \
    --calc "Days_To_Renewal=Latest_End_Date-45997" --add-column "CSM_Assigned" \
    --write-tab "Output_Tab"

  # Chain with sink to BigQuery
  g-gremlin chain --spreadsheet-id SPREADSHEET_ID --pull-range "Data!A1:Z" \
    --enrich-object Account --enrich-fields "Type,Industry" \
    --sink bq --sink-project myproj --sink-dataset revops --sink-table enriched
  ```
  - Chains pull → optional SFDC enrich → transforms → optional write → optional sink; default output lands at `.g_gremlin/chain/chain_output.csv`.
  - Manifest example at `.g_gremlin/state.json`:
    ```json
    {
      "spreadsheet_id": "...",
      "range": "Leads!A1:Z",
      "output_file": "leads.csv",
      "concurrency_token": "...",
      "pulled_at": "2025-01-01T12:34:56Z"
    }
    ```

  ## Example flow
  - Pull, merge two sources, fuzzy-match account names to a dimension, then write back:
    ```bash
    g-gremlin pull SPREADSHEET_ID --range "Opps!A1:Z" --output opps.csv
    g-gremlin merge --sources opps.csv,commure_opps.csv --key "Account ID" --output merged.csv --source-flag-col Source
    g-gremlin fuzzy-match --source merged.csv --source-col "Account Name" --target account_dim.csv --target-col AccountName --output matched.csv --threshold 0.82
    g-gremlin write SPREADSHEET_ID --tab "Matched" --csv matched.csv --mode overwrite
    ```
    - Extend with SFDC enrich:
    ```bash
    g-gremlin recipe build-account-dim --opp-sources opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --output account_dim.csv
    g-gremlin sfdc enrich --csv account_dim.csv --id-column "Account ID" --object Account --fields Type,Industry,BillingCity --output account_dim_enriched.csv
    g-gremlin write SPREADSHEET_ID --tab "Master_Enriched" --csv account_dim_enriched.csv --mode overwrite
    ```

## Backup & Rollback

All destructive commands support automatic backup snapshots with restore-on-error:

```bash
# Backup is ON by default for: write, push, repair fix, rescue, sync, chain, copy-tab, style
g-gremlin write SPREADSHEET_ID --tab "Data" --csv data.csv            # Creates pre-write snapshot
g-gremlin write SPREADSHEET_ID --tab "Data" --csv data.csv --no-backup  # Skip backup

# Auto-rollback for CI/agents (restores on any error)
g-gremlin push SPREADSHEET_ID --auto-rollback

# Manual snapshot management
g-gremlin snapshot create SPREADSHEET_ID --name "before-refactor" --tabs "Sheet1,Sheet2"
g-gremlin snapshot list SPREADSHEET_ID --json
g-gremlin snapshot diff SPREADSHEET_ID --name "before-refactor"
g-gremlin snapshot restore SPREADSHEET_ID --name "before-refactor"
g-gremlin snapshot restore SPREADSHEET_ID --name "before-refactor" --force  # Override safety checks
```

**Safety checks on restore:**
- **ETag mismatch**: Blocked if spreadsheet modified since snapshot
- **Shape drift**: Blocked if rows/columns changed
- Use `--force` to override (data may be lost)

## Data Sink (`g-gremlin sink`)

Land CSV data to external systems:

```bash
# BigQuery
g-gremlin sink bq --csv accounts.csv --project myproj --dataset revops --table accounts --mode replace

# S3 / GCS
g-gremlin sink s3 --csv data.csv --bucket my-bucket --prefix exports/2025/ --compress
g-gremlin sink gcs --csv data.csv --bucket my-bucket --prefix exports/2025/ --compress

# Snowflake (via S3/GCS staging)
g-gremlin sink snowflake --csv accounts.csv \
  --stage-bucket my-staging --database ANALYTICS --schema REVOPS --table ACCOUNTS

# Redshift (via S3 staging)
g-gremlin sink redshift --csv accounts.csv \
  --stage-bucket my-staging --host cluster.redshift.amazonaws.com \
  --database analytics --schema revops --table accounts \
  --iam-role arn:aws:iam::123456789:role/RedshiftCopyRole

# Cleanup old staging files
g-gremlin sink cleanup --older-than 7 --delete-from-cloud
```

**Optional dependencies:**
```bash
pip install g-gremlin[sink-bq]        # BigQuery
pip install g-gremlin[sink-s3]        # S3 (boto3)
pip install g-gremlin[sink-gcs]       # GCS
pip install g-gremlin[sink-snowflake] # Snowflake
pip install g-gremlin[sink-redshift]  # Redshift (psycopg2)
pip install g-gremlin[sink-all]       # All sinks
```

## Notes
- Dependencies: click, google-auth, google-auth-oauthlib, google-api-python-client, requests, pyyaml.

## Close the GTM Loop (Common Room → HeyReach → Docs)

- New integrations: Common Room (CSV ingest now; webhooks later), HeyReach activation, docs sinks (Notion + Outline + Confluence). They are independent entry points; use the ones you need.
- Safety: All mutation commands are dry-run by default; add `--apply` to execute. Defaults are conservative (HeyReach 10/batch, 500ms delay, 100/day; Notion 3 req/sec built-in; Outline honors `Retry-After` on 429s).
- Notion updates replace the existing page body to keep re-publish idempotent. Large pages can update slowly because Notion requires per-block deletes, and if a replace fails mid-flight the next publish will repair the page body.
- Quick start (pick any provider):
  ```bash
  g-gremlin auth set commonroom --key webhook_secret
  g-gremlin commonroom doctor
  g-gremlin commonroom import --source signals.csv --output enriched.csv --min-intent-score 0.6 --apply

  g-gremlin auth set heyreach --key api_key
  g-gremlin heyreach doctor
  g-gremlin heyreach enroll --csv enriched.csv --campaign-id ABC123 --apply

  g-gremlin auth set notion --key token
  g-gremlin docs doctor --provider notion
  g-gremlin docs publish-batch --csv enriched.csv --provider notion --database-id XYZ789 --template src/g_gremlin/templates/intent-brief.md.j2 --external-id-column signal_id --apply --profile prod

  g-gremlin auth set outline --key api_token
  g-gremlin auth set outline --key api_base --value https://wiki.example.com/api
  g-gremlin docs doctor --provider outline
  g-gremlin docs publish-batch --csv enriched.csv --provider outline --collection-id COLLECTION_ID --template src/g_gremlin/templates/intent-brief.md.j2 --external-id-column signal_id --apply --profile prod

  g-gremlin auth set confluence --key api_base --value https://acme.atlassian.net
  g-gremlin auth set confluence --key email --value user@acme.com
  g-gremlin auth set confluence --key api_token --value $CONFLUENCE_TOKEN
  g-gremlin docs doctor --provider confluence
  g-gremlin docs publish --provider confluence --markdown-file plan.md --space-key SPACE --apply
  ```
- Outline auth keys: `api_token`, `api_base` (e.g. `https://wiki.example.com/api`), optional `default_collection_id`.
- Confluence auth keys: `api_base`, `email`, `api_token`, optional `default_space_key`, `default_parent_id`.
  Install with: `pip install g-gremlin[docs-confluence]`
- Confluence sugar: blockquotes → panels, `[STATUS: ON TRACK]` → status lozenge, `::: expand Title` blocks.
- Optional chaining can come later via recipes; today each command stands alone. Deferred: Sheets writeback, Common Room webhook endpoint, richer template gallery.
