Metadata-Version: 2.4
Name: g-gremlin
Version: 0.1.8
Summary: Little AI gremlin in your sheet.
Author: g-gremlin authors
License: MIT
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: license_rules_export.csv
Requires-Dist: click>=8.1
Requires-Dist: google-auth>=2.23.0
Requires-Dist: google-auth-oauthlib>=1.2.0
Requires-Dist: google-api-python-client>=2.108.0
Requires-Dist: jinja2>=3.1.0
Requires-Dist: psutil>=5.9.0
Requires-Dist: requests>=2.31.0
Requires-Dist: pyyaml>=6.0.1
Requires-Dist: keyring>=25.0.0
Provides-Extra: query
Requires-Dist: duckdb>=1.0.0; extra == "query"
Provides-Extra: sink-bq
Requires-Dist: google-cloud-bigquery>=3.0; extra == "sink-bq"
Provides-Extra: sink-s3
Requires-Dist: boto3>=1.26; extra == "sink-s3"
Provides-Extra: sink-gcs
Requires-Dist: google-cloud-storage>=2.0; extra == "sink-gcs"
Provides-Extra: sink-parquet
Requires-Dist: pyarrow>=14.0; extra == "sink-parquet"
Provides-Extra: sink-snowflake
Requires-Dist: snowflake-connector-python>=3.0; extra == "sink-snowflake"
Provides-Extra: sink-redshift
Requires-Dist: psycopg2-binary>=2.9; extra == "sink-redshift"
Provides-Extra: sink
Requires-Dist: g-gremlin[sink-bq]; extra == "sink"
Requires-Dist: g-gremlin[sink-s3]; extra == "sink"
Requires-Dist: g-gremlin[sink-gcs]; extra == "sink"
Provides-Extra: sink-all
Requires-Dist: g-gremlin[sink]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-parquet]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-snowflake]; extra == "sink-all"
Requires-Dist: g-gremlin[sink-redshift]; extra == "sink-all"
Provides-Extra: docs-word
Requires-Dist: python-docx>=1.1.0; extra == "docs-word"
Provides-Extra: docs-confluence
Requires-Dist: markdown-it-py>=3.0.0; extra == "docs-confluence"
Requires-Dist: mdit-py-plugins>=0.4.0; extra == "docs-confluence"
Provides-Extra: keychain
Requires-Dist: keyring>=25.0.0; extra == "keychain"
Provides-Extra: tableau
Requires-Dist: tableauserverclient>=0.30; extra == "tableau"
Requires-Dist: pydantic>=2.7.0; extra == "tableau"
Requires-Dist: pandas>=2.2.0; extra == "tableau"
Requires-Dist: lxml>=5.0.0; extra == "tableau"
Provides-Extra: tableau-hyper
Requires-Dist: tableauhyperapi>=0.0.19; extra == "tableau-hyper"
Provides-Extra: visualize
Requires-Dist: plotly>=5.0.0; extra == "visualize"
Requires-Dist: kaleido>=0.2.0; extra == "visualize"
Requires-Dist: pandas>=2.0.0; extra == "visualize"
Provides-Extra: secrets-aws
Requires-Dist: boto3>=1.26; extra == "secrets-aws"
Provides-Extra: secrets-gcp
Requires-Dist: google-cloud-secret-manager>=2.0; extra == "secrets-gcp"
Provides-Extra: autopilot
Requires-Dist: fastapi>=0.104.0; extra == "autopilot"
Requires-Dist: uvicorn[standard]>=0.24.0; extra == "autopilot"
Requires-Dist: pydantic>=2.5.0; extra == "autopilot"
Requires-Dist: anthropic>=0.40.0; extra == "autopilot"
Provides-Extra: autopilot-dev
Requires-Dist: g-gremlin[autopilot]; extra == "autopilot-dev"
Requires-Dist: pytest>=7.4.0; extra == "autopilot-dev"
Requires-Dist: httpx>=0.25.0; extra == "autopilot-dev"
Provides-Extra: dev
Requires-Dist: pytest>=7.4.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
Requires-Dist: httpx>=0.25.0; extra == "dev"
Requires-Dist: import-linter>=2.0; extra == "dev"
Provides-Extra: test
Requires-Dist: pytest>=7.4.0; extra == "test"
Requires-Dist: pytest-asyncio>=0.23.0; extra == "test"
Requires-Dist: pytest-timeout>=2.3.1; extra == "test"
Requires-Dist: httpx>=0.25.0; extra == "test"
Requires-Dist: beautifulsoup4>=4.12.0; extra == "test"
Requires-Dist: langdetect>=1.0.9; extra == "test"
Requires-Dist: fastapi>=0.111.0; extra == "test"
Requires-Dist: pydantic>=2.7.0; extra == "test"
Requires-Dist: pydantic-settings>=2.4.0; extra == "test"
Requires-Dist: python-dotenv>=1.0.0; extra == "test"
Requires-Dist: python-multipart>=0.0.9; extra == "test"
Requires-Dist: sqlmodel>=0.0.18; extra == "test"
Requires-Dist: SQLAlchemy>=2.0; extra == "test"
Requires-Dist: aiosqlite>=0.19.0; extra == "test"
Requires-Dist: pandas>=2.2.0; extra == "test"
Requires-Dist: numpy>=1.26.0; extra == "test"
Requires-Dist: duckdb>=0.10.0; extra == "test"
Requires-Dist: pyarrow>=15.0.0; extra == "test"
Requires-Dist: rapidfuzz>=3.6.0; extra == "test"
Requires-Dist: responses>=0.25.0; extra == "test"
Requires-Dist: prometheus-client>=0.23.0; extra == "test"
Requires-Dist: redis>=5.0.0; extra == "test"
Requires-Dist: rq>=1.15.0; extra == "test"
Requires-Dist: fakeredis>=2.23.0; extra == "test"
Requires-Dist: PyJWT>=2.10.0; extra == "test"
Requires-Dist: email-validator>=2.2.0; extra == "test"
Requires-Dist: apscheduler>=3.10.0; extra == "test"
Requires-Dist: croniter>=6.0.0; extra == "test"
Requires-Dist: asteval>=0.9.0; extra == "test"
Requires-Dist: chardet>=5.2.0; extra == "test"
Requires-Dist: aiofiles>=25.0.0; extra == "test"
Requires-Dist: bitarray>=2.9.0; extra == "test"
Requires-Dist: phonenumbers>=8.13.0; extra == "test"
Requires-Dist: cryptography>=41.0.0; extra == "test"
Requires-Dist: python-jose>=3.3.0; extra == "test"
Requires-Dist: scikit-learn>=1.3.0; extra == "test"
Requires-Dist: publicsuffix2>=2.20191221; extra == "test"
Requires-Dist: boto3>=1.26.0; extra == "test"
Requires-Dist: sse-starlette>=3.0.0; extra == "test"
Requires-Dist: tldextract>=5.0.0; extra == "test"
Dynamic: license-file

  # g-gremlin — the little AI gremlin in your sheet

  Local-shadow CLI for Google Sheets that pulls to CSV, diffs, and pushes back with safety rails (concurrency token, formula protection, previews).

  ## Setup
  - Python 3.11+
  - From repo root: `python -m pip install --upgrade pip` then `python -m pip install -e .`
  - Obtain a Google OAuth client (Desktop app) from Google Cloud Console.
    - Set `G_GREMLIN_CREDENTIALS=/path/to/credentials.json` **or** place it at `~/.g_gremlin/credentials.json`.
  - Tokens live at `~/.g_gremlin/token.json`; delete to re-auth.

  ## Usage
  - Pull a range to CSV:
    ```bash
    g-gremlin pull SPREADSHEET_ID --range "Leads!A1:Z" --output leads.csv
    ```
  - Diff two CSVs:
    ```bash
    g-gremlin diff --original leads.csv --modified leads_edited.csv --key-col Id --limit 10
    ```
    - Summary + preview; writes `.g_gremlin/diff.json`.
- Push changes back (ETag/concurrency guarded, formula-aware):
  ```bash
  g-gremlin push --dry-run   # preview updates
  g-gremlin push             # apply updates
  ```
  - Uses `.g_gremlin/state.json` and `.g_gremlin/diff.json`.
  - Requires a concurrency token from the last pull; if missing, re-run pull.
  - Skips cells that contain formulas (warns) and aborts if the remote version changed unless `--force` is provided.
- Validate a CSV against rules:
  ```bash
  g-gremlin validate --csv account_dim.csv --rules rules/account_dim.yaml --output .g_gremlin/validate_result.json
  # or run a formula pre-flight (no rules) with allowed sheet refs:
  g-gremlin validate --csv model.csv --refs "FY26_Assumptions,FY27_Assumptions"
  ```
- Inspect (profile) a sheet without modifying it:
  ```bash
  g-gremlin inspect                # uses state.json from last pull
  g-gremlin inspect -s SPREADSHEET_ID -r "Leads!A1:E200" -o .g_gremlin/profile.json
  ```
    - Reads values and formulas, infers column types/stats, lists formula cells.
    - Captures named ranges and a basic role per column (dimension/measure/derived/unknown) to help AI tools understand inputs vs metrics vs derived outputs.
    - Writes `.g_gremlin/profile.json`; running inspect before heavy workflows helps avoid overwriting formulas during push.
    - Semantic inspect adds `semantic_type` (e.g., `sfdc_account_id`, `date_excel_serial`), optional `validation`, tab `classification`, and `detected_sfdc_patterns` to the JSON profile.
    - Use `--structure` to capture formula dependency graphs (drivers, OFFSET lags, IFERROR wrappers, cross-sheet refs).
    - Use `--lint` to scan formulas for common issues; combine with `--fix` to auto-quote bad sheet references.
- Repair inherited spreadsheets (the "broken model" rescue flow):
  ```bash
  g-gremlin repair analyze -s SPREADSHEET_ID -r "Model!A1:Z100" -o repair_plan.json
  g-gremlin repair fix --plan repair_plan.json --auto -s SPREADSHEET_ID
  g-gremlin repair fix --plan repair_plan.json -s SPREADSHEET_ID   # interactive for remaining items
  ```
  - Use `repair stub-sheets` to create placeholders for missing tabs; `repair patterns` for anomaly-only scans; `repair diff` to compare to a baseline tab/file.
  - `repair diff` accepts `--baseline` (CSV file), `--baseline-tab`, or `--baseline-sheet` and writes `.g_gremlin/repair_diff.json`.
- Send in the gremlin (hero command):
  ```bash
  g-gremlin rescue SPREADSHEET_ID \
    --band "J:M=FY26_Assumptions" \
    --band "N:Q=FY27_Assumptions" \
    --receipt
  ```
  - Scans all tabs, fixes everything it safely can, prompts only when needed (e.g., missing sheet remap vs stub), writes a JSON report to `.g_gremlin/rescue_report.json`, and a receipt to `.g_gremlin/rescues/` when requested. Flags: `--dry-run`, `--auto`, `--output FILE`, `--receipt`, `--backup/--no-backup` (placeholder), `--throttle/--no-throttle` (pacing + quota backoff; default on).
- Style a range (headers/stripes/fonts):
  ```bash
  g-gremlin style -s SPREADSHEET_ID -r "Tab!A1:Q200" --header-fill "#1f4e79" --stripe --font-size 11
  ```
  - Flags: `--header-rows`, `--header-bold/--no-header-bold`, `--header-fill`, `--header-font-color`, `--stripe/--no-stripe`, `--stripe-color`, `--font-size`, `--font-color`.
- Write a CSV directly to a tab (creates it if missing):
  ```bash
  g-gremlin write SPREADSHEET_ID --tab "Cleaned" --csv cleaned.csv --mode overwrite
  g-gremlin write SPREADSHEET_ID --tab "Cleaned" --csv delta.csv --mode append
  # with backup copy before writing:
  g-gremlin write SPREADSHEET_ID --tab "Master_Enriched" --csv master_final.csv --mode overwrite --backup
  ```
- Copy a tab safely (preserves cross-sheet references):
  ```bash
  g-gremlin copy-tab SPREADSHEET_ID --source "MH 2026" --dest "Orion_FY27_Assumptions" --overwrite
  ```
  - Merge multiple CSVs by key with optional source flags:
    ```bash
    g-gremlin merge --sources cw_opps.csv,commure_opps.csv --key "Account ID" --output merged.csv --source-flag-col Source
  ```
  - Fuzzy-match names between two files:
    ```bash
    g-gremlin fuzzy-match --source master_list.csv --source-col "Account Name" --target account_dim.csv --target-col AccountName --output matched.csv --threshold 0.8
    ```
- Quick safety + doctor checks:
  ```bash
  g-gremlin doctor SHEET_ID --live --sfdc --format json --output doctor.json
  g-gremlin safety
  ```
  - Machine output standard: `--format json` (alias `--json`) + `--output PATH` (use `-` for stdout). `--json-summary` remains a shortcut on mutating commands.
- Connect Apollo OAuth for FoundryMatch enrichment:
  ```bash
  g-gremlin apollo setup --show-redirect-urls
  g-gremlin apollo auth --workspace-id YOUR_WORKSPACE_ID
  g-gremlin apollo doctor --dev
  ```
  - Register the redirect URL printed by `g-gremlin apollo setup` in Apollo (single callback for Sheets + CLI).
  - Full setup guide: `docs/APOLLO_OAUTH_SETUP.md`.

## Recipes (agent-friendly)
- Deploy a spreadsheet model: `g-gremlin diff ...` (optional) → `g-gremlin lint ...` → `g-gremlin sync SHEET_ID --config g_gremlin.yaml` (dry-run preview) → `g-gremlin sync ... --apply` (backed up; plan hash + run ID emitted).
- Resolve accounts to SFDC + writeback: `g-gremlin pull ... --range "Accounts!A:Z" --output accounts.csv` → match domains with `g-gremlin fg search`/`fg lookup` or `g-gremlin fuzzy-match` → stage updates in CSV → `g-gremlin sfdc preview-push --csv updates.csv --object Account --id-column Id --fields "Type,Industry" --output preview` → `g-gremlin push --dry-run` → `g-gremlin push`.
- Build contacts from an account universe: export with `g-gremlin pull ... --range "Contacts!A:Z"` → enrich with `g-gremlin enrich --csv contacts.csv --company-col Company --output enriched.csv` → dedupe/merge via `g-gremlin fuzzy-match` or `merge` → link IDs in the sheet with `g-gremlin write`/`sync` (prefer dry-run first).
- Notify a webhook on completion (piped from another command):
  ```bash
  g-gremlin sync SHEET_ID --config sync.yaml --apply --json-summary \
    | g-gremlin notify webhook https://hooks.slack.com/... --apply
  ```
- Sequence contacts (Instantly, Smartlead, Apollo):
  ```bash
  g-gremlin sequencer doctor --provider apollo
  g-gremlin sequencer campaigns list --provider instantly --format json
  g-gremlin sequencer push --provider smartlead --csv leads.csv --campaign-id XYZ --apply
  ```
  - Credentials: set `G_GREMLIN_INSTANTLY_API_KEY`, `G_GREMLIN_SMARTLEAD_API_KEY`, or `G_GREMLIN_APOLLO_API_KEY` (or use `g-gremlin auth set ...`).

## Command map (everything in the CLI)
- `pull` / `write` / `push` — core read/write with concurrency + formula safety.
- `diff` / `merge` / `fuzzy-match` / `transform` — local CSV utilities.
- `inspect` / `inspect-formulas` / `find-refs` / `trace` / `check-bands` / `generate-lag` — analysis/lineage helpers.
- `repair` / `rescue` / `copy-tab` / `style` — sheet fixing and formatting.
- `sync` / `names` / `ranges` / `formula` — config-driven writes, named range, and formula helpers.
- `lint` / `validate` / `schema-validate` — linting and schema validation.
- `enrich` / `fullenrich` / `dedup` / `apify` / `hubspot` / `sfdc` / `zendesk` / `fg` / `outreach` / `gdocs` / `gslides` — enrichment, dedup, and integrations (Apify, HubSpot, Salesforce, Zendesk, FoundryGraph, Outreach, Google Docs, Google Slides).
- `apollo` — Apollo OAuth connect/check/disconnect for FoundryMatch enrichment (server-held tokens).
- `recipe` / `chain` — prebuilt flows and chained pull→enrich→transform→write (with optional sink).
- `query` — DuckDB-backed read-only SQL over CSV/Parquet.
- `sink` — land CSV data to BigQuery, S3, GCS, Snowflake, or Redshift.
- `snapshot` — create/list/diff/restore spreadsheet snapshots with rollback support.
- `notify` — job-level notifications to webhooks (Slack/Discord/custom).
- `sequencer` — push contacts to Instantly, Smartlead, or Apollo sequences.
- `provenance` — sidecar cleanup; `stats` — gremlin achievements.
- `autopilot` — localhost web UI + CLI-runnable plays (workflow templates).
- `docs` / `help` — built-in docs and JSON introspection.

## Autopilot Plays

Pre-built workflow templates that run without AI assistance. They extract data and metadata from connected systems into structured artifacts for documentation and analysis.

### Quick Start

```bash
# List available plays (shows credential status)
g-gremlin autopilot play list

# Validate a play definition
g-gremlin autopilot play validate document_cpq

# Run a play (dry-run preview)
g-gremlin autopilot play run test_echo --dry-run

# Run a play with parameters
g-gremlin autopilot play run document_cpq -p output_format=markdown

# Specify output directory
g-gremlin autopilot play run document_cpq --output-dir ./my_output
```

### Available CRM Documentation Plays

| Play | Description | Credentials |
|------|-------------|-------------|
| `document_cpq` | Document CPQ config (products, pricing, quotes) | salesforce |
| `document_lead_routing` | Document lead assignment and territories | salesforce |
| `document_case_routing` | Document case routing and entitlements | salesforce |
| `document_renewals` | Document renewal process and contracts | salesforce |
| `document_revenue_process` | Document opportunity stages and forecasting | salesforce |
| `test_echo` | Smoke test play (no external dependencies) | none |

### Output Structure

Each play run creates an artifacts directory:
```
artifacts/<play_name>/<timestamp>/
  metadata_inventory.md     # Summary of extracted data
  final_doc.md              # Documentation skeleton
  risks_and_edge_cases.md   # Automated risk flags
  run_summary.json          # Execution details + step results
  logs/                     # Per-step execution logs
  metadata/                 # Retrieved SF metadata (flows, validations)
  *.csv                     # Extracted data files
```

### Operator Prompts

Each CRM play has a matching `.prompt.md` file in `config/plays/` for AI-assisted synthesis:
1. Run the play to extract data
2. Use the operator prompt with an AI assistant to synthesize artifacts into client-ready documentation

### Documentation

- [PLAYBOOKS.md](docs/PLAYBOOKS.md) — Full user guide
- [PLAY_AUTHORING.md](docs/PLAY_AUTHORING.md) — Create custom plays
- [AUTOPILOT_SCOUTS.md](docs/AUTOPILOT_SCOUTS.md) — Scout catalog and configuration

## Gremlin Triage (Sheets add-on)
> One-click answer to “What should I do with this sheet?”

- Endpoint: `POST /api/v2/gremlin/triage` (headers: `X-Sheets-Token`), body: `{ spreadsheet_id, sheet_name?, range? }` (defaults to active sheet, `A1:Z2000`).
- Returns: deterministic summary (row/col counts, headers, SFDC ID detection, B2C/duplicate %, formula density), `suggested_actions` (2-4 routed flows), optional `ai_summary` for Pro+ tiers.
- Client entry points: Sheets add-on sidebar button “🧭 What should I do with this?” and menu: FoundryMatch → Model Intelligence → 🧭 What should I do?
- Actions route to existing flows (scan/fix/explain model, dedupe, match to accounts, SFDC enrich placeholders). Free tiers get deterministic summary; Pro/Scale/Unleashed add AI natural-language summary (Gemini).
- Model building helpers:
  ```bash
  # Pre-flight validate a CSV and allowed sheet refs
  g-gremlin validate --csv model.csv --refs "FY26_Assumptions,FY27_Assumptions"

  # Check column bands reference expected sheets
  g-gremlin check-bands --csv model.csv --band "J:M=FY26_Assumptions" --band "N:Q=FY27_Assumptions"

  # Generate lagged OFFSET formulas across a row
  g-gremlin generate-lag --source "E5:Q5" --lag 2 --output lag_row.csv
  # Copy a tab safely without breaking cross-sheet references
  g-gremlin copy-tab SPREADSHEET_ID --source "Template" --dest "Target" --overwrite
  ```
- Salesforce helpers:
  ```bash
  g-gremlin sfdc connect --org-alias canopy
  g-gremlin sfdc audit --csv account_dim.csv --id-column AccountId --object Account --report .g_gremlin/sfdc_audit.json
  g-gremlin sfdc enrich --csv account_dim.csv --id-column AccountId --object Account --fields Type,Industry --output account_enriched.csv
  # Aliasing fields:
  g-gremlin sfdc enrich --csv account_dim.csv --id-column AccountId --object Account --fields "CS_Scorecard_GPA__c AS 'CS Scorecard GPA'" --output account_enriched.csv
  # Preview SFDC changes from a CSV (does not push):
  g-gremlin sfdc preview-push --csv master_final.csv --object Account --id-column AccountId --fields Type,Industry --output sfdc_preview
  ```
  - `sfdc audit` checks ID format, prefix, duplicates, and SFDC existence; default report path is `.g_gremlin/sfdc_audit.json`.
  - `sfdc preview-push` only compares CSV vs SFDC and writes `<output>.json` and `<output>.md`; it never writes to SFDC.
  - Zendesk:
    ```bash
    g-gremlin zendesk connect --subdomain your_subdomain --email you@domain.com --api-token ****
    ```
  - Recipe: build a simple Account Dim from opp exports:
    ```bash
    g-gremlin recipe build-account-dim --opp-sources cw_opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --output account_dim.csv
    # with optional master list attach
    g-gremlin recipe build-account-dim --opp-sources cw_opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --master-list master.csv --master-name-col "Account Name" --output account_dim.csv
    # optional: specify stage/close/amount columns to compute Has_ClosedWon, ARR_Estimate, latest close/end dates
    g-gremlin recipe build-account-dim --opp-sources cw.csv,commure.csv --key-column "Account ID" --name-column "Account Name" --stage-column "StageName" --close-date-column "CloseDate" --amount-column "Amount" --output account_dim.csv
    ```
  - Recipe: enrich-from-sfdc (recipe wrapper for sfdc enrich):
    ```bash
    g-gremlin recipe enrich-from-sfdc \
      --csv account_dim.csv \
      --id-column AccountId \
      --object Account \
      --fields Type,Industry,BillingCity \
      --output account_enriched.csv
    ```
  - Recipe: resolve-unmatched (surface rows missing IDs with SOQL hints):
    ```bash
    g-gremlin recipe resolve-unmatched \
      --csv account_dim.csv \
      --id-column AccountId \
      --name-column "Original_Master_Name" \
      --output unmatched.csv
    ```
  - Recipe: detect open renewals on accounts:
    ```bash
    g-gremlin recipe detect-open-renewals \
      --csv account_dim.csv \
      --id-column AccountId \
      --renewal-col "Open Renewal?" \
      --renewal-id-col "Open Renewal ID" \
      --output account_dim_with_renewals.csv
    ```
  - Recipe: attach products from OpportunityLineItems per account:
    ```bash
    g-gremlin recipe attach-products \
      --csv account_dim_with_renewals.csv \
      --id-column AccountId \
      --products-column "Direct Products" \
      --stage "7 - Closed Won" \
      --output master_enriched.csv
    ```
  - Recipe: Zendesk user stats by email:
    ```bash
    g-gremlin recipe zendesk-user-stats \
      --csv master_enriched.csv \
      --email-column "Primary Contact Email" \
      --tickets-count-col "ZD Ticket Count" \
      --open-tickets-col "ZD Open Tickets" \
      --last-ticket-col "ZD Last Ticket At" \
      --output master_with_zd.csv
    ```
  - Recipe: Zendesk org stats by org URL/ID:
    ```bash
    g-gremlin recipe zendesk-org-stats \
      --csv master_enriched.csv \
      --org-column "Zendesk_URL" \
      --total-tickets-col "ZD_Total_Tickets" \
      --open-tickets-col "ZD_Open_Tickets" \
      --last-ticket-col "ZD_Last_Ticket_At" \
      --org-created-col "ZD_Created_At" \
      --output master_with_org_stats.csv
    ```
  - Transform a CSV (rename/add/calculated columns):
    ```bash
    g-gremlin transform --input data.csv --output data_out.csv --rename Old=New --add-column CSM_Assigned --calc "Days_To_Renewal=Latest_End_Date-TODAY"
    ```
  - Chain pull → enrich → transforms → write → sink:
    ```bash
  g-gremlin chain --spreadsheet-id SPREADSHEET_ID --pull-range "Sheet!A1:Z" \
    --enrich-object Account --enrich-fields "Type,Industry" --enrich-id-column AccountId \
    --calc "Days_To_Renewal=Latest_End_Date-45997" --add-column "CSM_Assigned" \
    --write-tab "Output_Tab"

  # Chain with sink to BigQuery
  g-gremlin chain --spreadsheet-id SPREADSHEET_ID --pull-range "Data!A1:Z" \
    --enrich-object Account --enrich-fields "Type,Industry" \
    --sink bq --sink-project myproj --sink-dataset revops --sink-table enriched
  ```
  - Chains pull → optional SFDC enrich → transforms → optional write → optional sink; default output lands at `.g_gremlin/chain/chain_output.csv`.
  - Manifest example at `.g_gremlin/state.json`:
    ```json
    {
      "spreadsheet_id": "...",
      "range": "Leads!A1:Z",
      "output_file": "leads.csv",
      "concurrency_token": "...",
      "pulled_at": "2025-01-01T12:34:56Z"
    }
    ```

  ## Example flow
  - Pull, merge two sources, fuzzy-match account names to a dimension, then write back:
    ```bash
    g-gremlin pull SPREADSHEET_ID --range "Opps!A1:Z" --output opps.csv
    g-gremlin merge --sources opps.csv,commure_opps.csv --key "Account ID" --output merged.csv --source-flag-col Source
    g-gremlin fuzzy-match --source merged.csv --source-col "Account Name" --target account_dim.csv --target-col AccountName --output matched.csv --threshold 0.82
    g-gremlin write SPREADSHEET_ID --tab "Matched" --csv matched.csv --mode overwrite
    ```
    - Extend with SFDC enrich:
    ```bash
    g-gremlin recipe build-account-dim --opp-sources opps.csv,commure_opps.csv --key-column "Account ID" --name-column "Account Name" --output account_dim.csv
    g-gremlin sfdc enrich --csv account_dim.csv --id-column "Account ID" --object Account --fields Type,Industry,BillingCity --output account_dim_enriched.csv
    g-gremlin write SPREADSHEET_ID --tab "Master_Enriched" --csv account_dim_enriched.csv --mode overwrite
    ```

## Backup & Rollback

All destructive commands support automatic backup snapshots with restore-on-error:

```bash
# Backup is ON by default for: write, push, repair fix, rescue, sync, chain, copy-tab, style
g-gremlin write SPREADSHEET_ID --tab "Data" --csv data.csv            # Creates pre-write snapshot
g-gremlin write SPREADSHEET_ID --tab "Data" --csv data.csv --no-backup  # Skip backup

# Auto-rollback for CI/agents (restores on any error)
g-gremlin push SPREADSHEET_ID --auto-rollback

# Manual snapshot management
g-gremlin snapshot create SPREADSHEET_ID --name "before-refactor" --tabs "Sheet1,Sheet2"
g-gremlin snapshot list SPREADSHEET_ID --json
g-gremlin snapshot diff SPREADSHEET_ID --name "before-refactor"
g-gremlin snapshot restore SPREADSHEET_ID --name "before-refactor"
g-gremlin snapshot restore SPREADSHEET_ID --name "before-refactor" --force  # Override safety checks
```

**Safety checks on restore:**
- **ETag mismatch**: Blocked if spreadsheet modified since snapshot
- **Shape drift**: Blocked if rows/columns changed
- Use `--force` to override (data may be lost)

## Data Sink (`g-gremlin sink`)

Land CSV data to external systems:

```bash
# BigQuery
g-gremlin sink bq --csv accounts.csv --project myproj --dataset revops --table accounts --mode replace

# S3 / GCS
g-gremlin sink s3 --csv data.csv --bucket my-bucket --prefix exports/2025/ --compress
g-gremlin sink gcs --csv data.csv --bucket my-bucket --prefix exports/2025/ --compress

# Snowflake (via S3/GCS staging)
g-gremlin sink snowflake --csv accounts.csv \
  --stage-bucket my-staging --database ANALYTICS --schema REVOPS --table ACCOUNTS

# Redshift (via S3 staging)
g-gremlin sink redshift --csv accounts.csv \
  --stage-bucket my-staging --host cluster.redshift.amazonaws.com \
  --database analytics --schema revops --table accounts \
  --iam-role arn:aws:iam::123456789:role/RedshiftCopyRole

# Cleanup old staging files
g-gremlin sink cleanup --older-than 7 --delete-from-cloud
```

**Optional dependencies:**
```bash
pip install g-gremlin[sink-bq]        # BigQuery
pip install g-gremlin[sink-s3]        # S3 (boto3)
pip install g-gremlin[sink-gcs]       # GCS
pip install g-gremlin[sink-snowflake] # Snowflake
pip install g-gremlin[sink-redshift]  # Redshift (psycopg2)
pip install g-gremlin[sink-all]       # All sinks
```

## Notes
- Dependencies: click, google-auth, google-auth-oauthlib, google-api-python-client, requests, pyyaml.

## Close the GTM Loop (Common Room → HeyReach → Docs)

- New integrations: Common Room (CSV ingest now; webhooks later), HeyReach activation, docs sinks (Notion + Outline + Confluence). They are independent entry points; use the ones you need.
- Safety: All mutation commands are dry-run by default; add `--apply` to execute. Defaults are conservative (HeyReach 10/batch, 500ms delay, 100/day; Notion 3 req/sec built-in; Outline honors `Retry-After` on 429s).
- Quick start (pick any provider):
  ```bash
  g-gremlin auth set commonroom --key webhook_secret
  g-gremlin commonroom doctor
  g-gremlin commonroom import --source signals.csv --output enriched.csv --min-intent-score 0.6 --apply

  g-gremlin auth set heyreach --key api_key
  g-gremlin heyreach doctor
  g-gremlin heyreach enroll --csv enriched.csv --campaign-id ABC123 --apply

  g-gremlin auth set notion --key token
  g-gremlin docs doctor --provider notion
  g-gremlin docs publish-batch --csv enriched.csv --provider notion --database-id XYZ789 --template src/g_gremlin/templates/intent-brief.md.j2 --external-id-column signal_id --apply --profile prod

  g-gremlin auth set outline --key api_token
  g-gremlin auth set outline --key api_base --value https://wiki.example.com/api
  g-gremlin docs doctor --provider outline
  g-gremlin docs publish-batch --csv enriched.csv --provider outline --collection-id COLLECTION_ID --template src/g_gremlin/templates/intent-brief.md.j2 --external-id-column signal_id --apply --profile prod

  g-gremlin auth set confluence --key api_base --value https://acme.atlassian.net
  g-gremlin auth set confluence --key email --value user@acme.com
  g-gremlin auth set confluence --key api_token --value $CONFLUENCE_TOKEN
  g-gremlin docs doctor --provider confluence
  g-gremlin docs publish --provider confluence --markdown-file plan.md --space-key SPACE --apply
  ```
- Outline auth keys: `api_token`, `api_base` (e.g. `https://wiki.example.com/api`), optional `default_collection_id`.
- Confluence auth keys: `api_base`, `email`, `api_token`, optional `default_space_key`, `default_parent_id`.
  Install with: `pip install g-gremlin[docs-confluence]`
- Confluence sugar: blockquotes → panels, `[STATUS: ON TRACK]` → status lozenge, `::: expand Title` blocks.
- Optional chaining can come later via recipes; today each command stands alone. Deferred: Sheets writeback, Common Room webhook endpoint, richer template gallery.
