#!/usr/bin/env -S uv run --script
# /// script
# dependencies = [
#   "sqlite-utils>=3.30",
#   "click>=8.0",
#   "pyyaml>=6.0",
#   "pathspec>=0.11",
#   "watchdog>=3.0"
# ]
# ///
"""
Import markdown files into SQLite with dynamic schema generation.

SQLite is all you need. This tool just parses markdown + YAML frontmatter.
"""

import click
from pathlib import Path
import sys
import time
from typing import Optional
from datetime import datetime
from sqlite_utils import Database
import pathspec
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

# Add lib directory to Python path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'lib'))

from md_cache import process_markdown_file


class MarkdownChangeHandler(FileSystemEventHandler):
    """Watchdog handler that mirrors markdown file changes into a SQLite table.

    Created and modified files are re-parsed and upserted, deleted files have
    their row removed, and moved files are handled as "remove the old path,
    import the new path".

    Events for the same path that arrive within ``debounce_seconds`` of the
    last processed one are dropped.
    NOTE(review): because the *first* event of a burst wins, the final write
    of a rapid burst may not be re-read until the next event for that file.
    """

    def __init__(self, db_path, root_path, table, gitignore_spec, exclude_patterns, verbose):
        """Store the watch configuration.

        Args:
            db_path: Path of the SQLite database file to update.
            root_path: Directory being watched; stored paths are relative to it.
            table: Name of the table receiving the documents.
            gitignore_spec: Compiled ignore rules, or None to disable them.
            exclude_patterns: Extra exclusion patterns supplied by the user.
            verbose: When true, processing/deletion errors are echoed to stderr.
        """
        super().__init__()
        self.db_path = db_path
        self.root_path = root_path
        self.table = table
        self.gitignore_spec = gitignore_spec
        self.exclude_patterns = exclude_patterns
        self.verbose = verbose
        self.last_update = {}  # Debounce map: path -> monotonic timestamp
        self.debounce_seconds = 1.0
        # Path.match() treats "**" like a single-level "*", so patterns such
        # as "**/test/**" would miss nested files. The same patterns are also
        # compiled as gitwildmatch rules, which do understand "**".
        self._exclude_spec = self._compile_excludes(exclude_patterns)

    @staticmethod
    def _compile_excludes(patterns):
        """Compile exclusion patterns as gitwildmatch rules, or return None."""
        if not patterns:
            return None
        try:
            return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
        except Exception:
            # A pattern that is not valid gitwildmatch still gets checked
            # through Path.match() in should_process().
            return None

    def should_process(self, file_path: Path) -> bool:
        """Return True if *file_path* is a markdown file that passes all filters.

        Only the path itself is inspected (no filesystem access), so this is
        safe to call for files that have already been deleted.
        """
        if file_path.suffix != '.md':
            return False

        try:
            rel_path = file_path.relative_to(self.root_path)
        except ValueError:
            return False  # File outside root: it cannot be keyed relative to it

        # Check gitignore
        if self.gitignore_spec and self.gitignore_spec.match_file(str(rel_path)):
            return False

        # Check additional exclusions (pathlib semantics first, then gitwildmatch)
        if any(file_path.match(excl_pattern) for excl_pattern in self.exclude_patterns):
            return False
        if self._exclude_spec is not None and self._exclude_spec.match_file(str(rel_path)):
            return False

        return True

    def process_file(self, file_path: Path, event_type: str = "Updated"):
        """Parse one markdown file and upsert it into the table.

        Args:
            file_path: Absolute path of the file reported by the watcher.
            event_type: Label used in the log line (e.g. "Added", "Updated").
        """
        # Filter before touching the debounce map so that unrelated files
        # never accumulate entries in it.
        if not self.should_process(file_path):
            return

        # Debounce: ignore if we processed this file recently. A monotonic
        # clock keeps the window correct across wall-clock adjustments.
        now = time.monotonic()
        previous = self.last_update.get(file_path)
        if previous is not None and now - previous < self.debounce_seconds:
            return
        self.last_update[file_path] = now

        try:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            rel_path = file_path.relative_to(self.root_path)

            doc = process_markdown_file(file_path, self.root_path)
            # A fresh connection is opened for every event and closed again
            # once the write is done.
            database = Database(str(self.db_path))
            try:
                database[self.table].upsert(doc, pk='_id', alter=True)
            finally:
                database.conn.close()

            click.echo(f"[{timestamp}] {event_type}: {rel_path}")
        except Exception as e:
            if self.verbose:
                click.echo(f"⚠️  Error processing {file_path}: {e}", err=True)

    def _remove_file(self, file_path: Path):
        """Delete the row belonging to *file_path*, if the file passes the filters."""
        if not self.should_process(file_path):
            return

        # Forget the debounce entry so a file recreated right away is imported.
        self.last_update.pop(file_path, None)

        try:
            import hashlib
            rel_path = str(file_path.relative_to(self.root_path))
            # NOTE(review): assumes process_markdown_file() derives `_id` as the
            # SHA-1 hex digest of the root-relative path -- confirm in md_cache.
            doc_id = hashlib.sha1(rel_path.encode()).hexdigest()
            database = Database(str(self.db_path))
            try:
                database[self.table].delete(doc_id)
            finally:
                database.conn.close()
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            click.echo(f"[{timestamp}] Deleted: {rel_path}")
        except Exception as e:
            if self.verbose:
                click.echo(f"⚠️  Error deleting {file_path}: {e}", err=True)

    def on_modified(self, event):
        """Re-import a file whose contents changed."""
        if not event.is_directory:
            self.process_file(Path(event.src_path), "Updated")

    def on_created(self, event):
        """Import a newly created file."""
        if not event.is_directory:
            self.process_file(Path(event.src_path), "Added")

    def on_deleted(self, event):
        """Remove the row of a deleted file."""
        if not event.is_directory:
            self._remove_file(Path(event.src_path))

    def on_moved(self, event):
        """Handle a rename: drop the old path's row and import the new path.

        This also covers editors that save by writing a temporary file and
        renaming it over the target: the temporary source is filtered out by
        should_process() and the destination is imported.
        """
        if event.is_directory:
            return
        self._remove_file(Path(event.src_path))
        self.process_file(Path(event.dest_path), "Moved")


def load_gitignore(root_path: Path) -> Optional[pathspec.PathSpec]:
    """Load ignore patterns from the ``.gitignore`` directly inside *root_path*.

    Only that single file is read; ``.gitignore`` files in subdirectories are
    not consulted.

    Args:
        root_path: Directory expected to contain the ``.gitignore`` file.

    Returns:
        A compiled ``PathSpec`` using gitwildmatch rules, or None when the
        file is missing or could not be read or parsed. In the failure case a
        warning is written to stderr so the caller's user knows the ignore
        rules are not being applied.
    """
    gitignore_path = root_path / '.gitignore'
    if not gitignore_path.is_file():
        return None

    try:
        # Decode explicitly instead of relying on the locale's default
        # encoding; "utf-8-sig" also tolerates a leading byte-order mark.
        lines = gitignore_path.read_text(encoding='utf-8-sig').splitlines()
        # Filter out comments and empty lines
        patterns = [line for line in lines if line.strip() and not line.startswith('#')]
        return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
    except Exception as e:
        # Best effort: a broken .gitignore must not abort the import, but it
        # should not be ignored silently either.
        click.echo(f"⚠️  Could not load {gitignore_path}: {e}", err=True)
        return None


def _compile_exclude_spec(patterns):
    """Compile ``--exclude`` patterns as gitwildmatch rules, or return None."""
    if not patterns:
        return None
    try:
        return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
    except Exception:
        # Patterns that are not valid gitwildmatch are still checked through
        # Path.match() in _matches_exclude().
        return None


def _matches_exclude(md_file, rel_path, patterns, exclude_spec):
    """Return True when the file is hit by any ``--exclude`` pattern.

    Path.match() treats "**" like a single-level "*", so a pattern such as
    "**/test/**" would only catch direct children of ``test``. The gitwildmatch
    check on the root-relative path covers the recursive case.
    """
    if any(md_file.match(excl_pattern) for excl_pattern in patterns):
        return True
    return exclude_spec is not None and exclude_spec.match_file(str(rel_path))


@click.command()
@click.option('--db', default='cache.db', help='Database file path')
@click.option('--root', default='.', help='Root directory containing markdown files', type=click.Path(exists=True))
@click.option('--table', default='docs', help='Table name (default: docs)')
@click.option('--pattern', default='**/*.md', help='Glob pattern for markdown files')
@click.option('--exclude', multiple=True, help='Additional exclude patterns beyond .gitignore')
@click.option('--no-gitignore', is_flag=True, help='Disable automatic .gitignore filtering')
@click.option('--watch', '-w', is_flag=True, help='Watch for file changes and auto-update')
@click.option('--verbose', '-v', is_flag=True, help='Verbose output')
def main(db, root, table, pattern, exclude, no_gitignore, watch, verbose):
    """
    Import markdown files into SQLite cache with dynamic schema.

    Automatically respects .gitignore by default.

    Examples:
        md-import --db cache.db --root ~/notes
        md-import --db cache.db --table tasks --root ~/tasks
        md-import --db cache.db --exclude '**/test/**' --root ~/tasks
        md-import --db cache.db --no-gitignore --root ~/tasks  # Disable gitignore
        md-import --db cache.db --root ~/tasks --watch  # Watch mode: auto-update on changes
    """
    root_path = Path(root).resolve()
    db_path = Path(db)

    # Load .gitignore patterns and compile the extra exclusions once
    gitignore_spec = None if no_gitignore else load_gitignore(root_path)
    exclude_spec = _compile_exclude_spec(exclude)

    if verbose:
        click.echo(f"📂 Scanning {root_path} for {pattern}")
        click.echo(f"💾 Database: {db_path}")
        click.echo(f"📊 Table: {table}")
        if gitignore_spec:
            click.echo("✓ Respecting .gitignore")
        if exclude:
            click.echo(f"🚫 Additional exclusions: {', '.join(exclude)}")
        click.echo()

    # Find all markdown files
    md_files = []
    for md_file in root_path.glob(pattern):
        # The glob can also match directories (e.g. one named "notes.md")
        if not md_file.is_file():
            continue

        # glob() only yields paths below root_path, so this cannot fail
        rel_path = md_file.relative_to(root_path)

        # Check gitignore
        if gitignore_spec and gitignore_spec.match_file(str(rel_path)):
            if verbose:
                click.echo(f"⊗ {rel_path} (gitignored)")
            continue

        # Check additional exclusions
        if _matches_exclude(md_file, rel_path, exclude, exclude_spec):
            continue

        md_files.append(md_file)

    if not md_files:
        click.echo(f"❌ No markdown files found matching {pattern} in {root_path}", err=True)
        # click discards a command's return value in standalone mode, so a
        # plain `return 1` would still exit with status 0.
        sys.exit(1)

    if verbose:
        click.echo(f"Found {len(md_files)} files")
        click.echo()

    # Process all files; a file that fails to parse is reported and skipped
    docs = []
    for md_file in md_files:
        if verbose:
            click.echo(f"📄 {md_file.relative_to(root_path)}")

        try:
            docs.append(process_markdown_file(md_file, root_path))
        except Exception as e:
            click.echo(f"⚠️  Error processing {md_file}: {e}", err=True)

    # Import to database with dynamic schema (alter=True adds missing columns)
    database = Database(str(db_path))

    imported = 0
    column_count = 0
    for doc in docs:
        try:
            database[table].upsert(doc, pk='_id', alter=True)
            imported += 1
        except Exception as e:
            # Check for column limit error: once hit, later documents with
            # new keys would fail the same way, so stop importing.
            if 'too many columns' in str(e).lower():
                click.echo("\n⚠️  SQLite column limit reached (2000 columns)", err=True)
                click.echo("Consider using --exclude to filter out diverse markdown files", err=True)
                break
            click.echo(f"⚠️  Error upserting {doc.get('_path', 'unknown')}: {e}", err=True)

    if verbose:
        click.echo()

    # Get final column count
    columns = []
    if table in database.table_names():
        columns = database[table].columns
        column_count = len(columns)

    click.echo(f"✅ Imported {imported} of {len(docs)} documents into {db_path}:{table}")
    click.echo(f"📋 Schema has {column_count} columns")

    if verbose and column_count > 0:
        click.echo("\nFirst 10 columns:")
        for col in list(columns)[:10]:
            click.echo(f"  - {col.name} ({col.type})")
        if column_count > 10:
            click.echo(f"  ... and {column_count - 10} more")

    # Enter watch mode if requested
    if watch:
        click.echo(f"\n👀 Watching {root_path} for changes... (Ctrl-C to stop)")
        event_handler = MarkdownChangeHandler(
            db_path=db_path,
            root_path=root_path,
            table=table,
            gitignore_spec=gitignore_spec,
            exclude_patterns=exclude,
            verbose=verbose
        )
        observer = Observer()
        observer.schedule(event_handler, str(root_path), recursive=True)
        observer.start()
        try:
            # The observer does its work on its own thread; just keep the
            # main thread alive until the user interrupts.
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            click.echo("\n✋ Stopped watching")
        finally:
            # Always shut the observer thread down, whatever ended the loop
            observer.stop()
            observer.join()


# Script entry point: invoke the click command only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
