#!/bin/bash
#============================================================================
# curllm - Browser Automation with Local LLM (8GB GPU compatible)
# Author: Softreck (2025)
# Version: 1.0.0
#============================================================================

# Pull settings from a .env file in the current directory (if any); allexport
# is switched on while sourcing so every assignment in it is exported.
if [[ -f .env ]]; then
    set -o allexport
    source ./.env
    set +o allexport
fi

# Fall back to built-in defaults for anything still unset or empty
: "${CURLLM_API_HOST:=http://localhost:8000}"
: "${CURLLM_OLLAMA_HOST:=http://localhost:11434}"
: "${CURLLM_MODEL:=qwen2.5:7b}"
: "${CURLLM_BROWSERLESS:=false}"
: "${CURLLM_DEBUG:=false}"

# When a host is still the stock default, prefer the port recorded in the
# corresponding /tmp port file.
if [[ -f /tmp/curllm_api_port && "$CURLLM_API_HOST" == "http://localhost:8000" ]]; then
    CURLLM_API_HOST="http://localhost:$(cat /tmp/curllm_api_port)"
fi
if [[ -f /tmp/ollama_port && "$CURLLM_OLLAMA_HOST" == "http://localhost:11434" ]]; then
    CURLLM_OLLAMA_HOST="http://localhost:$(cat /tmp/ollama_port)"
fi

# ANSI escape sequences for colored output; stored as literal backslash
# sequences and interpreted later by `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # reset (No Color)

# Version string reported by --version
VERSION='1.0.0'

# Debug logger: prints its arguments behind a yellow [DEBUG] tag, but only
# when CURLLM_DEBUG is "true" or VERBOSE is true; otherwise it is a no-op.
dbg() {
    [[ "$CURLLM_DEBUG" == "true" || "$VERBOSE" == true ]] || return 0
    echo -e "${YELLOW}[DEBUG]${NC} $*"
}

# Request state; these are only the defaults, the command-line options
# overwrite them when the arguments are parsed.
METHOD='GET'
URL=
DATA=
OUTPUT_FILE=
HEADERS=()

# Feature toggles, each "false" until its flag is given
VERBOSE=false
VISUAL_MODE=false
STEALTH_MODE=false
CAPTCHA_SOLVER=false
USE_BQL=false

# Print the usage text to stdout and terminate the script with status 0.
# The heredoc delimiter is quoted, so the text is emitted literally.
show_help() {
    cat <<'USAGE'
curllm - Browser automation with Local LLM support

Usage: curllm [OPTIONS] <URL or INSTRUCTION>

OPTIONS:
    -X, --request METHOD     HTTP method (GET, POST, PUT, DELETE)
    -d, --data DATA         JSON data or natural language instruction
    -H, --header HEADER     Add header (can be used multiple times)
    -o, --output FILE       Save output to file
    -v, --verbose           Verbose output
    
AUTOMATION OPTIONS:
    --visual                Enable visual mode (screenshots + vision analysis)
    --stealth              Enable stealth mode (anti-bot detection)
    --captcha              Enable CAPTCHA solving
    --bql                  Use BQL (Browser Query Language) mode
    --model MODEL          LLM model to use (default: qwen2.5:7b)
    
SERVICE OPTIONS:
    --start-services       Start required services (Ollama, API server)
    --stop-services        Stop all services
    --status              Check service status
    --install             Install dependencies
    
EXAMPLES:
    # Simple web extraction
    curllm "https://example.com" -d "extract all email addresses"
    
    # Complex workflow with authentication
    curllm -X POST --visual --stealth \
        -d '{"instruction": "Login and download invoice", 
             "credentials": {"user": "john", "pass": "secret"}}' \
        https://app.example.com
    
    # BQL mode for structured extraction
    curllm --bql -d 'query { page(url: "https://example.com") { 
        title text links { href text } }}' 

    # With CAPTCHA support
    curllm --visual --captcha -d "Fill form and submit" https://form.example.com

USAGE
    exit 0
}

# Function to check if services are running
#
# Prints one status line per component and reports whether all are usable:
#   * Ollama     - GET ${CURLLM_OLLAMA_HOST}/api/tags; on failure probes
#                  CURLLM_OLLAMA_PORT, the port in /tmp/ollama_port and the
#                  ports 11434-11436 on localhost
#   * curllm API - GET ${CURLLM_API_HOST}/health; on failure probes the port
#                  stored in /tmp/curllm_api_port
#   * Model      - looked up with `ollama list`, only when the CLI is installed
# Globals:  reads CURLLM_OLLAMA_HOST, CURLLM_OLLAMA_PORT, CURLLM_API_HOST and
#           CURLLM_MODEL; overwrites CURLLM_OLLAMA_HOST / CURLLM_API_HOST when
#           a fallback port answers.
# Files:    a successful fallback is persisted to /tmp/ollama_port and ./.env
# Returns:  0 if every check passed, 1 otherwise
check_services() {
    local all_good=true
    local p ap port_from_file

    # Check Ollama
    local OLLAMA_OK=false
    dbg "Checking Ollama at ${CURLLM_OLLAMA_HOST}"
    if curl -s "${CURLLM_OLLAMA_HOST}/api/tags" > /dev/null 2>&1; then
        echo -e "${GREEN}✓ Ollama is running${NC}"
        OLLAMA_OK=true
    else
        dbg "Primary Ollama check failed. Probing candidates..."
        port_from_file=$(cat /tmp/ollama_port 2>/dev/null)
        dbg "Candidates: ${CURLLM_OLLAMA_PORT} ${port_from_file} 11434 11435 11436"
        # port_from_file is left unquoted on purpose: an empty/missing file
        # must contribute no candidate at all.
        for p in "${CURLLM_OLLAMA_PORT}" ${port_from_file} 11434 11435 11436; do
            # Skip empty candidates instead of probing "http://localhost:/..."
            [ -n "$p" ] || continue
            dbg "Probing http://localhost:${p}/api/tags"
            if curl -s "http://localhost:${p}/api/tags" > /dev/null 2>&1; then
                CURLLM_OLLAMA_HOST="http://localhost:${p}"
                echo "${p}" > /tmp/ollama_port
                # Remember the working port/host for future runs
                [ -f .env ] || touch .env
                if grep -q '^CURLLM_OLLAMA_PORT=' .env; then
                    sed -i "s/^CURLLM_OLLAMA_PORT=.*/CURLLM_OLLAMA_PORT=${p}/" .env
                else
                    echo "CURLLM_OLLAMA_PORT=${p}" >> .env
                fi
                if grep -q '^CURLLM_OLLAMA_HOST=' .env; then
                    sed -i "s#^CURLLM_OLLAMA_HOST=.*#CURLLM_OLLAMA_HOST=http://localhost:${p}#" .env
                else
                    echo "CURLLM_OLLAMA_HOST=http://localhost:${p}" >> .env
                fi
                dbg "Detected existing Ollama at port ${p}. .env updated."
                echo -e "${GREEN}✓ Ollama is running${NC}"
                OLLAMA_OK=true
                break
            fi
        done
        if [ "$OLLAMA_OK" != true ]; then
            echo -e "${RED}✗ Ollama is not running${NC}"
            echo "  Run: ollama serve"
            all_good=false
        fi
    fi

    # Check API server (with fallback to detected port)
    local API_OK=false
    dbg "Checking API at ${CURLLM_API_HOST}/health"
    if curl -s "${CURLLM_API_HOST}/health" > /dev/null 2>&1; then
        API_OK=true
    else
        dbg "Primary API check failed. Trying fallback from /tmp/curllm_api_port"
        if [ -f /tmp/curllm_api_port ]; then
            ap=$(cat /tmp/curllm_api_port)
            if [ -n "$ap" ]; then
                dbg "Probing http://localhost:${ap}/health"
                if curl -s "http://localhost:${ap}/health" > /dev/null 2>&1; then
                    CURLLM_API_HOST="http://localhost:${ap}"
                    [ -f .env ] || touch .env
                    if grep -q '^CURLLM_API_PORT=' .env; then
                        sed -i "s/^CURLLM_API_PORT=.*/CURLLM_API_PORT=${ap}/" .env
                    else
                        echo "CURLLM_API_PORT=${ap}" >> .env
                    fi
                    if grep -q '^CURLLM_API_HOST=' .env; then
                        sed -i "s#^CURLLM_API_HOST=.*#CURLLM_API_HOST=http://localhost:${ap}#" .env
                    else
                        echo "CURLLM_API_HOST=http://localhost:${ap}" >> .env
                    fi
                    dbg ".env API updated to port ${ap}"
                    API_OK=true
                fi
            fi
        fi
    fi
    if [ "$API_OK" = true ]; then
        echo -e "${GREEN}✓ curllm API is running${NC}"
    else
        echo -e "${RED}✗ curllm API server is not running${NC}"
        echo "  Run: curllm --start-services"
        all_good=false
    fi

    # Check if model exists
    if command -v ollama > /dev/null 2>&1; then
        # Query the same server that was probed above (it may not be on the
        # CLI's default port) and match the model name literally: with a
        # regex, the "." in "qwen2.5" would match any character.
        if ! OLLAMA_HOST="${CURLLM_OLLAMA_HOST}" ollama list | grep -qF -- "$CURLLM_MODEL"; then
            echo -e "${YELLOW}⚠ Model $CURLLM_MODEL not found${NC}"
            echo "  Run: ollama pull $CURLLM_MODEL"
            all_good=false
        else
            echo -e "${GREEN}✓ Model $CURLLM_MODEL is available${NC}"
        fi
    fi

    if [ "$all_good" = false ]; then
        return 1
    fi
    return 0
}

# Function to start services
#
# Starts (or reuses) the Ollama server and the curllm API server, pulls the
# configured model if `ollama list` does not show it, optionally starts a
# Browserless container, and finally prints the service status.
# Globals:  reads CURLLM_API_PORT, CURLLM_OLLAMA_PORT, CURLLM_MODEL and
#           CURLLM_BROWSERLESS; sets the script-level scratch variables PORT,
#           OPORT, INIT_PORT, INIT_OPORT, SKIP_API_START, SCRIPT_PATH,
#           SCRIPT_DIR, CANDIDATES, SERVER_PATH and SERVER_PATH_PY.
# Files:    writes /tmp/curllm_api_port, /tmp/ollama_port, /tmp/ollama.log,
#           /tmp/curllm.log, /tmp/curllm.pid and updates ./.env
# Returns:  1 when curllm_server.py cannot be located; otherwise the status of
#           check_status
start_services() {
    local ap p c

    echo -e "${BLUE}Starting curllm services...${NC}"

    # Prefer reusing an already running API server port, if reachable
    SKIP_API_START=false
    if [ -f /tmp/curllm_api_port ]; then
        ap=$(cat /tmp/curllm_api_port)
        dbg "Probing existing API at http://localhost:${ap}/health"
        if [ -n "$ap" ] && curl -s "http://localhost:${ap}/health" > /dev/null 2>&1; then
            PORT=$ap
            SKIP_API_START=true
            dbg "Detected running API at port ${PORT}; will not start a new instance"
        fi
    fi
    # If no running API was detected, choose a free port (starting from .env or default)
    if [ "$SKIP_API_START" = false ]; then
        PORT=${CURLLM_API_PORT:-$(if [ -f /tmp/curllm_api_port ]; then cat /tmp/curllm_api_port; else echo 8000; fi)}
        INIT_PORT=$PORT
        # Walk upwards until `ss` shows no listener on the port
        while ss -ltn | grep -q ":${PORT}\b"; do PORT=$((PORT+1)); done
        if [ "$PORT" != "$INIT_PORT" ]; then dbg "API port ${INIT_PORT} busy; selected ${PORT}"; fi
    fi
    OPORT=${CURLLM_OLLAMA_PORT:-$(if [ -f /tmp/ollama_port ]; then cat /tmp/ollama_port; else echo 11434; fi)}
    INIT_OPORT=$OPORT
    while ss -ltn | grep -q ":${OPORT}\b"; do OPORT=$((OPORT+1)); done
    if [ "$OPORT" != "$INIT_OPORT" ]; then dbg "Ollama port ${INIT_OPORT} busy; selected ${OPORT}"; fi
    echo "${PORT}" > /tmp/curllm_api_port
    echo "${OPORT}" > /tmp/ollama_port
    dbg "Persisted /tmp/curllm_api_port=${PORT}, /tmp/ollama_port=${OPORT}"
    # Persist selected ports and hosts to .env for future runs
    if [ ! -f .env ]; then touch .env; fi
    if grep -q '^CURLLM_API_PORT=' .env; then
        sed -i "s/^CURLLM_API_PORT=.*/CURLLM_API_PORT=${PORT}/" .env
    else
        echo "CURLLM_API_PORT=${PORT}" >> .env
    fi
    if grep -q '^CURLLM_OLLAMA_PORT=' .env; then
        sed -i "s/^CURLLM_OLLAMA_PORT=.*/CURLLM_OLLAMA_PORT=${OPORT}/" .env
    else
        echo "CURLLM_OLLAMA_PORT=${OPORT}" >> .env
    fi
    if grep -q '^CURLLM_API_HOST=' .env; then
        sed -i "s#^CURLLM_API_HOST=.*#CURLLM_API_HOST=http://localhost:${PORT}#" .env
    else
        echo "CURLLM_API_HOST=http://localhost:${PORT}" >> .env
    fi
    if grep -q '^CURLLM_OLLAMA_HOST=' .env; then
        sed -i "s#^CURLLM_OLLAMA_HOST=.*#CURLLM_OLLAMA_HOST=http://localhost:${OPORT}#" .env
    else
        echo "CURLLM_OLLAMA_HOST=http://localhost:${OPORT}" >> .env
    fi
    dbg "Updated .env (API ${PORT}, OLLAMA ${OPORT})"

    if ! pgrep -x "ollama" > /dev/null; then
        echo "Starting Ollama..."
        OLLAMA_HOST="127.0.0.1:${OPORT}" ollama serve > /tmp/ollama.log 2>&1 &
        sleep 2
    else
        # Detect existing Ollama port (prefer current env/.env, then common defaults)
        dbg "Ollama process already running; probing ports to detect host..."
        for p in "${CURLLM_OLLAMA_PORT}" $(cat /tmp/ollama_port 2>/dev/null) 11434 11435 11436; do
            # Skip empty candidates instead of probing "http://localhost:/..."
            [ -n "$p" ] || continue
            dbg "Probing http://localhost:${p}/api/tags"
            if curl -s "http://localhost:${p}/api/tags" > /dev/null 2>&1; then
                OPORT="$p"
                echo "${OPORT}" > /tmp/ollama_port
                if grep -q '^CURLLM_OLLAMA_PORT=' .env; then
                    sed -i "s/^CURLLM_OLLAMA_PORT=.*/CURLLM_OLLAMA_PORT=${OPORT}/" .env
                else
                    echo "CURLLM_OLLAMA_PORT=${OPORT}" >> .env
                fi
                if grep -q '^CURLLM_OLLAMA_HOST=' .env; then
                    sed -i "s#^CURLLM_OLLAMA_HOST=.*#CURLLM_OLLAMA_HOST=http://localhost:${OPORT}#" .env
                else
                    echo "CURLLM_OLLAMA_HOST=http://localhost:${OPORT}" >> .env
                fi
                dbg "Detected existing Ollama at port ${OPORT}. .env updated."
                break
            fi
        done
    fi

    # Pull model if needed.
    # The CLI is pointed at the port selected/detected above; without this it
    # talks to its own default port and misses an instance started elsewhere.
    # The model name is matched literally (-F) so "." is not a wildcard.
    if ! OLLAMA_HOST="http://localhost:${OPORT}" ollama list | grep -qF -- "$CURLLM_MODEL"; then
        echo "Pulling model $CURLLM_MODEL..."
        OLLAMA_HOST="http://localhost:${OPORT}" ollama pull "$CURLLM_MODEL"
    fi

    # Start API server (only if not already running)
    if [ "$SKIP_API_START" = false ] && ! curl -s "http://localhost:${PORT}/health" > /dev/null 2>&1; then
        echo "Starting curllm API server..."
        # Find Python script location
        SCRIPT_PATH="$(readlink -f "${BASH_SOURCE[0]}")"
        SCRIPT_DIR="$( dirname "${SCRIPT_PATH}" )"
        dbg "BASH_SOURCE[0]=${BASH_SOURCE[0]}"
        dbg "Resolved SCRIPT_PATH=${SCRIPT_PATH}"
        dbg "Resolved SCRIPT_DIR=${SCRIPT_DIR}"
        # Fallback search for curllm_server.py
        CANDIDATES=(
            "$SCRIPT_DIR/curllm_server.py"
            "$(dirname "$SCRIPT_DIR")/curllm_server.py"
            "$PWD/curllm_server.py"
            "$(dirname "$PWD")/curllm_server.py"
        )
        SERVER_PATH=""
        for c in "${CANDIDATES[@]}"; do
            dbg "Checking server candidate: $c"
            if [ -f "$c" ]; then SERVER_PATH="$c"; break; fi
        done
        if [ -z "$SERVER_PATH" ]; then
            # Try to locate curllm_server module via Python import (pip-installed site-packages)
            SERVER_PATH_PY=$(python3 - <<'PY'
import importlib, inspect
try:
    m = importlib.import_module('curllm_server')
    print(inspect.getfile(m))
except Exception:
    pass
PY
)
            if [ -n "$SERVER_PATH_PY" ] && [ -f "$SERVER_PATH_PY" ]; then
                SERVER_PATH="$SERVER_PATH_PY"
                dbg "Found server via Python import: $SERVER_PATH"
            fi
        fi
        if [ -n "$SERVER_PATH" ]; then
            dbg "Using server path: $SERVER_PATH"
            CURLLM_API_PORT=${PORT} CURLLM_OLLAMA_HOST="http://localhost:${OPORT}" CURLLM_DEBUG=false python3 "$SERVER_PATH" > /tmp/curllm.log 2>&1 &
            # Remember the server PID so stop_services can terminate it
            echo $! > /tmp/curllm.pid
            sleep 3
        else
            echo -e "${RED}Error: curllm_server.py not found${NC}"
            echo "Searched paths:"
            for c in "${CANDIDATES[@]}"; do echo "  - $c"; done
            echo "Tip: ensure the curllm script points to the repository version (symlink) or run from repo root."
            return 1
        fi
    fi

    # Start Browserless if enabled
    if [ "$CURLLM_BROWSERLESS" = "true" ]; then
        if ! docker ps | grep -q "browserless"; then
            echo "Starting Browserless container..."
            docker run -d \
                --name browserless \
                -p 3000:3000 \
                -e "MAX_CONCURRENT_SESSIONS=10" \
                -e "ENABLE_STEALTH=true" \
                browserless/chrome:latest
        fi
    fi

    echo -e "${GREEN}Services started successfully!${NC}"
    check_status
}

# Function to stop services
#
# Stops the curllm API server and the Browserless container:
#   1. kills the PID recorded in /tmp/curllm.pid and removes that file;
#   2. kills any remaining "python" process listening on the API port(s)
#      taken from /tmp/curllm_api_port and CURLLM_API_PORT;
#   3. stops and removes the "browserless" docker container (errors ignored).
# Globals:  reads CURLLM_API_PORT; sets the scratch variables PORTS,
#           UNIQUE_PORTS and PIDS.
stop_services() {
    local p pid

    echo -e "${BLUE}Stopping curllm services...${NC}"

    # Stop API server
    if [ -f /tmp/curllm.pid ]; then
        kill "$(cat /tmp/curllm.pid)" 2>/dev/null
        rm -f /tmp/curllm.pid
        echo "Stopped curllm API server"
    fi
    # Additionally, kill any Python process bound to the API port (handles Flask reloader children)
    # Determine port candidates
    PORTS=()
    if [ -f /tmp/curllm_api_port ]; then PORTS+=("$(cat /tmp/curllm_api_port)"); fi
    if [ -n "$CURLLM_API_PORT" ]; then PORTS+=("$CURLLM_API_PORT"); fi
    # De-duplicate
    UNIQUE_PORTS=($(printf "%s\n" "${PORTS[@]}" | awk '!seen[$0]++'))
    for p in "${UNIQUE_PORTS[@]}"; do
        [ -z "$p" ] && continue
        # Find PIDs via ss: pick the rows whose local address (4th column)
        # ends in ":<port>" and pull every pid=N out of that row. The port is
        # compared exactly, so 8000 does not match 18000, and the row is
        # scanned as a whole because the process column's position depends on
        # the ss flags in use.
        PIDS=$(ss -ltnp 2>/dev/null | awk -v port="$p" '
            {
                n = split($4, addr, ":")
                if (addr[n] != port) next
                rest = $0
                while (match(rest, /pid=[0-9]+/)) {
                    print substr(rest, RSTART + 4, RLENGTH - 4)
                    rest = substr(rest, RSTART + RLENGTH)
                }
            }' | sort -u)
        for pid in $PIDS; do
            if ps -p "$pid" -o comm= | grep -q "python"; then
                kill "$pid" 2>/dev/null || true
                sleep 0.2
                # Escalate to SIGKILL only if the process survived SIGTERM
                if kill -0 "$pid" 2>/dev/null; then
                    kill -9 "$pid" 2>/dev/null || true
                fi
                echo "Killed API server process PID=$pid on port $p"
            fi
        done
    done

    # Stop Browserless
    docker stop browserless 2>/dev/null
    docker rm browserless 2>/dev/null

    echo -e "${GREEN}Services stopped${NC}"
}

# Print a status report: the check_services results followed, when the
# nvidia-smi tool is available, by one line per GPU (name, used and total
# memory). The result of check_services does not affect the return status.
check_status() {
    echo -e "${BLUE}=== curllm Service Status ===${NC}"
    check_services

    # Nothing more to report on machines without nvidia-smi
    command -v nvidia-smi > /dev/null 2>&1 || return 0

    echo ""
    echo -e "${BLUE}GPU Status:${NC}"
    nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader
}

# Install everything curllm needs: the Python packages (via pip3 --user), the
# Playwright Chromium browser, Ollama (only when the `ollama` command is
# missing) and the ~/.config/curllm directory.
# Exits the script with status 1 when python3 is not available.
install_dependencies() {
    echo -e "${BLUE}Installing curllm dependencies...${NC}"

    # Bail out early: the steps below need a Python 3 interpreter
    command -v python3 > /dev/null 2>&1 || {
        echo -e "${RED}Python 3 is required${NC}"
        exit 1
    }

    # Python packages, installed in a single pip invocation
    local python_packages=(
        browser-use
        langchain
        langchain-ollama
        playwright
        flask
        flask-cors
        pillow
        opencv-python
        pytesseract
        aiohttp
    )
    echo "Installing Python packages..."
    pip3 install --user "${python_packages[@]}"

    # Browser binaries used by Playwright
    echo "Installing Playwright browsers..."
    python3 -m playwright install chromium

    # Ollama itself, unless the command is already on PATH
    if ! command -v ollama > /dev/null 2>&1; then
        echo "Installing Ollama..."
        curl -fsSL https://ollama.ai/install.sh | sh
    fi

    # Per-user configuration directory
    mkdir -p ~/.config/curllm

    echo -e "${GREEN}Installation complete!${NC}"
    echo "Run 'curllm --start-services' to begin"
}

## Argument parsing (after functions are defined so function calls work)

# Abort with a usage error unless the option in $1 is followed by a value.
# $1 - option name as typed, $2 - number of arguments left (option included).
_curllm_require_value() {
    if [ "$2" -lt 2 ]; then
        echo -e "${RED}Error: option $1 requires an argument${NC}" >&2
        echo "Use 'curllm --help' for usage information" >&2
        exit 1
    fi
}

# Translate the command line into the request globals (METHOD, DATA, HEADERS,
# OUTPUT_FILE, VERBOSE, the mode flags, CURLLM_MODEL, URL). Service options,
# --help and --version run their action and exit immediately. Any other word
# is taken as the URL; when several are given the last one wins.
_curllm_parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            # Options taking a value: verify it exists first. Without the
            # check, `shift 2` fails on a trailing option, nothing is consumed
            # and the loop never terminates.
            -X|--request)
                _curllm_require_value "$1" "$#"; METHOD="$2"; shift 2;;
            -d|--data)
                _curllm_require_value "$1" "$#"; DATA="$2"; shift 2;;
            -H|--header)
                _curllm_require_value "$1" "$#"; HEADERS+=("$2"); shift 2;;
            -o|--output)
                _curllm_require_value "$1" "$#"; OUTPUT_FILE="$2"; shift 2;;
            -v|--verbose)
                VERBOSE=true; shift;;
            --visual)
                VISUAL_MODE=true; shift;;
            --stealth)
                STEALTH_MODE=true; shift;;
            --captcha)
                CAPTCHA_SOLVER=true; shift;;
            --bql)
                USE_BQL=true; shift;;
            --model)
                _curllm_require_value "$1" "$#"; CURLLM_MODEL="$2"; shift 2;;
            --start-services)
                start_services; exit 0;;
            --stop-services)
                stop_services; exit 0;;
            --status)
                check_status; exit 0;;
            --install)
                install_dependencies; exit 0;;
            -h|--help)
                show_help;;
            --version)
                echo "curllm version $VERSION"; exit 0;;
            *)
                URL="$1"; shift;;
        esac
    done
}

_curllm_parse_args "$@"

# Main execution
#
# Sends the parsed request to the curllm API and prints the raw response.
# Globals:  reads URL, DATA, METHOD, HEADERS, the *_MODE / CAPTCHA_SOLVER /
#           USE_BQL flags, CURLLM_MODEL, CURLLM_API_HOST, VERBOSE and
#           OUTPUT_FILE; sets HEADERS_JSON, PAYLOAD and RESPONSE.
# Outputs:  the API response on stdout (and in OUTPUT_FILE when set);
#           diagnostics, errors and verbose traces on stderr.
# Exits:    1 when neither URL nor DATA is given, when the services are not
#           ready, when the payload cannot be built, or when curl fails.
main() {
    # Check if URL or instruction is provided
    if [ -z "$URL" ] && [ -z "$DATA" ]; then
        echo -e "${RED}Error: URL or instruction required${NC}" >&2
        echo "Use 'curllm --help' for usage information" >&2
        exit 1
    fi

    # Check services before running; on failure run the check again, this
    # time visibly, so the user sees which component is missing.
    if ! check_services > /dev/null 2>&1; then
        echo -e "${YELLOW}Services not ready. Checking status...${NC}" >&2
        check_services >&2
        exit 1
    fi

    # Build request payload
    if [ ${#HEADERS[@]} -eq 0 ]; then
        HEADERS_JSON='[]'
    else
        HEADERS_JSON=$(printf '%s\n' "${HEADERS[@]}" | jq -R . | jq -s .)
    fi
    # jq does the JSON encoding, so quotes, backslashes or newlines in the
    # URL, method, data or model name cannot corrupt the payload.
    if ! PAYLOAD=$(jq -n \
            --arg method "$METHOD" \
            --arg url "$URL" \
            --arg data "$DATA" \
            --argjson visual_mode "$VISUAL_MODE" \
            --argjson stealth_mode "$STEALTH_MODE" \
            --argjson captcha_solver "$CAPTCHA_SOLVER" \
            --argjson use_bql "$USE_BQL" \
            --arg model "$CURLLM_MODEL" \
            --argjson headers "$HEADERS_JSON" \
            '{
                method: $method,
                url: $url,
                data: $data,
                visual_mode: $visual_mode,
                stealth_mode: $stealth_mode,
                captcha_solver: $captcha_solver,
                use_bql: $use_bql,
                model: $model,
                headers: $headers
            }'); then
        echo -e "${RED}Error: Failed to build request payload (is jq installed?)${NC}" >&2
        exit 1
    fi

    # Show verbose output if requested (to stderr)
    if [ "$VERBOSE" = true ]; then
        { echo -e "${BLUE}Request:${NC}"; printf '%s\n' "$PAYLOAD" | jq .; } 1>&2
    fi

    # Make API request; a non-zero curl status means the API was unreachable
    if ! RESPONSE=$(curl -s -X POST \
            "${CURLLM_API_HOST}/api/execute" \
            -H "Content-Type: application/json" \
            -d "$PAYLOAD"); then
        echo -e "${RED}Error: Failed to connect to curllm API${NC}" >&2
        exit 1
    fi

    # Always output pure JSON on stdout; verbose info goes to stderr
    if [ "$VERBOSE" = true ]; then
        { echo -e "${BLUE}Response:${NC}"; printf '%s\n' "$RESPONSE" | jq .; } 1>&2
    fi
    printf '%s\n' "$RESPONSE"

    # Save to file if requested
    if [ -n "$OUTPUT_FILE" ]; then
        printf '%s\n' "$RESPONSE" > "$OUTPUT_FILE"
        { echo -e "${GREEN}Output saved to: $OUTPUT_FILE${NC}"; } 1>&2
    fi
}

# On Ctrl+C (SIGINT) report the interruption and exit with status 130
trap 'echo -e "\n${YELLOW}Interrupted${NC}"; exit 130' INT

# Invoke main only when the script is executed directly, not when sourced
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    main
fi
