Directory structure:
└── planning_agents/
    ├── __init__.py
    ├── _deprecation.py
    ├── base_agent.py
    ├── bo_agent.py
    ├── excel_parser.py
    ├── html_generator.py
    ├── ingestor.py
    ├── instruct.py
    ├── knowledge_base.py
    ├── orchestrator_tools.py
    ├── parser_utils.py
    ├── pdf_parser.py
    ├── planning_agent.py
    ├── planning_orchestrator.py
    ├── rag_engine.py
    ├── repo_loader.py
    ├── scalarizer_agent.py
    └── user_interface.py

================================================
FILE: __init__.py
================================================
from .planning_agent import PlanningAgent
from .bo_agent import BOAgent
from .scalarizer_agent import ScalarizerAgent


================================================
FILE: _deprecation.py
================================================
"""
Deprecation utilities for backwards-compatible parameter migration.
"""

import warnings
from typing import Optional, Tuple


def _resolve_deprecated(
    preferred: Optional[str],
    deprecated: Optional[str],
    preferred_name: str,
    deprecated_name: str,
    source: str,
    stacklevel: int,
) -> Optional[str]:
    """
    Pick between a preferred parameter and its deprecated alias.

    Emits a DeprecationWarning whenever the deprecated alias was supplied.

    Args:
        preferred: Value passed under the new parameter name.
        deprecated: Value passed under the deprecated parameter name.
        preferred_name: New parameter name (used in the warning text).
        deprecated_name: Deprecated parameter name (used in the warning text).
        source: Name of the calling class for warning messages.
        stacklevel: Stack level as seen from the *caller* of this helper.

    Returns:
        ``preferred`` when it is not None, otherwise ``deprecated``
        (which may itself be None).
    """
    if deprecated is None:
        return preferred

    if preferred is not None:
        message = (
            f"{source}: Both '{preferred_name}' and '{deprecated_name}' provided. "
            f"Using '{preferred_name}'. '{deprecated_name}' is deprecated and will be "
            f"removed in v2.0."
        )
        resolved = preferred
    else:
        message = (
            f"{source}: '{deprecated_name}' parameter is deprecated and will be "
            f"removed in v2.0. Use '{preferred_name}' instead."
        )
        resolved = deprecated

    # +1 accounts for this helper's own frame on the stack.
    warnings.warn(message, DeprecationWarning, stacklevel=stacklevel + 1)
    return resolved


def normalize_api_key(
    api_key: Optional[str] = None,
    google_api_key: Optional[str] = None,
    source: str = "Agent",
    stacklevel: int = 3
) -> Optional[str]:
    """
    Normalize API key parameter, handling deprecated 'google_api_key'.
    
    Args:
        api_key: New parameter name (preferred)
        google_api_key: Deprecated parameter name
        source: Name of the calling class for warning messages
        stacklevel: Stack level used for the DeprecationWarning, counted from
            this function. The default (3) attributes the warning to the code
            that called the function which called normalize_api_key
            (e.g. the user code constructing an agent).
    
    Returns:
        The resolved API key, or None when neither parameter was provided.
        No validation is performed here; callers decide whether a missing
        key is an error.
    """
    return _resolve_deprecated(
        api_key, google_api_key, "api_key", "google_api_key", source, stacklevel
    )


def normalize_base_url(
    base_url: Optional[str] = None,
    local_model: Optional[str] = None,
    source: str = "Agent",
    stacklevel: int = 3
) -> Optional[str]:
    """
    Normalize endpoint URL parameter, handling deprecated 'local_model'.
    
    Args:
        base_url: New parameter name (preferred)
        local_model: Deprecated parameter name
        source: Name of the calling class for warning messages
        stacklevel: Stack level used for the DeprecationWarning, counted from
            this function (same convention as normalize_api_key).
    
    Returns:
        The resolved endpoint URL, or None
    """
    return _resolve_deprecated(
        base_url, local_model, "base_url", "local_model", source, stacklevel
    )


def normalize_params(
    api_key: Optional[str] = None,
    google_api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    local_model: Optional[str] = None,
    source: str = "Agent",
    stacklevel: int = 3
) -> Tuple[Optional[str], Optional[str]]:
    """
    Normalize both API key and base URL parameters.

    Args:
        api_key: New API key parameter (preferred)
        google_api_key: Deprecated API key parameter
        base_url: New endpoint parameter (preferred)
        local_model: Deprecated endpoint parameter
        source: Name of the calling class for warning messages
        stacklevel: Stack level used for any DeprecationWarning, counted from
            this function. The default (3) points at the caller of the
            function that invoked normalize_params.
    
    Returns:
        Tuple of (api_key, base_url)
    """
    # This function adds one frame between the agent and the single-parameter
    # normalizers, so shift the stack level by one to keep the warning
    # attributed to the same call site.
    inner_level = stacklevel + 1
    resolved_key = normalize_api_key(api_key, google_api_key, source, inner_level)
    resolved_url = normalize_base_url(base_url, local_model, source, inner_level)
    return resolved_key, resolved_url


================================================
FILE: base_agent.py
================================================
import json
import logging
import uuid
from abc import ABC
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, Optional


class BaseAgent(ABC):
    """
    Base class for all agents in the planning_agents module.
    
    Provides standardized:
    - State management (initialization, persistence, restoration)
    - Action logging (input, result, rationale, feedback tracking)
    - Session management (unique IDs, timestamps)
    
    All planning agents should inherit from this class to ensure 
    consistent state handling and traceability.
    
    Subclasses must:
    1. Call super().__init__(output_dir) in their __init__
    2. Set self.agent_type to a unique identifier (e.g., "planning", "bo", "scalarizer")
    3. Override _get_initial_state_fields() to add agent-specific state fields
    
    Example:
        class MyAgent(BaseAgent):
            def __init__(self, output_dir: str = "."):
                super().__init__(output_dir)
                self.agent_type = "my_agent"
            
            def _get_initial_state_fields(self) -> Dict[str, Any]:
                return {
                    "my_custom_field": [],
                    "another_field": None
                }
    """
    
    def __init__(self, output_dir: str = "."):
        """
        Initialize the base agent.
        
        Creates ``output_dir`` (including parents) if it does not exist.
        
        Args:
            output_dir: Directory for state persistence and outputs.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        # Agent identifier - subclasses should override
        self.agent_type: str = "base"
        
        # Core state dictionary
        self.state: Dict[str, Any] = {}
    
    def _get_initial_state_fields(self) -> Dict[str, Any]:
        """
        Override in subclasses to add agent-specific state fields.
        
        Returns:
            Dict of additional fields to include in state initialization.
        """
        return {}
    
    def _init_state(self, **context) -> None:
        """
        Initialize state for a new session.
        
        When no session exists yet, creates a fresh state dictionary with:
        - Unique session ID
        - Timestamp
        - Action history for traceability
        - Agent-specific fields from _get_initial_state_fields()
        
        When a session already exists (e.g. restored via load_state), the
        existing state is kept and only updated with ``context``.
        In both cases the status is set to "active".
        
        Args:
            **context: Key-value pairs to store in state (e.g., objective, data_path)
        """
        if self.state.get("session_id") is None:
            self.state = {
                "session_id": str(uuid.uuid4()),
                "start_time": datetime.now().isoformat(),
                "agent_type": self.agent_type,
                "action_history": [],
                "status": "initialized"
            }
            
            # Add agent-specific fields
            self.state.update(self._get_initial_state_fields())
        
        # Update with provided context
        self.state.update(context)
        
        self.state["status"] = "active"
    
    def _log_action(self, 
                    action: str, 
                    input_ctx: Dict[str, Any], 
                    result: Dict[str, Any], 
                    rationale: Optional[str] = None, 
                    feedback: Optional[str] = None) -> None:
        """
        Record an atomic action to state history.
        
        Captures the full context chain:
        - Input: What was asked?
        - Result: What was the output?
        - Rationale: Why did the agent choose this path?
        - Feedback: Did a human intervene or correct?
        
        Auto-saves state after logging.
        
        Args:
            action: Name of the action (e.g., "generate_plan", "run_optimization")
            input_ctx: Dictionary describing the input/request
            result: Dictionary describing the outcome (reduced to a fixed set
                of fields by _normalize_result before being stored)
            rationale: Optional explanation of why this action was taken
            feedback: Optional human feedback that influenced this action
        """
        entry = {
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "input": input_ctx,
            "rationale": rationale,
            "result": self._normalize_result(result),
            "feedback": feedback
        }
        
        self.state.setdefault("action_history", []).append(entry)
        self._save_state()
    
    def _normalize_result(self, result: Any) -> Dict[str, Any]:
        """
        Normalize result to a consistent dictionary format.
        
        Only the "status", "error", "iteration" and "stage" keys are kept;
        any other keys of a dict result are dropped.
        
        Args:
            result: Raw result (dict, string, or other)
            
        Returns:
            Normalized dictionary with status, error, iteration and stage.
            Non-dict results are reported as status "completed" with the
            remaining fields set to None.
        """
        if isinstance(result, dict):
            return {
                "status": result.get("status", "completed"),
                "error": result.get("error"),
                "iteration": result.get("iteration"),
                "stage": result.get("stage")
            }
        return {
            "status": "completed",
            "error": None,
            "iteration": None,
            "stage": None
        }
    
    def _get_state_filename(self) -> str:
        """
        Get the filename for state persistence.
        
        Returns:
            Filename based on agent_type (e.g., "planning_state.json")
        """
        return f"{self.agent_type}_state.json"
    
    def _save_state(self) -> None:
        """
        Persist state to disk.
        
        Saves to {output_dir}/{agent_type}_state.json
        Called automatically after each _log_action().
        
        Best-effort: failures are logged as warnings and never raised. The
        previously saved file is left untouched when saving fails.
        """
        state_file = self.output_dir / self._get_state_filename()
        tmp_file = state_file.with_name(state_file.name + ".tmp")
        try:
            # Serialize before touching the disk: dumping straight into a file
            # opened with 'w' would truncate the last good state and leave a
            # half-written file if any value is not JSON-serializable.
            payload = json.dumps(self.state, indent=2)
            with open(tmp_file, 'w', encoding="utf-8") as f:
                f.write(payload)
            # Swap the finished file into place so readers never observe a
            # partially written state file.
            tmp_file.replace(state_file)
        except Exception as e:
            logging.warning(f"Failed to save {self.agent_type} state: {e}")
    
    def load_state(self, state_path: str) -> bool:
        """
        Restore state from disk.
        
        The current state is only replaced when the file was read
        successfully and contains a JSON object; on any failure the existing
        state is left unchanged.
        
        Args:
            state_path: Path to the state JSON file
            
        Returns:
            True if successful, False otherwise
        """
        path = Path(state_path)
        if not path.exists():
            logging.warning(f"State file not found: {state_path}")
            return False
        
        try:
            with open(path, 'r', encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError as e:
            logging.error(f"Invalid JSON in state file: {e}")
            return False
        except Exception as e:
            logging.warning(f"Failed to load {self.agent_type} state: {e}")
            return False
        
        # Valid JSON is not necessarily a valid state: a list or scalar would
        # break every later self.state.get(...) call.
        if not isinstance(loaded, dict):
            logging.error(
                f"Invalid state file (expected a JSON object): {state_path}"
            )
            return False
        
        loaded.setdefault("action_history", [])
        self.state = loaded
        
        logging.info(f"Restored {self.agent_type} state: session {self.state.get('session_id')}")
        return True
    
    def get_action_count(self) -> int:
        """Get the number of logged actions in this session."""
        return len(self.state.get("action_history", []))
    
    def get_session_id(self) -> Optional[str]:
        """Get the current session ID."""
        return self.state.get("session_id")
    
    def is_initialized(self) -> bool:
        """Check if the agent has an active session."""
        return bool(self.state.get("session_id"))



================================================
FILE: bo_agent.py
================================================
import pandas as pd
import numpy as np
import json
import logging
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
import PIL.Image as PIL_Image

from ...auth import get_internal_proxy_key
from .parser_utils import parse_json_from_response 
from ...tools.bo_tools import get_optimizer
from .instruct import (
    BO_CONFIG_SOO_PROMPT,
    BO_CONFIG_MOO_PROMPT,
    BO_VISUAL_INSPECTION_PROMPT,
    BO_VISUAL_INSPECTION_MOO_PROMPT,
    BO_CONSTRAINED_BATCH_PROMPT,
    BO_CONSTRAINED_BATCH_PROMPT_MOO
)

from ...wrappers.openai_wrapper import OpenAIAsGenerativeModel
from ...wrappers.litellm_wrapper import LiteLLMGenerativeModel

from ._deprecation import normalize_params

from .base_agent import BaseAgent


def _compute_budget_context(experimental_budget: Optional[int], history: List[Dict]) -> Dict[str, Any]:
    """
    Compute budget-aware context for strategy selection.
    
    Translates a raw experiment count into a structured context dict that the 
    LLM can reason about. The phase classification uses both absolute remaining 
    count and the fraction of total campaign budget to handle edge cases 
    (e.g., budget=5 means something different on step 2 vs step 50).
    
    Args:
        experimental_budget: Remaining optimization iterations (None = unlimited).
        history: List of past history entries (used to compute steps completed).
    
    Returns:
        Dict with keys:
            - budget_total: Remaining iterations (None if unlimited)
            - steps_completed: Number of BO iterations already done
            - budget_fraction_remaining: Float in [0, 1] — fraction of full 
              campaign still available. None if unlimited.
            - budget_phase: One of "final_shot", "critical", "low", "mid", 
              "high", "unlimited"
            - budget_guidance: Human-readable strategy guidance string for the LLM
    """
    steps_completed = len(history)
    
    if experimental_budget is None:
        return {
            "budget_total": None,
            "steps_completed": steps_completed,
            "budget_fraction_remaining": None,
            "budget_phase": "unlimited",
            "budget_guidance": (
                "No experimental budget constraint. "
                "Balance exploration and exploitation normally."
            ),
        }
    
    total_campaign = steps_completed + experimental_budget
    fraction_remaining = experimental_budget / total_campaign if total_campaign > 0 else 0.0
    
    # Classify into phases based on absolute remaining AND fraction
    if experimental_budget <= 1:
        phase = "final_shot"
        guidance = (
            "CRITICAL: This is the LAST experiment. You MUST exploit. "
            "Use 'log_ei' or 'ucb' with very low beta (< 0.3). "
            "Do NOT use 'max_variance' or 'thompson'. "
            "Every point must target the most promising region found so far."
        )
    elif experimental_budget <= 3:
        phase = "critical"
        guidance = (
            f"Only {experimental_budget} experiments remain. Strongly favor exploitation. "
            "Use 'log_ei' (preferred) or 'ucb' with low beta (0.3-1.0). "
            "Avoid 'max_variance'. 'thompson' acceptable only if batch_size > 10. "
            "Reserve at most 1 point for exploration if batch allows."
        )
    elif fraction_remaining < 0.25:
        phase = "low"
        guidance = (
            f"{experimental_budget} experiments remain ({fraction_remaining:.0%} of campaign budget). "
            "Late-stage optimization — lean toward exploitation. "
            "Use 'log_ei' or 'ucb' with moderate beta (1.0-2.0). "
            "'max_variance' only if model calibration is poor."
        )
    elif fraction_remaining < 0.6:
        phase = "mid"
        guidance = (
            f"{experimental_budget} experiments remain ({fraction_remaining:.0%} of campaign budget). "
            "Mid-campaign — balance exploration and exploitation. "
            "'log_ei' is a strong default. 'ucb' with beta ~2.0 also appropriate. "
            "'max_variance' acceptable if data is sparse or model uncertain."
        )
    else:
        phase = "high"
        guidance = (
            f"{experimental_budget} experiments remain ({fraction_remaining:.0%} of campaign budget). "
            "Early-stage — exploration is valuable. "
            "'max_variance' is appropriate if few data points exist. "
            "'log_ei' or 'thompson' for balanced exploration with some exploitation."
        )
    
    return {
        "budget_total": experimental_budget,
        "steps_completed": steps_completed,
        "budget_fraction_remaining": round(fraction_remaining, 3),
        "budget_phase": phase,
        "budget_guidance": guidance,
    }


class BOAgent(BaseAgent):
    """
    Autonomous Agent for Bayesian Optimization (BO) designed for "Stop-and-Go" experimental loops.

    This agent acts as an AI research partner that plans your next set of experiments.
    It combines valid statistical modeling (Gaussian Processes) with LLM-based reasoning 
    to adaptively configure the optimization strategy based on your data trends.

    **DATA FORMATTING REQUIREMENTS:**
    --------------------------------
    The agent expects a "Tidy Data" format (Excel .xlsx or CSV .csv) where:
    1.  **Rows** represent individual experiments.
    2.  **Columns** represent input parameters (e.g., 'Temperature', 'Pressure') and 
        measured objectives (e.g., 'Yield', 'Purity').
    3.  **No Merged Cells:** Ensure the header is a single row containing clean variable names.
    4.  **Missing Data:** The agent requires complete data rows for the optimization columns. 
        Rows with NaNs in inputs/targets should be removed or imputed before running.

    **PERSISTENCE & WORKFLOW:**
    ---------------------------
    This agent is stateless and persistent. It is safe to shut down between experiments.
    
    1.  **Run Agent:** Call `run_optimization_loop` pointing to your current data file.
    2.  **Get Recommendations:** The agent saves a new batch of experiments to 
        `./bo_artifacts/batch_step_N.csv`.
    3.  **Shut Down:** You can close the program while you perform the experiments in the lab 
        (whether it takes 1 hour or 1 week).
    4.  **Update Data:** Once results are in, append them as new rows to your original 
        data file (.xlsx/.csv).
    5.  **Restart:** Run the agent again. It automatically re-reads the updated data 
        and the history file (`bo_history.json`) to pick up exactly where it left off.

    Args:
        api_key: API key for the LLM provider.
        model_name: Model name. For public deployments, use LiteLLM format
            (e.g., "gemini/gemini-2.0-flash", "gpt-4o", "claude-sonnet-4-20250514").
        base_url: Base URL for internal proxy endpoint.
            When provided, uses OpenAI-compatible client.
            When None, uses LiteLLM for multi-provider support.
        output_dir: Output directory for artifacts.
        
        google_api_key: DEPRECATED. Use 'api_key' instead.
        local_model: DEPRECATED. Use 'base_url' instead.
    """
    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: str = "gemini-3-pro-preview",
        base_url: Optional[str] = None,
        output_dir: str = ".",
        # Deprecated
        google_api_key: Optional[str] = None,
        local_model: Optional[str] = None,
    ):
        """
        Set up the LLM backend and the location of the BO history file.

        When a base URL is resolved, the OpenAI-compatible wrapper is used
        and an API key is mandatory (falling back to
        get_internal_proxy_key() when none was passed). Otherwise the
        LiteLLM wrapper is used with whatever key was supplied.

        Raises:
            ValueError: If a base URL is given but no API key can be resolved.
        """
        super().__init__(output_dir)
        self.agent_type = "bo"

        # Fold the deprecated aliases into their replacements
        api_key, base_url = normalize_params(
            api_key=api_key,
            google_api_key=google_api_key,
            base_url=base_url,
            local_model=local_model,
            source="BOAgent",
        )

        if not base_url:
            # PUBLIC LITELLM
            logging.info(f"🌐 BOAgent using LiteLLM: {model_name}")
            self.model = LiteLLMGenerativeModel(
                model=model_name,
                api_key=api_key,
            )
        else:
            # INTERNAL PROXY
            proxy_key = api_key if api_key is not None else get_internal_proxy_key()

            if not proxy_key:
                raise ValueError(
                    "API key required for internal proxy.\n"
                    "Set SCILINK_API_KEY environment variable or pass api_key parameter."
                )

            logging.info(f"🏛️ BOAgent using internal proxy: {base_url}")
            self.model = OpenAIAsGenerativeModel(
                model=model_name,
                api_key=proxy_key,
                base_url=base_url,
            )

        self.generation_config = None

        self.history_file = self.output_dir / "bo_history.json"

    def _get_initial_state_fields(self) -> Dict[str, Any]:
        """Agent-specific state fields"""
        return {
            "objective": None,
            "data_path": None,
            "optimization_history": [],
            "current_config": None,
            "data_points_seen": 0,
            "experimental_budget": None,
        }
        
    def _load_history(self) -> List[Dict]:
        if self.history_file.exists():
            with open(self.history_file, 'r') as f: return json.load(f)
        return []

    def _save_history(self, entry: Dict):
        history = self._load_history()
        history.append(entry)
        with open(self.history_file, 'w') as f: json.dump(history, f, indent=2)

    def _validate_config(self, config: Dict) -> Dict:
        clean = config.copy()
        m_conf = clean.get("model_config", {})
        if m_conf.get("kernel") not in ["matern_2.5", "matern_1.5", "rbf"]:
            logging.warning(f"Invalid kernel '{m_conf.get('kernel')}', defaulting to 'matern_2.5'")
            m_conf["kernel"] = "matern_2.5"
        if m_conf.get("noise") not in ["fixed_low", "learnable", "high_noise"]:
            logging.warning(f"Invalid noise '{m_conf.get('noise')}', defaulting to 'fixed_low'")
            m_conf["noise"] = "fixed_low"
        clean["model_config"] = m_conf
        return clean

    # =====================================================================
    # Acquisition Landscape Summarization (for constrained batch planning)
    # =====================================================================

    def _summarize_acquisition_landscape(
        self,
        optimizer,
        input_cols: List[str],
        input_bounds: List[List[float]],
        is_moo: bool = False,
        n_regions: int = 15,
        grid_resolution: int = 40
    ) -> str:
        """
        Evaluate the acquisition function on a dense grid, cluster high-value 
        regions, and return a markdown summary table for LLM consumption.
        
        The summary gives the LLM a ranked "menu" of where the model expects the 
        most value, so it can map these regions onto physical constraints.
        
        Args:
            optimizer: Fitted optimizer object (from bo_tools.get_optimizer)
            input_cols: List of input parameter names
            input_bounds: List of [min, max] per parameter
            is_moo: Whether this is multi-objective optimization
            n_regions: Number of top regions to report
            grid_resolution: Points per dimension for grid evaluation
            
        Returns:
            Markdown-formatted string with ranked regions table
        """
        n_dims = len(input_cols)
        
        # 1. Build evaluation grid
        #    Full grid for low dimensions, Latin Hypercube for high dimensions
        if n_dims <= 3:
            axes = [
                np.linspace(bounds[0], bounds[1], grid_resolution) 
                for bounds in input_bounds
            ]
            mesh = np.meshgrid(*axes, indexing='ij')
            grid_points = np.column_stack([m.ravel() for m in mesh])
        else:
            # Latin Hypercube Sampling — cap total evaluations
            n_samples = min(grid_resolution ** 2, 5000)
            grid_points = np.random.rand(n_samples, n_dims)
            for d in range(n_dims):
                lo, hi = input_bounds[d]
                grid_points[:, d] = lo + grid_points[:, d] * (hi - lo)
        
        # 2. Evaluate acquisition function at all grid points
        #    Try direct acquisition evaluation first, then fall back to variance
        try:
            acq_values = optimizer.evaluate_acquisition(grid_points)
        except (AttributeError, NotImplementedError):
            try:
                _, acq_values = optimizer.predict(grid_points)
                print("    - ℹ️  Using posterior variance as acquisition proxy")
            except Exception as e:
                print(f"    - ⚠️  Cannot evaluate acquisition landscape: {e}")
                return self._fallback_landscape_summary(optimizer, input_cols, input_bounds)
        
        if acq_values is None or len(acq_values) == 0:
            return "Acquisition landscape evaluation returned no data."
        
        acq_values = np.array(acq_values).ravel()
        
        # 3. Cluster into regions
        regions = self._cluster_acquisition_regions(
            grid_points, acq_values, input_cols, input_bounds, n_regions
        )
        
        # 4. Format as markdown table
        header_cols = " | ".join(input_cols)
        header = f"| Rank | {header_cols} | Acq. Value | Spread | Notes |"
        separator = "|" + "|".join(["---"] * (len(input_cols) + 4)) + "|"
        
        rows = []
        for i, region in enumerate(regions):
            param_strs = " | ".join(
                f"{region['center'][j]:.4f}" for j in range(len(input_cols))
            )
            spread_str = ", ".join(
                f"{s:.3f}" for s in region.get('spread', [0.0] * len(input_cols))
            )
            notes = region.get('notes', '')
            rows.append(
                f"| {i+1} | {param_strs} | {region['acq_value']:.5f} | {spread_str} | {notes} |"
            )
        
        table = header + "\n" + separator + "\n" + "\n".join(rows)
        
        summary = f"""### Acquisition Landscape Summary
Total grid points evaluated: {len(grid_points)}
Number of dimensions: {n_dims}
Acquisition value range: [{acq_values.min():.5f}, {acq_values.max():.5f}]

#### Top {len(regions)} Regions (ranked by acquisition value)
{table}

**Interpretation:** Higher acquisition value = the model expects more information gain 
or improvement from sampling that region. "Spread" indicates how broad the high-value 
zone is around each center (per parameter). Wider spread = more forgiving placement.
"""
        return summary

    def _cluster_acquisition_regions(
        self,
        grid_points: np.ndarray,
        acq_values: np.ndarray,
        input_cols: List[str],
        input_bounds: List[List[float]],
        n_regions: int
    ) -> List[Dict[str, Any]]:
        """
        Identify distinct high-value regions via greedy peak selection 
        with exclusion zones. Prevents the LLM from seeing a table of 
        near-duplicate points that all cluster in one area.
        
        Args:
            grid_points: (N, D) array of evaluated points
            acq_values: (N,) array of acquisition values
            input_cols: Parameter names (for boundary detection notes)
            input_bounds: Parameter bounds
            n_regions: Max number of regions to return
            
        Returns:
            List of region dicts with center, acq_value, spread, notes
        """
        n_dims = len(input_cols)
        
        # Normalize coordinates to [0, 1] for distance computation
        bounds_array = np.array(input_bounds)
        ranges = bounds_array[:, 1] - bounds_array[:, 0]
        ranges[ranges == 0] = 1.0
        normalized = (grid_points - bounds_array[:, 0]) / ranges
        
        # Minimum separation between region centers (in normalized space)
        min_separation = 0.15
        
        sorted_idx = np.argsort(-acq_values)
        
        regions = []
        selected_centers = []
        
        for idx in sorted_idx:
            if len(regions) >= n_regions:
                break
            
            candidate = normalized[idx]
            
            # Check distance to already-selected centers
            too_close = False
            for center in selected_centers:
                dist = np.sqrt(np.sum((candidate - center) ** 2))
                if dist < min_separation:
                    too_close = True
                    break
            
            if too_close:
                continue
            
            selected_centers.append(candidate)
            
            # Compute spread: std of nearby high-value points
            distances = np.sqrt(np.sum((normalized - candidate) ** 2, axis=1))
            high_value_mask = (distances < min_separation * 2) & (acq_values > acq_values[idx] * 0.5)
            
            if high_value_mask.sum() > 1:
                spread = np.std(grid_points[high_value_mask], axis=0).tolist()
            else:
                spread = [0.0] * n_dims
            
            # Boundary detection
            center_raw = grid_points[idx]
            notes_parts = []
            for d in range(n_dims):
                lo, hi = input_bounds[d]
                param_range = hi - lo
                if param_range > 0:
                    if (center_raw[d] - lo) / param_range < 0.05:
                        notes_parts.append(f"{input_cols[d]} at lower bound")
                    elif (hi - center_raw[d]) / param_range < 0.05:
                        notes_parts.append(f"{input_cols[d]} at upper bound")
            
            regions.append({
                'center': grid_points[idx].tolist(),
                'acq_value': float(acq_values[idx]),
                'spread': spread,
                'notes': "; ".join(notes_parts) if notes_parts else ""
            })
        
        return regions

    def _fallback_landscape_summary(self, optimizer, input_cols, input_bounds) -> str:
        """
        Minimal fallback when acquisition evaluation is unavailable.
        The LLM will rely on unconstrained suggestions and data summary instead.
        """
        return (
            "### Acquisition Landscape Summary\n"
            "⚠️ Direct acquisition function evaluation not available for this optimizer.\n"
            "Use the unconstrained BO suggestions and data summary below "
            "to design the constrained batch.\n"
        )

    # =====================================================================
    # Constrained Batch Planning
    # =====================================================================

    def _plan_constrained_batch(
        self,
        objective_text: str,
        input_cols: List[str],
        input_bounds: List[List[float]],
        batch_size: int,
        acq_summary: str,
        physical_constraints: str,
        unconstrained_recommendations: List[Dict[str, float]],
        data_summary_str: str,
        current_best: Dict[str, float],
        current_best_value: Dict[str, float],
        budget_ctx: Dict[str, Any],
        is_moo: bool = False,
        pareto_front: Optional[List[Dict]] = None
    ) -> Tuple[Optional[List[Dict[str, float]]], Optional[Dict[str, Any]], Optional[str]]:
        """
        Use the LLM to design a physically constrained experiment batch informed
        by the acquisition landscape.

        A prompt is assembled from the objective, parameter names/bounds, the
        acquisition summary, the constraints, the budget context and the current
        best (or Pareto front), then sent to ``self.model``. The call is retried
        up to three times when the response cannot be parsed, lacks a usable
        ``"batch"`` list, or contains no valid experiment.

        Each experiment is validated individually: it must provide a ``params``
        dict with every name in ``input_cols``, all values must be numeric, and
        every value must lie within its bounds (plus a 1% tolerance). Invalid
        experiments are dropped and reported; they never abort the attempt.
        The batch is NOT padded with unconstrained suggestions when fewer than
        ``batch_size`` experiments survive validation.

        Args:
            objective_text: Scientific optimization objective.
            input_cols: Parameter names; used as the required ``params`` keys.
            input_bounds: ``[min, max]`` per parameter, aligned with ``input_cols``.
            batch_size: Number of experiments to design (also the maximum returned).
            acq_summary: Markdown summary from _summarize_acquisition_landscape.
            physical_constraints: Natural language constraint description.
            unconstrained_recommendations: Standard BO output, included in the
                prompt for reference only.
            data_summary_str: Data summary text inserted verbatim into the prompt.
            current_best: Best parameters found so far (skipped in the prompt
                when empty or when a Pareto front is shown instead).
            current_best_value: Best objective value(s) found so far.
            budget_ctx: Budget context; the keys ``budget_phase``,
                ``budget_total`` and ``budget_guidance`` are read here.
            is_moo: Multi-objective flag; selects the MOO prompt template.
            pareto_front: Pareto front points for MOO (at most 20 are shown).

        Returns:
            Tuple ``(recommendations, metadata, error)``. On success
            ``recommendations`` is a list of ``{param: float}`` dicts (at most
            ``batch_size`` long), ``metadata`` describes the plan and validation
            outcome, and ``error`` is None. On failure the first two are None
            and ``error`` describes the last failure.
        """
        prompt_template = BO_CONSTRAINED_BATCH_PROMPT_MOO if is_moo else BO_CONSTRAINED_BATCH_PROMPT

        prompt_parts = [
            prompt_template,
            f"## Optimization Objective\n{objective_text}",
            f"## Batch Size\n{batch_size} experiments to design",
            f"\n## REQUIRED Parameter Names (use these EXACT keys in params)\n"
            f"{json.dumps(input_cols)}\n"
            f"Every experiment in the batch must have ALL of these keys in its \"params\" dict. "
            f"Use these exact strings — do not rename, abbreviate, or expand them.",
            f"\n## Parameter Bounds\n" + "\n".join(
                f"- {col}: [{bounds[0]}, {bounds[1]}]"
                for col, bounds in zip(input_cols, input_bounds)
            ),
            f"\n## Acquisition Landscape\n{acq_summary}",
            f"\n## Physical Constraints\n{physical_constraints}",
        ]

        # Budget context for constrained planner
        if budget_ctx["budget_phase"] != "unlimited":
            # NOTE(review): this block is used for both "final_shot" and
            # "critical", although its wording says "This is the LAST batch".
            if budget_ctx["budget_phase"] in ("final_shot", "critical"):
                budget_block = (
                    f"\n## ⚠️ Experimental Budget — CRITICAL\n"
                    f"- Remaining iterations (including this batch): {budget_ctx['budget_total']}\n"
                    f"- Campaign phase: **{budget_ctx['budget_phase']}**\n"
                    f"- {budget_ctx['budget_guidance']}\n"
                    f"\n**THIS OVERRIDES DESIGN PRINCIPLE #1 (Maximize Coverage).**\n"
                    f"This is the LAST batch. Do NOT spread experiments uniformly across "
                    f"the parameter space. Instead:\n"
                    f"1. **Concentrate ≥60% of experiments** in the top 3-5 acquisition regions. "
                    f"These are the regions most likely to contain the optimum.\n"
                    f"2. **IMPORTANT — Include the predicted optimum.** The acquisition function "
                    f"assigns LOW values to already-observed locations (because uncertainty is low "
                    f"there). But the Current Best parameters (see below) and the GP-predicted peak "
                    f"are still the most promising locations. Allocate 3-5 experiments AT or very "
                    f"near the Current Best parameters (snapped to feasible values). This is "
                    f"essential because: (a) confirming reproducibility of the best result has "
                    f"high scientific value, and (b) the true optimum may coincide with an observed "
                    f"point that the acquisition function undervalues.\n"
                    f"3. **Do NOT allocate experiments** to low-acquisition regions just for "
                    f"coverage. Every experiment in a low-value region is wasted.\n"
                    f"4. **Non-uniform parameter allocation is expected.** If the acquisition "
                    f"landscape peaks at specific parameter combinations, most experiments should "
                    f"cluster there — not be evenly distributed across all feasible levels.\n"
                    f"5. Look at the Acquisition Landscape table above. The Acq. Value column "
                    f"tells you where to concentrate. Regions ranked 1-5 should get the bulk "
                    f"of the experiments.\n"
                    f"6. **Allocation guideline:** ~5 experiments replicating current best, "
                    f"~60% of remaining on top acquisition regions, ~40% of remaining on "
                    f"next-best regions."
                )
            else:
                budget_block = (
                    f"\n## Experimental Budget\n"
                    f"- Remaining experiments (including this batch): {budget_ctx['budget_total']}\n"
                    f"- Campaign phase: **{budget_ctx['budget_phase']}**\n"
                    f"- {budget_ctx['budget_guidance']}\n"
                    f"\n**Budget implication for batch design:** "
                    f"Balance coverage with exploitation based on remaining budget."
                )
            prompt_parts.append(budget_block)

        if is_moo and pareto_front:
            prompt_parts.append(
                f"\n## Current Pareto Front ({len(pareto_front)} points)\n"
                f"{json.dumps(pareto_front[:20], indent=2)}"
            )
        elif current_best:
            prompt_parts.append(
                f"\n## Current Best Parameters\n{json.dumps(current_best)}"
            )
            if current_best_value:
                prompt_parts.append(
                    f"\n## Current Best Result\n{json.dumps(current_best_value)}"
                )

        prompt_parts.append(
            f"\n## Unconstrained BO Suggestions (for reference)\n"
            f"{json.dumps(unconstrained_recommendations, indent=2)}"
        )
        prompt_parts.append(f"\n## Data Summary\n{data_summary_str}")

        print(f"  - 🏗️ BO Agent: Planning constrained batch ({batch_size} experiments)...")

        max_retries = 3
        last_error = None

        for attempt in range(1, max_retries + 1):
            try:
                if attempt > 1:
                    print(f"  - 🔄 Retry {attempt}/{max_retries}...")

                resp = self.model.generate_content(
                    prompt_parts,
                    generation_config=self.generation_config
                )
                constrained_batch, parse_error = parse_json_from_response(resp)

                # Structural checks on the response; any failure triggers a retry.
                failure = None
                if parse_error:
                    failure = f"JSON parse error: {parse_error}"
                elif not isinstance(constrained_batch, dict) or "batch" not in constrained_batch:
                    failure = "LLM response missing 'batch' key"
                elif not constrained_batch["batch"]:
                    failure = "LLM returned empty batch"
                elif not isinstance(constrained_batch["batch"], list):
                    failure = "LLM 'batch' value is not a list"

                if failure:
                    last_error = failure
                    logging.warning(
                        f"Constrained batch attempt {attempt}: {last_error}"
                    )
                    continue

                # Validate and extract recommendations (per experiment)
                recommendations, validation_errors = self._validate_constrained_batch_items(
                    constrained_batch["batch"], input_cols, input_bounds
                )

                for err in validation_errors:
                    print(f"    - ⚠️  {err}")

                if not recommendations:
                    last_error = f"No valid experiments in batch. Errors: {validation_errors}"
                    logging.warning(
                        f"Constrained batch attempt {attempt}: {last_error}"
                    )
                    continue

                # Truncate BEFORE counting so that valid_count and the success
                # message describe exactly what is returned to the caller.
                recommendations = recommendations[:batch_size]

                # Warn if we got fewer than requested — do NOT pad with unconstrained
                # values since they won't respect discrete constraints
                if len(recommendations) < batch_size:
                    shortfall = batch_size - len(recommendations)
                    print(
                        f"    - ⚠️  Got {len(recommendations)}/{batch_size} valid experiments. "
                        f"    {shortfall} slots unfilled (unconstrained padding disabled)."
                    )

                metadata = {
                    "allocation_strategy": constrained_batch.get("allocation_strategy", ""),
                    "coverage_summary": constrained_batch.get("coverage_summary", ""),
                    "trade_offs": constrained_batch.get("trade_offs", ""),
                    "validation_points": constrained_batch.get("validation_points", ""),
                    "pareto_strategy": constrained_batch.get("pareto_strategy", ""),
                    "valid_count": len(recommendations),
                    "requested_count": batch_size,
                    "shortfall": max(0, batch_size - len(recommendations)),
                    "validation_errors": validation_errors if validation_errors else None,
                    "attempts": attempt,
                }

                print(f"  - ✅ Constrained batch planned: {len(recommendations)} experiments"
                      + (f" (attempt {attempt})" if attempt > 1 else ""))
                return recommendations, metadata, None

            except Exception as e:
                # Boundary around the LLM call: any failure counts as one
                # failed attempt and is reported through the error string.
                last_error = str(e)
                logging.warning(
                    f"Constrained batch attempt {attempt} exception: {e}"
                )
                if attempt == max_retries:
                    logging.error(
                        f"Constrained batch planning failed after {max_retries} attempts",
                        exc_info=True
                    )

        return None, None, f"Failed after {max_retries} attempts. Last error: {last_error}"

    @staticmethod
    def _validate_constrained_batch_items(
        batch_items: List[Any],
        input_cols: List[str],
        input_bounds: List[List[float]],
        tolerance: float = 0.01
    ) -> Tuple[List[Dict[str, float]], List[str]]:
        """
        Validate LLM-proposed experiments and convert them to float dicts.

        An experiment is kept only if it is a dict with a non-empty ``params``
        dict containing every name in ``input_cols``, every value converts to
        float, and every value lies within its bounds widened by
        ``tolerance`` times the parameter range (0.01 absolute when the range
        is not positive). NaN values fail the bounds test and are rejected.

        Args:
            batch_items: The ``"batch"`` list from the LLM response.
            input_cols: Required parameter names.
            input_bounds: ``[min, max]`` per parameter, aligned with ``input_cols``.
            tolerance: Fraction of the range allowed outside the bounds, to
                leave room for constraint snapping.

        Returns:
            ``(recommendations, validation_errors)``: the accepted experiments
            and one human-readable message per problem found.
        """
        recommendations: List[Dict[str, float]] = []
        validation_errors: List[str] = []

        for i, item in enumerate(batch_items):
            params = item.get("params") if isinstance(item, dict) else None

            if not params or not isinstance(params, dict):
                validation_errors.append(f"Experiment {i+1}: missing params")
                continue

            # Check all input columns present
            missing_cols = [c for c in input_cols if c not in params]
            if missing_cols:
                validation_errors.append(
                    f"Experiment {i+1}: missing columns {missing_cols}"
                )
                continue

            # A single malformed value must only discard this experiment.
            try:
                rec = {col: float(params[col]) for col in input_cols}
            except (TypeError, ValueError) as exc:
                validation_errors.append(
                    f"Experiment {i+1}: non-numeric parameter value ({exc})"
                )
                continue

            # Check values within bounds (with tolerance for constraint snapping)
            in_bounds = True
            for col, bounds in zip(input_cols, input_bounds):
                val = rec[col]
                param_range = bounds[1] - bounds[0]
                tol = param_range * tolerance if param_range > 0 else 0.01
                # Written as a negated chained comparison so NaN is rejected too.
                if not (bounds[0] - tol <= val <= bounds[1] + tol):
                    validation_errors.append(
                        f"Experiment {i+1}: {col}={val} outside bounds "
                        f"[{bounds[0]}, {bounds[1]}]"
                    )
                    in_bounds = False

            # Out-of-bounds experiments are reported AND excluded; they are
            # not counted as valid.
            if in_bounds:
                recommendations.append(rec)

        return recommendations, validation_errors

    # =====================================================================
    # Main Optimization Loop
    # =====================================================================

    def run_optimization_loop(self, data_path: str, objective_text: str, 
                             input_cols: List[str], input_bounds: List[List[float]], 
                             target_cols: List[str], output_dir: str = "./bo_artifacts",
                             batch_size: int = 1,
                             experimental_budget: Optional[int] = None,
                             physical_constraints: Optional[str] = None,
                             save_acq: bool = True,
                             plot_acq: bool = True) -> Dict[str, Any]:
        """
        Run one iteration of the Bayesian Optimization loop.

        Steps: load the data, ask the LLM for a strategy configuration, fit the
        optimizer, obtain unconstrained recommendations, optionally re-plan the
        batch under physical constraints, generate diagnostics and acquisition
        artifacts, have the LLM inspect the diagnostic plot, then save history.
        
        Args:
            data_path: Path to the data file. ``.xlsx``/``.xls`` (any letter
                case) is read as Excel; everything else is read as CSV.
            objective_text: Natural language description of the optimization goal.
            input_cols: List of input column names.
            input_bounds: List of [min, max] bounds for each input.
            target_cols: List of target/objective column names.
            output_dir: Directory for saving artifacts.
            batch_size: Number of candidates to recommend. When physical_constraints 
                is provided, the constrained planner uses this as the target number
                of experiments to design on the plate.
            experimental_budget: Optional number of remaining experiments (iterations) 
                in the campaign, INCLUDING this one. Controls the 
                exploration-vs-exploitation balance:
                - None (default): No budget constraint; standard behavior.
                - 1: Final experiment — forces pure exploitation.
                - 2-3: Critical budget — strongly favors exploitation.
                - Higher values: Scaled guidance based on fraction of total 
                  campaign completed.
                The budget is passed to the LLM as strategic context in the 
                strategy configuration and constrained batch planning prompts.
                Note: this counts optimization iterations (calls to this method), 
                not individual experiments. A batch_size=10 call with 
                experimental_budget=2 means 2 more calls (up to 20 experiments).
            physical_constraints: Optional natural language description of physical 
                experimental constraints. When provided, the agent evaluates the 
                acquisition landscape and uses LLM reasoning to design a batch that 
                maximizes information gain while respecting the constraints. Examples:
                - "96-well plate: rows share temperature (8 values), columns share pH (12 values)"
                - "Only 5 catalyst concentrations available: 0.1, 0.5, 1.0, 2.0, 5.0 mM"
                - "Reactor zones A,B share cooling; C,D share heating. Max 4 temps total."
                When None, standard unconstrained BO is used (original behavior).
            save_acq: If True, saves acquisition function landscape data to .npz file.
                Supported for single-objective only; ignored for multi-objective.
            plot_acq: If True, generates and saves a plot of the acquisition function.
                Supported for single-objective only; ignored for multi-objective.
            
        Returns:
            On success, a dict with status, recommendations, strategy, plot paths,
            budget context, and optionally acquisition function plot/data paths
            (single-objective only) and constrained planning metadata (when
            physical_constraints provided). When constraints were provided,
            ``constraint_aware`` is True only if the constrained batch was
            actually used; if planning failed and the unconstrained suggestions
            were returned instead, it is False and ``constrained_planning_error``
            holds the reason.
            On data-load, config-parse, or empty-recommendation failure, a dict
            with a single ``error`` key.
        """
        if output_dir is None:
            output_dir = str(self.output_dir)
        
        Path(output_dir).mkdir(exist_ok=True, parents=True)
        
        # Initialize state
        self._init_state(objective=objective_text, data_path=data_path)
        
        # 1. Load Data
        try:
            # Decide by suffix, case-insensitively, so ".XLSX" and legacy ".xls"
            # workbooks are not handed to the CSV reader.
            is_excel = Path(data_path).suffix.lower() in ('.xlsx', '.xls')
            df = pd.read_excel(data_path) if is_excel else pd.read_csv(data_path)
            for col in input_cols + target_cols:
                if col not in df.columns: 
                    return {"error": f"Column '{col}' not found in data."}
            X = df[input_cols].values
            y = df[target_cols].values
            
            # Track data points
            self.state["data_points_seen"] = len(df)
            
        except Exception as e:
            return {"error": f"Data load failed: {e}"}

        is_moo = len(target_cols) > 1
        history = self._load_history()

        # The same data summary feeds both the strategy prompt and the
        # constrained planner, so compute it once.
        data_summary_md = df.describe().to_markdown()

        # Compute budget context
        budget_ctx = _compute_budget_context(experimental_budget, history)
        self.state["experimental_budget"] = experimental_budget
        
        if budget_ctx["budget_phase"] != "unlimited":
            print(
                f"  - 💰 Budget: {budget_ctx['budget_total']} iterations remaining "
                f"(phase: {budget_ctx['budget_phase']}, "
                f"{budget_ctx['budget_fraction_remaining']:.0%} of campaign left)"
            )

        # 2. Configure Strategy (LLM)
        trend_context = f"Last 5 strategies: {[h.get('config', {}).get('rationale', 'N/A') for h in history[-5:]]}" if history else "No history."
        
        prompt_tmpl = BO_CONFIG_MOO_PROMPT if is_moo else BO_CONFIG_SOO_PROMPT
        prompt_parts = [
            prompt_tmpl,
            f"Objective: {objective_text}",
            f"Constraint: Fixed Batch Size = {batch_size}",
            f"Meta-Data Trend: {trend_context}",
            f"Data Summary:\n{data_summary_md}"
        ]
        
        # Budget context for strategy LLM
        prompt_parts.append(
            f"\n**Experimental Budget:**\n{budget_ctx['budget_guidance']}\n"
            f"Steps completed so far: {budget_ctx['steps_completed']}. "
            f"Data points in dataset: {len(df)}."
        )
        
        # Inform strategy LLM about constraints (for better acq strategy selection)
        if physical_constraints:
            prompt_parts.append(
                f"\n**Physical Constraints (informational for strategy selection):**\n"
                f"{physical_constraints}\n"
                f"Note: A separate step will handle constraint-aware batch design. "
                f"Focus on selecting the best kernel, noise, and acquisition strategy."
            )
        
        print(f"  - 🤖 BO Agent: Configuring strategy (Batch={batch_size})...")
        resp = self.model.generate_content(prompt_parts, generation_config=self.generation_config)
        raw_config, parse_error = parse_json_from_response(resp)
        if parse_error: 
            return {"error": f"JSON Error: {parse_error}"}
        
        valid_config = self._validate_config(raw_config)
        valid_config["batch_size"] = int(batch_size)
        
        # Store current config in state
        self.state["current_config"] = valid_config

        # 3. Fit Model
        optimizer = get_optimizer(is_moo=is_moo)
        optimizer.fit(
            X, y, 
            bounds=input_bounds, 
            model_config=valid_config["model_config"],
            feature_names=input_cols
        )

        # 4. Recommend (Unconstrained)
        acq_conf = valid_config.get("acquisition_strategy", {})
        strategy_name = acq_conf.get("type", "pareto" if is_moo else "log_ei")
        
        print(f"  - 🚀 Optimizing {strategy_name}...")
        next_x_batch = optimizer.recommend(
            n_candidates=batch_size,
            strategy=strategy_name,
            params=acq_conf.get("params", {})
        )

        # Build unconstrained recommendations (used as reference and fallback)
        unconstrained_recommendations = [
            {k: float(v) for k, v in zip(input_cols, row)}
            for row in next_x_batch
        ]

        # 4b. Constrained Batch Planning
        constrained_metadata = None
        constraint_error = None  # stays None unless constrained planning fails
        
        if physical_constraints:
            print(f"  - 📐 Physical constraints detected. Generating acquisition landscape...")
            
            acq_summary = self._summarize_acquisition_landscape(
                optimizer=optimizer,
                input_cols=input_cols,
                input_bounds=input_bounds,
                is_moo=is_moo
            )
            
            # Get current best for context
            if is_moo:
                current_best = {}
                current_best_value = {}
                try:
                    pareto_indices = optimizer.get_pareto_indices() if hasattr(optimizer, 'get_pareto_indices') else []
                    pareto_front = [
                        {**{k: float(v) for k, v in zip(input_cols, X[i])},
                         **{k: float(v) for k, v in zip(target_cols, y[i])}}
                        for i in pareto_indices
                    ] if len(pareto_indices) > 0 else []
                except Exception:
                    # Best-effort: the Pareto front is prompt context only.
                    pareto_front = []
            else:
                # NOTE(review): argmax treats the single objective as
                # "larger is better" — confirm against the optimizer.
                best_idx = int(np.argmax(y[:, 0]))
                current_best = {k: float(v) for k, v in zip(input_cols, X[best_idx])}
                current_best_value = {target_cols[0]: float(y[best_idx, 0])}
                pareto_front = None
            
            constrained_recs, constrained_metadata, constraint_error = self._plan_constrained_batch(
                objective_text=objective_text,
                input_cols=input_cols,
                input_bounds=input_bounds,
                batch_size=batch_size,
                acq_summary=acq_summary,
                physical_constraints=physical_constraints,
                unconstrained_recommendations=unconstrained_recommendations,
                data_summary_str=data_summary_md,
                current_best=current_best,
                current_best_value=current_best_value,
                budget_ctx=budget_ctx,
                is_moo=is_moo,
                pareto_front=pareto_front
            )
            
            if constraint_error:
                print(f"  - ⚠️  Constrained planning failed: {constraint_error}")
                print(f"  - ↩️  Falling back to unconstrained recommendations")
                recommendations = unconstrained_recommendations
            else:
                recommendations = constrained_recs
                # Diagnostics and acquisition plots should show the batch
                # that is actually recommended.
                next_x_batch = np.array([
                    [rec[col] for col in input_cols] 
                    for rec in recommendations
                ])
                print(f"  - ✅ Using constrained batch ({len(recommendations)} experiments)")
        else:
            recommendations = unconstrained_recommendations

        # Fail early with the usual error dict rather than hitting an
        # IndexError on recommendations[0] after history has been written.
        if not recommendations:
            return {"error": "Optimizer returned no recommendations."}

        # 5. Diagnostics
        step_num = len(history) + 1
        plot_path = f"{output_dir}/step_{step_num}.png"
        if is_moo:
            optimizer.generate_diagnostics(save_path=plot_path)
        else:
            optimizer.generate_diagnostics(next_x_batch, df[target_cols[0]].values.tolist(), save_path=plot_path)

        # 5b. Acquisition Function Plot & Data (SOO only)
        acq_plot_path = None
        acq_data_path = None
        
        if not is_moo and plot_acq:
            print("  - 📊 BO Agent: Plotting acquisition function...")
            try:
                acq_plot_path = f"{output_dir}/acq_step_{step_num}.png"
                optimizer.plot_acquisition(
                    candidate_x=next_x_batch,
                    save_path=acq_plot_path
                )
                print(f"  - 💾 Acquisition plot saved: {acq_plot_path}")
            except RuntimeError as e:
                logging.warning(f"Could not plot acquisition function: {e}")
                acq_plot_path = None
        
        if not is_moo and save_acq:
            print("  - 💾 BO Agent: Saving acquisition function data...")
            try:
                acq_data_path = f"{output_dir}/acq_data_step_{step_num}.npz"
                acq_meta = optimizer.save_acquisition_data(
                    candidate_x=next_x_batch,
                    save_path=acq_data_path
                )
                print(f"  - 💾 Acquisition data saved: {acq_data_path} "
                      f"(keys: {len(acq_meta['keys'])})")
            except RuntimeError as e:
                logging.warning(f"Could not save acquisition data: {e}")
                acq_data_path = None

        # 6. Inspection
        print("  - 👀 BO Agent: Inspecting visuals...")
        visual_prompt = BO_VISUAL_INSPECTION_MOO_PROMPT if is_moo else BO_VISUAL_INSPECTION_PROMPT
        try:
            # The context manager closes the image file once the model call
            # has consumed it.
            with PIL_Image.open(plot_path) as img:
                insp_resp = self.model.generate_content([visual_prompt, img], generation_config=self.generation_config)
            inspection, _ = parse_json_from_response(insp_resp)
        except Exception as e:
            # Inspection is optional; record why it was skipped and continue.
            inspection = {"status": "skipped", "reason": str(e)}

        # 7. Save History
        log_entry = {
            "step": step_num, 
            "config": valid_config, 
            "recommendation_batch": recommendations, 
            "inspection": inspection,
            "budget": budget_ctx,
        }
        # Include acquisition paths in history when available
        if acq_plot_path or acq_data_path:
            log_entry["acquisition"] = {
                "strategy": strategy_name,
                "plot_path": acq_plot_path,
                "data_path": acq_data_path,
            }
        # Include constrained planning metadata in history
        if constrained_metadata:
            log_entry["constrained_planning"] = constrained_metadata
        if physical_constraints:
            log_entry["physical_constraints"] = physical_constraints
        if constraint_error:
            # Record that this batch ignores the constraints and why.
            log_entry["constrained_planning_error"] = constraint_error
            
        self._save_history(log_entry)

        # 8. Output
        if batch_size > 1:
            batch_csv = f"{output_dir}/batch_step_{step_num}.csv"
            pd.DataFrame(recommendations).to_csv(batch_csv, index=False)
            print(f"  - 💾 Batch saved: {batch_csv}")

        result = {
            "status": "success",
            "next_parameters": recommendations[0] if batch_size == 1 else recommendations,
            "strategy": valid_config,
            "plot_path": plot_path,
            "budget": budget_ctx,
        }
        if acq_plot_path:
            result["acq_plot_path"] = acq_plot_path
        if acq_data_path:
            result["acq_data_path"] = acq_data_path
        # Include constrained planning info in result
        if constrained_metadata:
            result["constrained_planning"] = constrained_metadata
        if physical_constraints:
            # True only when the returned batch really came from the
            # constrained planner; a fallback batch is not constraint-aware.
            result["constraint_aware"] = constraint_error is None
            if constraint_error:
                result["constrained_planning_error"] = constraint_error
        
        # Log this action to state
        self._log_action(
            action="run_optimization_loop",
            input_ctx={
                "data_path": data_path,
                "input_cols": input_cols,
                "target_cols": target_cols,
                "batch_size": batch_size,
                "experimental_budget": experimental_budget,
                "budget_phase": budget_ctx["budget_phase"],
                "physical_constraints": physical_constraints is not None,
                "save_acq": save_acq,
                "plot_acq": plot_acq,
            },
            result=result,
            rationale=valid_config.get("rationale")
        )
        
        return result


================================================
FILE: excel_parser.py
================================================
# planning_agents/excel_parser.py
import pandas as pd
import json
from pathlib import Path
from typing import Dict, Any, List, Optional

# If a file has this many rows or fewer, we embed it all in one chunk.
SMALL_FILE_THRESHOLD = 150

# Row batch size used when the caller passes an unusable row_chunk_size.
_DEFAULT_ROW_CHUNK_SIZE = 200


def parse_adaptive_excel(file_path: str, context_path: Optional[str] = None, row_chunk_size: int = 200) -> List[Dict[str, Any]]:
    """
    Read a data file (Excel or CSV) plus an optional JSON context file and
    turn them into text chunks with metadata.

    Adaptive strategy:
    - If rows <= SMALL_FILE_THRESHOLD:
      ONE 'dataset_package' chunk containing the description, the statistical
      summary AND the full data table.
    - If rows > SMALL_FILE_THRESHOLD:
      1. A single 'dataset_summary' chunk with the description and statistics.
      2. One 'data_rows' chunk per batch of ``row_chunk_size`` rows.

    The function is best-effort and never raises: every failure is printed
    and results in an empty list.

    Args:
        file_path: Path to a ``.csv``, ``.xlsx`` or ``.xls`` file (the suffix
            is matched case-insensitively).
        context_path: Optional path to a JSON object that may provide
            ``title``, ``objective`` and ``column_definitions``. A missing,
            unreadable or non-object file is ignored with a warning.
        row_chunk_size: Rows per 'data_rows' chunk for large files. Values
            that are not a positive integer fall back to 200 with a warning.

    Returns:
        A list of ``{'text': str, 'metadata': dict}`` chunks, or ``[]`` on
        an unsupported extension or any processing error.
    """
    path_obj = Path(file_path)
    print(f"  - Processing Data File '{path_obj.name}' with adaptive strategy...")
    all_chunks: List[Dict[str, Any]] = []

    try:
        # --- 1. Robust Context Loading ---
        context: Dict[str, Any] = {}
        if context_path and Path(context_path).exists():
            try:
                with open(context_path, 'r', encoding='utf-8') as f:
                    loaded_context = json.load(f)
            except Exception as e:
                print(f"    - ⚠️ Warning: Could not load context file {context_path}: {e}")
            else:
                # Only a JSON object can be queried with .get(); anything else
                # would otherwise abort the whole parse further down.
                if isinstance(loaded_context, dict):
                    context = loaded_context
                else:
                    print(f"    - ⚠️ Warning: Context file {context_path} is not a JSON object; ignoring it.")

        # A zero step makes range() raise and a negative one yields no batches
        # at all, which would silently drop every data row.
        if not isinstance(row_chunk_size, int) or row_chunk_size < 1:
            print(f"    - ⚠️ Warning: Invalid row_chunk_size {row_chunk_size!r}; using {_DEFAULT_ROW_CHUNK_SIZE}.")
            row_chunk_size = _DEFAULT_ROW_CHUNK_SIZE

        # --- 2. Load the Data File (CSV vs Excel detection) ---
        try:
            suffix = path_obj.suffix.lower()
            if suffix == '.csv':
                df = pd.read_csv(file_path)
            elif suffix in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                print(f"    - ❌ Error: Unsupported file extension '{suffix}'")
                return []
        except ImportError:
            print("    - ❌ Error: 'pandas' or 'openpyxl' not installed. Please run: pip install pandas openpyxl")
            return []
        except Exception as e:
            print(f"    - ❌ Error reading file: {e}")
            return []

        total_rows = len(df)
        print(f"    - Loaded {total_rows} rows.")

        # --- 3. Base Content Construction ---
        # Get title: Use 'title' from context if present, else fallback to filename
        title = context.get('title', path_obj.stem)
        description_parts = [f"### Experiment Data: {title}"]

        # Get objective: Only add if present
        if context.get("objective"):
            description_parts.append(f"#### Objective\n{context['objective']}")

        # Get or create column definitions
        column_defs_dict = context.get('column_definitions')
        if not column_defs_dict or not isinstance(column_defs_dict, dict):
            # Create dummy definitions from DataFrame column headers
            column_defs_dict = {str(header): "No definition provided." for header in df.columns}

        col_defs = "\n".join(f"- `{col}`: {desc}" for col, desc in column_defs_dict.items())
        description_parts.append(f"#### Data Column Definitions\n{col_defs}")

        base_description = "\n\n".join(description_parts)
        statistical_summary = df.describe().to_markdown() if not df.empty else "No statistical summary available."

        def _chunk_metadata(content_type: str, **extra: Any) -> Dict[str, Any]:
            """Build the metadata shared by every chunk of this file."""
            metadata: Dict[str, Any] = {
                'source': file_path,
                'context_source': context_path if context_path else "N/A",
                'content_type': content_type,
            }
            metadata.update(extra)
            metadata['page'] = 1
            return metadata

        # --- 4. Adaptive Chunking Logic ---
        if total_rows <= SMALL_FILE_THRESHOLD:
            # --- STRATEGY A: Small File (One Rich Chunk) ---
            full_data_table = df.to_markdown(index=False)

            combined_text = (
                f"{base_description}\n\n"
                f"#### Statistical Summary\n{statistical_summary}\n\n"
                f"#### Full Experimental Data ({total_rows} rows)\n{full_data_table}"
            ).strip()

            all_chunks.append({
                'text': combined_text,
                'metadata': _chunk_metadata('dataset_package'),
            })

        else:
            # --- STRATEGY B: Large File (Summary + Data Chunks) ---
            print(f"    - File is large ({total_rows} rows). Creating summary + batched data chunks.")

            # 4.1 Create the "Summary Chunk"
            summary_text = (
                f"{base_description}\n\n"
                f"#### Statistical Summary of {total_rows} Rows\n{statistical_summary}"
            ).strip()

            all_chunks.append({
                'text': summary_text,
                'metadata': _chunk_metadata('dataset_summary'),
            })

            # 4.2 Create "Data Chunks" by batching rows
            num_batches = 0
            for i in range(0, total_rows, row_chunk_size):
                df_batch = df.iloc[i : i + row_chunk_size]
                markdown_table = df_batch.to_markdown(index=False)

                # Row numbers in text and metadata are 1-based and inclusive.
                start_row = i + 1
                end_row = i + len(df_batch)

                chunk_text = (
                    f"### {title}\n"
                    f"#### Data Rows {start_row} to {end_row}\n\n"
                    f"{markdown_table}"
                ).strip()

                all_chunks.append({
                    'text': chunk_text,
                    'metadata': _chunk_metadata('data_rows', start_row=start_row, end_row=end_row),
                })
                num_batches += 1

            print(f"    - ✅ Created 1 summary + {num_batches} data chunks.")

        return all_chunks

    except Exception as e:
        # Top-level boundary: ingestion is best-effort, so report and return
        # nothing instead of propagating.
        print(f"    - ❌ Error processing data pair for '{file_path}': {e}")
        return []


================================================
FILE: html_generator.py
================================================
import json
import html
import re
from datetime import datetime
from typing import Dict, Any, List

class HTMLReportGenerator:
    """Render a planning-agent state dict as a standalone HTML report.

    The state dict is read for the keys ``start_time``, ``session_id``,
    ``objective``, ``plan_history`` and ``experimental_results``. Every value
    taken from the state is HTML-escaped before it is embedded in the page.
    """

    # Markdown divider rows such as ``|---|:--:|`` carry no cell content.
    _DIVIDER_ROW = re.compile(r'^\s*\|?[\s\-\:|]+\|?\s*$')
    # A single-line step that contains at least one pipe (e.g. "A | B").
    _STEP_TABLE_ROW = re.compile(r"^\s*\|?.*\|.*\|?\s*$")

    def __init__(self, agent_state: Dict[str, Any]):
        """Store the agent state and the fixed report title/footer strings."""
        self.state = agent_state
        self.title = "SciLink Research Report"
        self.generated_by = "Generated by SciLink Planning Agent"

    @staticmethod
    def _escape(value: Any, default: str = "") -> str:
        """HTML-escape *value* as text, substituting *default* for ``None``."""
        return html.escape(default if value is None else str(value))

    def _get_css(self) -> str:
        """Return the inline ``<style>`` element used by the report."""
        return """
        <style>
            :root { 
                --primary: #2563eb; 
                --bg: #f8fafc; 
                --card-bg: #ffffff; 
                --code-bg: #1e293b;
                --success-bg: #f0fdf4; --success-border: #bbf7d0; --success-text: #166534;
            }
            body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; color: #334155; background: var(--bg); margin: 0; padding: 40px; }
            .container { max-width: 1000px; margin: 0 auto; }
            
            /* Header */
            header { background: var(--card-bg); padding: 30px; border-radius: 12px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 40px; border-bottom: 4px solid var(--primary); }
            h1 { margin: 0; color: #1e293b; font-size: 2em; }
            .meta { color: #64748b; font-size: 0.9em; margin-top: 10px; font-family: monospace; }
            .objective { background: #eff6ff; border-left: 4px solid var(--primary); padding: 15px; margin-top: 20px; color: #1e40af; }
            
            /* Timeline Cards */
            .card { background: var(--card-bg); border-radius: 12px; padding: 0; margin-bottom: 40px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); overflow: hidden; }
            .card-header { padding: 15px 25px; background: #f1f5f9; border-bottom: 1px solid #e2e8f0; display: flex; justify-content: space-between; align-items: center; }
            .card-header h2 { margin: 0; font-size: 1.2em; color: #475569; }
            .badge { padding: 4px 10px; border-radius: 20px; font-size: 0.75em; font-weight: bold; text-transform: uppercase; letter-spacing: 0.05em; }
            .badge.plan { background: #dbeafe; color: #1e40af; }
            .badge.tea { background: #d1fae5; color: #065f46; }
            .card-body { padding: 25px; }

            /* Content Sections */
            .exp-block { margin-bottom: 30px; border-left: 3px solid #e2e8f0; padding-left: 20px; }
            .justification { background: #fffbeb; color: #92400e; padding: 10px; border-radius: 6px; font-style: italic; font-size: 0.95em; }
            
            pre { background: var(--code-bg); color: #e2e8f0; padding: 15px; border-radius: 8px; overflow-x: auto; font-family: 'Menlo', 'Monaco', monospace; font-size: 0.85em; }
            .result-box { margin-top: 20px; padding: 20px; background: var(--success-bg); border: 1px solid var(--success-border); border-radius: 8px; color: var(--success-text); }
            
            /* TEA Specific */
            .tea-list li { margin-bottom: 8px; }
            .cost { color: #b91c1c; }
            .benefit { color: #047857; }

            /* TABLE STYLING */
            .styled-table {
                border-collapse: collapse;
                margin: 25px 0;
                font-size: 0.9em;
                font-family: sans-serif;
                min-width: 400px;
                width: 100%;
                box-shadow: 0 0 20px rgba(0, 0, 0, 0.05);
                border-radius: 8px;
                overflow: hidden;
            }
            .styled-table thead tr {
                background-color: var(--primary);
                color: #ffffff;
                text-align: left;
            }
            .styled-table th, .styled-table td {
                padding: 12px 15px;
            }
            .styled-table tbody tr {
                border-bottom: 1px solid #dddddd;
            }
            .styled-table tbody tr:nth-of-type(even) {
                background-color: #f3f3f3;
            }
            .styled-table tbody tr:last-of-type {
                border-bottom: 2px solid var(--primary);
            }
            .styled-table tbody tr:hover {
                background-color: #e2e8f0;
            }
        </style>
        """

    @staticmethod
    def _split_row(row: str) -> List[str]:
        """Split one pipe-delimited table row into stripped cell strings."""
        return [cell.strip() for cell in row.strip('|').split('|')]

    def _render_table(self, buffer: List[str]) -> str:
        """Turn buffered Markdown table rows into a ``styled-table`` element.

        Divider rows are dropped, the first remaining row becomes the header
        and short body rows are padded with empty cells. Returns ``""`` when
        no content row is left.
        """
        content_rows = [row for row in buffer if not self._DIVIDER_ROW.match(row)]
        if not content_rows:
            return ""

        header_cells = self._split_row(content_rows[0])
        parts = [
            '<table class="styled-table">',
            '<thead><tr>',
            ''.join(f'<th>{html.escape(cell)}</th>' for cell in header_cells),
            '</tr></thead>',
            '<tbody>',
        ]
        for row in content_rows[1:]:
            cells = self._split_row(row)
            # Pad rows that have fewer cells than the header.
            cells.extend([""] * (len(header_cells) - len(cells)))
            parts.append(
                '<tr>' + ''.join(f'<td>{html.escape(cell)}</td>' for cell in cells) + '</tr>'
            )
        parts.append('</tbody></table>')
        return ''.join(parts)

    def _markdown_to_html(self, text: str) -> str:
        """Escape *text* for HTML and convert Markdown tables to HTML tables.

        A table starts at a line that begins with ``|``, or at a line holding
        more than one ``|`` whose following line contains ``-`` (a header above
        its divider). Once a table is open, every following line that contains
        a pipe is kept in it, so tables written without leading pipes are not
        cut off after the header. All other lines are escaped and suffixed
        with ``<br>``.

        Args:
            text: Raw text; falsy values yield ``""`` and other non-string
                values are converted with ``str()``.

        Returns:
            The HTML fragments joined by newlines.
        """
        if not text:
            return ""

        lines = str(text).split('\n')
        html_output = []
        table_buffer = []

        # enumerate() supplies the real position of each line; looking it up
        # with list.index() would return the first duplicate instead.
        for position, line in enumerate(lines):
            stripped = line.strip()
            following = lines[position + 1] if position + 1 < len(lines) else None
            is_header = (
                stripped.count('|') > 1
                and following is not None
                and '-' in following
            )
            continues_table = bool(table_buffer) and '|' in stripped

            if stripped.startswith('|') or is_header or continues_table:
                table_buffer.append(stripped)
                continue

            if table_buffer:
                # A non-table line closes the table collected so far.
                html_output.append(self._render_table(table_buffer))
                table_buffer = []
            html_output.append(html.escape(line) + "<br>")

        if table_buffer:
            html_output.append(self._render_table(table_buffer))

        return "\n".join(html_output)

    def _render_steps(self, raw_steps: Any) -> str:
        """Render the ``experimental_steps`` value of an experiment.

        A list becomes a ``<ul>`` in which consecutive pipe-containing steps
        are merged into one multi-line item so they can render as a single
        table. Any other truthy value is rendered as one block of text.
        """
        if not raw_steps:
            return ""
        if not isinstance(raw_steps, list):
            return self._markdown_to_html(str(raw_steps))

        merged_steps = []
        table_buffer = []
        for step in raw_steps:
            step_text = str(step).strip()
            # Require a pipe so ordinary sentences are never merged.
            if '|' in step_text and self._STEP_TABLE_ROW.match(step_text):
                table_buffer.append(step_text)
                continue
            if table_buffer:
                merged_steps.append("\n".join(table_buffer))
                table_buffer = []
            merged_steps.append(step_text)
        if table_buffer:
            merged_steps.append("\n".join(table_buffer))

        items = "".join(f"<li>{self._markdown_to_html(s)}</li>" for s in merged_steps)
        return f"<ul>{items}</ul>"

    def _render_experiment(self, exp: Dict[str, Any], index: int) -> str:
        """Render one proposed experiment as an ``exp-block`` element.

        Args:
            exp: Experiment dict; the keys ``experiment_name``, ``hypothesis``,
                ``justification``, ``experimental_steps`` and
                ``implementation_code`` are read, all optional.
            index: 1-based number shown in the heading.
        """
        code_html = ""
        code = exp.get('implementation_code')
        if code is not None:
            code_html = f"""
            <div style="margin-top:15px;">
                <strong>💻 Generated Code:</strong>
                <pre><code>{html.escape(str(code))}</code></pre>
            </div>
            """

        steps_html = self._render_steps(exp.get('experimental_steps', []))
        name_html = self._escape(exp.get('experiment_name', 'Unnamed'), default='Unnamed')

        return f"""
        <div class="exp-block">
            <h4 style="color: #2563eb; margin-bottom: 5px;">Experiment {index}: {name_html}</h4>
            
            <div style="margin-bottom: 10px;">
                <strong>🎯 Hypothesis:</strong> 
                <div>{self._markdown_to_html(exp.get('hypothesis', ''))}</div>
            </div>
            
            <div class="justification">
                <strong>💡 Justification:</strong> 
                <div>{self._markdown_to_html(exp.get('justification', ''))}</div>
            </div>
            
            <div style="margin-top: 15px;">
                <strong>🧪 Steps:</strong>
                {steps_html}
            </div>
            {code_html}
        </div>
        """

    def _render_tea(self, plan: Dict[str, Any]) -> str:
        """Render the ``technoeconomic_assessment`` of *plan*.

        Reads ``summary``, ``key_cost_drivers``,
        ``potential_benefits_or_revenue`` and ``economic_risks``; missing or
        ``None`` entries render as empty sections.
        """
        assess = plan.get('technoeconomic_assessment') or {}

        def list_to_html(key, css_class):
            items = assess.get(key) or []
            if isinstance(items, str):
                # A lone string is one entry, not a sequence of characters.
                items = [items]
            return "".join(
                f"<li class='{css_class}'>{html.escape(str(item))}</li>" for item in items
            )

        # The summary may contain Markdown tables.
        summary_html = self._markdown_to_html(assess.get('summary', ''))

        return f"""
        <div class="exp-block" style="border-left-color: #10b981;">
            <div class="justification" style="background: #ecfdf5; color: #065f46;">
                <strong>💰 Executive Summary:</strong><br>
                {summary_html}
            </div>
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 20px;">
                <div><strong>💸 Key Cost Drivers:</strong><ul class="tea-list">{list_to_html('key_cost_drivers', 'cost')}</ul></div>
                <div><strong>📈 Potential Benefits:</strong><ul class="tea-list">{list_to_html('potential_benefits_or_revenue', 'benefit')}</ul></div>
            </div>
            <div style="margin-top: 15px;"><strong>⚠️ Economic Risks:</strong><ul>{list_to_html('economic_risks', '')}</ul></div>
        </div>
        """

    def generate(self, output_path: str):
        """Build the report and write it to *output_path* as UTF-8 HTML.

        Plans whose ``status`` is ``'superseded'`` are skipped; of the
        remaining plans the last one per ``iteration`` value is kept and the
        survivors are shown in ascending iteration order. The i-th entry of
        ``experimental_results`` is attached to the i-th displayed plan.
        Failures while writing the file are printed, not raised.
        """
        date_str = self.state.get('start_time', datetime.now().isoformat())
        full_history = self.state.get('plan_history') or []
        results = self.state.get('experimental_results') or []

        # Superseded plans stay in the JSON state but are left out of the report.
        active_plans = [p for p in full_history if p.get('status') != 'superseded']
        latest_by_iteration = {plan.get('iteration', 0): plan for plan in active_plans}
        sorted_plans = [latest_by_iteration[key] for key in sorted(latest_by_iteration)]

        title_html = self._escape(self.title)
        session_html = self._escape(self.state.get('session_id', 'N/A'), default='N/A')
        started_html = self._escape(date_str)
        objective_html = self._escape(self.state.get('objective', ''))

        parts = [f"""
        <!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title_html}</title>{self._get_css()}</head>
        <body><div class="container">
            <header>
                <h1>🔭 {title_html}</h1>
                <div class="meta">Session: {session_html} &bull; Started: {started_html}</div>
                <div class="objective"><strong>Objective:</strong> {objective_html}</div>
            </header>
        """]

        for position, plan in enumerate(sorted_plans):
            step_num = position + 1
            is_tea = (
                "technoeconomic_assessment" in plan
                or plan.get("type") == "technoeconomic_analysis"
            )
            badge_class = "tea" if is_tea else "plan"
            badge_text = "TECHNO-ECONOMIC ANALYSIS" if is_tea else "EXPERIMENTAL STRATEGY"

            if is_tea:
                content_html = self._render_tea(plan)
            else:
                experiments = plan.get('proposed_experiments') or []
                content_html = "".join(
                    self._render_experiment(exp, number)
                    for number, exp in enumerate(experiments, start=1)
                )

            parts.append(f"""
            <div class="card">
                <div class="card-header">
                    <h2>Step {step_num}</h2>
                    <span class="badge {badge_class}">{badge_text}</span>
                </div>
                <div class="card-body">{content_html}""")

            if position < len(results):
                res_data = results[position].get('data_summary', '')
                if isinstance(res_data, str) and res_data.strip().startswith(('{', '[')):
                    try:
                        res_data = json.dumps(json.loads(res_data), indent=2)
                    except ValueError:
                        # Not valid JSON after all: show the raw text unchanged.
                        pass

                # Result logs may contain Markdown tables too.
                formatted_result = self._markdown_to_html(str(res_data))

                parts.append(f"""
                    <div class="result-box">
                        <div style="font-weight:bold; margin-bottom:10px;">📊 Results Received:</div>
                        <div style="font-family: monospace; font-size: 0.9em;">{formatted_result}</div>
                    </div>
                """)

            parts.append("</div></div>")

        parts.append(
            f"<footer><p>{self._escape(self.generated_by)}</p></footer></div></body></html>"
        )
        html_content = "".join(parts)

        # Writing the report is best-effort: a failure is reported, not raised.
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(html_content)
            print(f"  - 📄 HTML Report updated: {output_path}")
        except Exception as e:
            print(f"  - ❌ Error writing HTML report: {e}")


================================================
FILE: ingestor.py
================================================
import os
from pathlib import Path
from typing import List, Dict, Any

from .pdf_parser import extract_pdf_two_pass, chunk_text
from .excel_parser import parse_adaptive_excel
from .parser_utils import get_files_from_directory


# Lower-cased file suffixes that are treated as images.
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif'}

def extract_images(file_paths: List[str]) -> List[str]:
    """
    Collect the image files named by, or located beneath, the given paths.

    A path that is a file is kept when its suffix (case-insensitive) is in
    IMAGE_EXTENSIONS; a path that is a directory is walked recursively and
    every image file found is kept. Anything else is ignored.

    Returns the matching paths as strings; an empty list for empty input.
    """
    images: List[str] = []

    for entry in file_paths or []:
        candidate = Path(entry)

        if candidate.is_file():
            if candidate.suffix.lower() in IMAGE_EXTENSIONS:
                images.append(str(candidate))
            continue

        if not candidate.is_dir():
            continue

        for folder, _subdirs, names in os.walk(candidate):
            images.extend(
                str(Path(folder) / name)
                for name in names
                if Path(name).suffix.lower() in IMAGE_EXTENSIONS
            )

    if images:
        print(f"  - 🖼️  Found {len(images)} images in input paths.")

    return images


def ingest_files(file_paths: List[str], is_code_mode: bool, code_chunk_size: int = 20000, repo_name: str = None) -> List[Dict[str, Any]]:
    """
    Expand the given paths and parse every file with the parser for its suffix.

    Directories are expanded with get_files_from_directory. Each existing file
    is then routed by lower-cased suffix:
      * '.pdf'                  -> extract_pdf_two_pass (chunks are tagged as
                                   'code' when is_code_mode is set)
      * '.xlsx'/'.xls'/'.csv'   -> parse_adaptive_excel, only when not in code
                                   mode; a sibling '.json' file, if present, is
                                   passed as context_path
      * image suffixes          -> skipped here
      * text/code suffixes      -> read as UTF-8 and split with chunk_text
                                   (chunk size code_chunk_size in code mode,
                                   1000 otherwise)
    Missing files, unsupported types and parser errors are printed and skipped.

    Returns the list of chunk dicts produced by the parsers.
    """
    data_suffixes = ['.xlsx', '.xls', '.csv']
    text_suffixes = ['.txt', '.md', '.py', '.java', '.r', '.cpp', '.h', '.js', '.json', '.csv']

    # Step 1: replace every directory by the files it contains.
    pending = []
    for entry in file_paths or []:
        if Path(entry).is_dir():
            pending.extend(get_files_from_directory(entry))
        else:
            pending.append(entry)

    # Step 2: route each file to its parser.
    collected = []
    for candidate in pending:
        path = Path(candidate)
        if not path.exists():
            print(f"  - ⚠️ File not found: {candidate}")
            continue

        suffix = path.suffix.lower()

        # PDF documents
        if suffix == '.pdf':
            pdf_parts = extract_pdf_two_pass(candidate)
            if is_code_mode:
                for part in pdf_parts:
                    part['metadata']['content_type'] = 'code'
            collected.extend(pdf_parts)
            continue

        # Structured data (Excel/CSV), only outside code mode
        if suffix in data_suffixes and not is_code_mode:
            print(f"  - 📊 Auto-detected Data File: {path.name}")
            sidecar = path.with_suffix('.json')
            context = str(sidecar) if sidecar.exists() else None
            try:
                collected.extend(parse_adaptive_excel(str(path), context_path=context))
            except Exception as e:
                print(f"    - ❌ Error parsing data file: {e}")
            continue

        # Images are collected separately by extract_images.
        if suffix in IMAGE_EXTENSIONS:
            continue

        # Plain text and source code
        if suffix in text_suffixes:
            try:
                with path.open('r', encoding='utf-8') as handle:
                    content = handle.read()

                if is_code_mode:
                    body = f"CODE FILE: {path.name}\n\n```\n{content}\n```"
                    size, kind = code_chunk_size, 'code'
                else:
                    body = f"DOCUMENT: {path.name}\n\n{content}"
                    size, kind = 1000, 'text'

                pieces = chunk_text(body, page_num=1, chunk_size=size, overlap=50)
                for piece in pieces:
                    meta = piece['metadata']
                    meta['content_type'] = kind
                    meta['source'] = candidate
                    if repo_name:
                        meta['repo_name'] = repo_name
                collected.extend(pieces)
            except Exception as e:
                print(f"  - ❌ Error reading text file {candidate}: {e}")
            continue

        # Anything else is reported, except hidden files.
        if not path.name.startswith('.'):
            print(f"  - ⚠️ Unsupported file type: {candidate}")

    return collected


================================================
FILE: instruct.py
================================================
# Prompt text for context-grounded hypothesis and experiment generation.
# It tells the model to work only from the supplied inputs (objective, optional
# primary dataset, retrieved context, optional images and image descriptions),
# to answer with {"error": ...} when the context is insufficient, and otherwise
# to return a JSON object whose "proposed_experiments" list holds one plan.
# NOTE(review): the task paragraph asks for "one or more" experiments while the
# output format demands exactly ONE - confirm which is intended.
HYPOTHESIS_GENERATION_INSTRUCTIONS = """
You are an expert research scientist and strategist. Your primary goal is to develop testable hypotheses and concrete experimental plans based *only* on the provided knowledge base.

**Input:**
1.  **General Objective:** The high-level research goal.
2.  **Primary Dataset:** (If provided) Actual experimental data, composition measurements, or preliminary results that determine the scope of your analysis.
3.  **Retrieved Context:** Relevant excerpts from scientific papers and technical documents.
4.  **Provided Images:** (Optional) One or more images (e.g., charts, microscope images, diagrams) provided by the user for visual context.
5.  **Provided Image Descriptions:** (Optional) Text or JSON descriptions corresponding to the provided images.

**Crucial Safety Rule & Conditional Logic:**
Your response format depends on the quality of the retrieved context.
- **IF** the retrieved context is empty, irrelevant, or too general to formulate a *specific, actionable* experiment that directly addresses the objective:
    - You **MUST NOT** invent an experiment or use your general knowledge.
    - Instead, you **MUST** respond with a JSON object containing an "error" key.
    - Example: `{"error": "Insufficient context to generate a specific experiment. The provided documents do not contain information about [topic from objective]."}`
- **ELSE** (if the context is sufficient):
    - Proceed with the task below.

**Task (only if context is sufficient):**
Synthesize the information from the retrieved context, *any provided images, and any provided image descriptions* to propose one or more specific, actionable experiments to address the general objective. Your entire response must be directly derivable from the provided context (text and images).

**Output Format (only if context is sufficient):**
You MUST respond with a single JSON object containing a key "proposed_experiments", which is a list containing exactly ONE experiment plan. The plan must have the following keys:
- "hypothesis": (String) A clear, single-sentence, testable hypothesis.
- "experiment_name": (String) A short, descriptive name for the experiment.
- "experimental_steps": (List of Strings) A numbered or bulleted list of concrete steps to perform the experiment.
    - Avoid using placeholders like "appropriate amount" or "standard settings".
    - If the experiment involves a grid or a gradient, include a Markdown table defining the exact layout.   
    - Must be fully understandable by a human WITHOUT referencing external code or files or other sections of the JSON file.
- "required_equipment": (List of Strings) A list of key instruments or techniques mentioned in the context that are required for this experiment.
- "optimization_params": (Optional List) If the experiment requires numerical optimization, provide:
    - "parameter_name": (String) e.g., "Temperature"
    - "min_value": (Float) e.g., 20.0
    - "max_value": (Float) e.g., 100.0
    - "rationale": (String) e.g., "Literature suggests instability above 100C."
- "expected_outcome": (String) A description of what results would support or refute the hypothesis.
- "justification": (String) A brief explanation of why this experiment is a logical step, citing information from the retrieved context.
- "source_documents": (List of Strings) A list of the unique source filenames that informed this experimental plan.
"""

# Prompt text for a context-grounded preliminary technoeconomic assessment (TEA).
# It tells the model to rely strictly on the supplied inputs, to answer with
# {"error": ...} when the context holds little or no economic information, and
# otherwise to return a JSON object under the key "technoeconomic_assessment".
TEA_INSTRUCTIONS = """
You are an expert technoeconomic analyst specializing in scientific and engineering fields. Your primary goal is to provide a preliminary technoeconomic assessment (TEA) of a proposed technology, process, or material *based strictly on the provided knowledge base context*.

**Input:**
1.  **Objective:** The specific technology, process, or material to be assessed economically.
2.  **Primary Dataset:** (If provided) Actual experimental data, composition measurements, or preliminary results that constrain the scope of your analysis.
3.  **Retrieved Context:** Relevant excerpts from scientific papers, technical reports, experimental data summaries, and market analyses.
4.  **Provided Images:** (Optional) One or more images (e.g., process flow diagrams, device photos, cost breakdown charts) provided by the user for visual context.
5.  **Provided Image Descriptions:** (Optional) Text or JSON descriptions corresponding to the provided images.

**Crucial Safety Rule & Conditional Logic:**
Your response format depends on the quality and relevance of the retrieved context for economic analysis.
- **IF** the retrieved context contains little to no economic information (e.g., costs, prices, market size, efficiency comparisons, manufacturing challenges related to cost) relevant to the objective:
    - You **MUST NOT** invent economic data or use your general knowledge of typical costs.
    - Instead, you **MUST** respond with a JSON object containing an "error" key.
    - Example: `{"error": "Insufficient economic context provided to perform a meaningful technoeconomic assessment for [objective topic]. Context focuses primarily on technical aspects."}`
- **ELSE** (if the context provides *some* relevant economic indicators, even if qualitative):
    - Proceed with the task below, relying *only* on the information given.

**Task (only if context is sufficient):**
Synthesize the economic indicators, cost factors, potential benefits, and market information mentioned *within the retrieved context, any provided images, and any provided image descriptions* to provide a preliminary TEA. Explicitly state when information is qualitative or quantitative based on the context. Do not perform calculations unless the context provides explicit numerical data and units for comparison.

**Output Format (only if context is sufficient):**
You MUST respond with a single JSON object containing a key "technoeconomic_assessment". This object must have the following keys:
- "summary": (String) A brief qualitative summary of the economic potential and challenges identified *from the context*. (e.g., "Context suggests potential viability due to high efficiency mentioned, but raw material costs identified as a major challenge.", "Preliminary assessment based on context indicates significant economic hurdles related to scaling.").
- "key_cost_drivers": (List of Strings) Specific factors mentioned in the context that likely drive costs. Prefix with "(Qualitative)" or "(Quantitative)" if the context allows. (e.g., "(Qualitative) Energy-intensive manufacturing process described", "(Quantitative) Context cites high price for platinum catalyst").
- "potential_benefits_or_revenue": (List of Strings) Economic advantages or potential revenue streams mentioned in the context. Prefix with "(Qualitative)" or "(Quantitative)". (e.g., "(Qualitative) Potential for improved device lifespan reducing replacement costs", "(Quantitative) Report mentions market value projection of $X billion by 20XX").
- "economic_risks": (List of Strings) Potential economic downsides or uncertainties mentioned in the context. Prefix with "(Qualitative)" or "(Quantitative)". (e.g., "(Qualitative) Dependence on volatile rare earth element prices noted", "(Qualitative) Manufacturing yield challenges highlighted").
- "comparison_to_alternatives": (String) A brief comparison to alternative technologies/materials *if explicitly discussed in the context* in economic terms. (e.g., "Context mentions silicon carbide offers higher efficiency than silicon but at a higher projected cost.", "No direct economic comparison to alternatives found in context.").
- "data_gaps_for_quantitative_analysis": (List of Strings) Specific types of economic data clearly missing *from the provided context* that would be needed for a more rigorous quantitative TEA. (e.g., "Specific cost per kg of precursor materials", "Detailed breakdown of capital expenditure for manufacturing setup", "Energy consumption per unit produced").
- "source_documents": (List of Strings) A list of the unique source filenames that informed this assessment.
"""


# Fallback variant of the hypothesis-generation prompt. It tells the model to
# ignore the retrieved context, to rely on general scientific knowledge plus
# any primary dataset and images, to put a fixed warning sentence into
# "justification", and to leave "source_documents" empty. The output has the
# same "proposed_experiments" shape with exactly one plan.
# NOTE(review): presumably selected by the caller after the context-grounded
# prompt reports insufficient context - confirm where this is used.
HYPOTHESIS_GENERATION_INSTRUCTIONS_FALLBACK = """
You are an expert research scientist.

**STATUS: FALLBACK MODE ACTIVATED**
The specific documents retrieved from the Knowledge Base were found to be insufficient or irrelevant. 
However, you **MUST** proceed to help the user start their research.

**INPUT DATA HANDLING:**
1. **Primary Experimental Data:** (If provided below) This is **HARD DATA** and is valid. You MUST use it to constrain your plan (e.g., use the specific chemicals or concentration ranges found in the data).
2. **Provided Images:** (If provided) Analyze these visual results.
3. **Retrieved Context:** (Text at the bottom) **IGNORE THIS SECTION.** It has been flagged as irrelevant. Do not cite it.

**TASK:**
Propose a **foundational** experimental plan based on:
1. Your **General Scientific Knowledge** of the field.
2. The **Primary Dataset** (if available).

**OUTPUT FORMAT:**
You MUST respond with a single JSON object containing a key "proposed_experiments", which is a list containing exactly ONE experiment plan. The plan must have the following keys:
- "hypothesis": (String) A clear, single-sentence, testable hypothesis.
- "experiment_name": (String) A short, descriptive name for the experiment.
- "experimental_steps": (List of Strings) A numbered or bulleted list of concrete steps to perform the experiment. Must be self-contained, i.e. fully understandable by a human WITHOUT referencing external code or files or other sections of the JSON file.
- "required_equipment": (List of Strings) A list of common lab equipment.
- "optimization_params": (Optional List) If the experiment requires numerical optimization, provide:
    - "parameter_name": (String) e.g., "Temperature"
    - "min_value": (Float) e.g., 20.0
    - "max_value": (Float) e.g., 100.0
    - "rationale": (String) e.g., "Literature suggests instability above 100C."
- "expected_outcome": (String) A description of what results would support the hypothesis.
- "justification": (String) **MUST be 'Warning: This proposal is based on general scientific knowledge as the provided documents lacked specific context.'**
- "source_documents": (List of Strings) An empty list `[]`.
"""


# Fallback variant of the TEA prompt. It tells the model to ignore the
# retrieved context and give a high-level estimate from general engineering
# economics and industry benchmarks, using any primary dataset and images.
# The output keeps the "technoeconomic_assessment" shape, with
# "source_documents" left empty.
# NOTE(review): presumably selected by the caller when the context-grounded
# TEA prompt reports insufficient economic context - confirm where this is used.
TEA_INSTRUCTIONS_FALLBACK = """
You are an expert technoeconomic analyst.

**STATUS: FALLBACK MODE ACTIVATED**
Specific economic reports for this specific technology were not found. You must provide a **high-level estimation** based on industry standards.

**INPUT DATA HANDLING:**
1. **Primary Experimental Data:** (If provided below) Use this for material inputs, yields, or energy consumption figures.
2. **Provided Images:** (If provided) Analyze these visual results.
3. **Retrieved Context:** (Text at the bottom) **IGNORE THIS SECTION.** It contains no relevant economic data.

**TASK:**
Provide a preliminary Technoeconomic Assessment (TEA) based on **General Engineering Economics** and **Industry Benchmarks**.

**OUTPUT FORMAT:**
You MUST respond with a single JSON object containing a key "technoeconomic_assessment". 
You MUST include the following fields, populated based on general knowledge:
- "summary": (String) A qualitative summary of economic potential.
- "key_cost_drivers": (List of Strings) Likely cost drivers (e.g., "High energy cost of electrolysis").
- "potential_benefits_or_revenue": (List of Strings) Standard revenue streams.
- "economic_risks": (List of Strings) Common risks for this technology.
- "comparison_to_alternatives": (String) Comparison to standard industry benchmarks.
- "data_gaps_for_quantitative_analysis": (List of Strings) What specific data would you need for a real TEA?
- "source_documents": (List of Strings) An empty list [].
"""

# Prompt text for configuring a single-objective Bayesian Optimization step.
# It asks the model to pick one acquisition strategy (log_ei, max_variance,
# ucb, thompson), one kernel, and one noise prior, applying the budget rules
# in the listed priority order, and to answer with a single JSON object
# holding "model_config", "acquisition_strategy", and "rationale".
# The braces in the example are literal (not escaped for str.format).
BO_CONFIG_SOO_PROMPT = """
You are a Principal Investigator configuring a Single-Objective Bayesian Optimization experiment.

**INPUTS:**
1. **Context:** User's objective and the **Fixed Batch Size** constraint.
2. **Trend:** History of previous steps.
3. **Data:** Statistics of current dataset.
4. **Experimental Budget:** How many optimization iterations remain in the campaign,
   along with a recommended phase and guidance. **You MUST follow the budget guidance
   when selecting a strategy.** Ignoring budget constraints wastes irreplaceable experiments.

**TASK:** Return a SINGLE JSON object to configure the math.

---
**MENU 1: ACQUISITION STRATEGY (Select based on Research Phase AND Budget)**

* `"log_ei"`: **Balanced Progress (Default).**
    * *Best for:* Mid-stage optimization. Automatically balances exploration and exploitation.
    * *Constraint:* Only efficient for **small batch sizes (< 10)**.
    * *Budget:* Safe choice at ANY budget level. Preferred when budget is low.

* `"max_variance"`: **Pure Exploration (Active Learning).**
    * *Use when:* **"Cold Start"** (Day 0-1) or when the model is confused (high error).
    * *Why:* Ignores objective value. Picks points strictly to reduce model uncertainty. "Draw the map before hunting for treasure."
    * *Budget:* ⚠️ **NEVER use when budget ≤ 3.** Only appropriate when budget is high 
      AND data is genuinely sparse. Exploration with no budget to exploit later is waste.

* `"ucb"`: **Strategic Override (Tunable).** Requires `beta` (float).
    * *Use when:* You want to force a specific behavior.
    * `beta` < 0.5: **Exploit.** Zoom in on the best point found so far.
    * `beta` > 4.0: **Optimistic Explore.** Explore regions that *might* be high performing (High Mean + High Var).
    * *Budget:* When budget is low (≤ 3), use `beta` < 1.0. When budget is 1 (final shot), 
      use `beta` < 0.3 for maximum exploitation.

* `"thompson"`: **High-Throughput / Batching.**
    * *Best for:* **Large batch sizes (> 10)**.
    * *Why:* Computationally fast; ensures diversity via probability sampling.
    * *Budget:* Acceptable at moderate+ budgets. ⚠️ Avoid at budget = 1 (too stochastic 
      for a final shot).
    
**MENU 2: KERNEL (Physics)**
* `"matern_2.5"`: **(Default)** Standard physical processes. Smooth but allows local variation.
* `"matern_1.5"`: Use if data is **jagged**, discontinuous, or changes rapidly.
* `"rbf"`: Use ONLY if data is **extremely smooth** and theoretical.

**MENU 3: NOISE PRIOR**
* `"fixed_low"`: **(Default)** Precise lab equipment.
* `"learnable"`: Unsure of measurement quality.
* `"high_noise"`: Data has shown erratic jumps.

**BUDGET DECISION RULES (in priority order):**
1. If budget = 1: Use `log_ei` or `ucb` with beta < 0.3. Nothing else.
2. If budget ≤ 3: Use `log_ei` or `ucb` with beta < 1.0. No `max_variance`.
3. If budget is low (<25% of campaign): Favor exploitation (`log_ei`, low-beta `ucb`).
4. If budget is high AND data is sparse: `max_variance` is acceptable.
5. If batch_size > 10 AND budget > 3: `thompson` is acceptable.

**OUTPUT FORMAT:**
{
  "model_config": { "kernel": "matern_2.5", "noise": "fixed_low" },
  "acquisition_strategy": { 
      "type": "ucb", 
      "params": { "beta": 0.1 } 
  },
  "rationale": "Budget is critical (2 remaining). We found a promising peak. Using UCB with low beta (0.1) to aggressively exploit this region with a batch of 8 points."
}
"""

# Prompt text for configuring a multi-objective optimization step.
# It offers three acquisition strategies (pareto, weighted, max_variance) plus
# the same kernel and noise menus as the single-objective prompt, and asks for
# a single JSON object with "model_config", "acquisition_strategy", and
# "rationale". For "weighted", the params carry a "weights" list and "beta".
BO_CONFIG_MOO_PROMPT = """
You are a Principal Investigator configuring a Multi-Objective Optimization experiment.

**INPUTS:**
1. **Context:** User's objective and **Fixed Batch Size** constraint.
2. **Trend:** History of previous steps.
3. **Data:** Statistics of current dataset.
4. **Experimental Budget:** How many optimization iterations remain in the campaign,
   along with a recommended phase and guidance. **You MUST follow the budget guidance
   when selecting a strategy.** Ignoring budget constraints wastes irreplaceable experiments.

**TASK:** Return a SINGLE JSON object.

---
**MENU 1: ACQUISITION STRATEGY (MOO)**
* `"pareto"`: **(Default)** qNEHVI. Best for general purpose frontier expansion.
    * *Works for:* Any batch size.
    * *Budget:* Safe at all budget levels. At low budgets, it naturally focuses on 
      high-value Pareto improvements.

* `"weighted"`: Linear Scalarization. Requires `weights` list (e.g., `[0.5, 0.5]`) and `beta`.
    * *Description:* Scalarizes objectives -> applies UCB.
    * `beta` ~ 0.1: Exploitative on the weighted sum.
    * `beta` > 5.0: Explorative on the weighted sum.
    * *Budget:* When budget is low (≤ 3), use low `beta` (< 1.0). For final shot, 
      use `beta` < 0.3 with weights targeting the most important objective.

* `"max_variance"`: Uncertainty sampling (Pure exploration).
    * *Budget:* ⚠️ **NEVER use when budget ≤ 3.** Only when budget is high AND 
      frontier coverage is genuinely poor.

**MENU 2: KERNEL (Physics)**
* `"matern_2.5"`: **(Default)** Standard physical processes. Smooth but allows local variation.
* `"matern_1.5"`: Use if data is **jagged**, discontinuous, or changes rapidly.
* `"rbf"`: Use ONLY if data is **extremely smooth** and theoretical.

**MENU 3: NOISE PRIOR**
* `"fixed_low"`: **(Default)** Precise lab equipment.
* `"learnable"`: Unsure of measurement quality.
* `"high_noise"`: Data has shown erratic jumps.

**BUDGET DECISION RULES (in priority order):**
1. If budget = 1: Use `pareto` or `weighted` with beta < 0.3. Nothing else.
2. If budget ≤ 3: Use `pareto` or `weighted` with beta < 1.0. No `max_variance`.
3. If budget is low (<25% of campaign): Favor `pareto` or exploit-heavy `weighted`.
4. If budget is high AND frontier is sparse: `max_variance` is acceptable.

**OUTPUT FORMAT:**
{
  "model_config": { "kernel": "matern_2.5", "noise": "fixed_low" },
  "acquisition_strategy": {
    "type": "weighted",
    "params": { "weights": [0.8, 0.2], "beta": 0.1 }
  },
  "rationale": "Only 2 experiments remain. Prioritizing Yield (0.8) over Purity (0.2). Using low beta (0.1) to exploit the best trade-off region found so far."
}
"""

# Prompt text for visually validating a GP model from a 4-panel diagnostic
# dashboard (calibration, trend, acquisition function, sensitivity).
# The requested answer is a JSON object with "status" ("pass" or "fail"),
# "reason", and "suggested_adjustments" (only on failure). The OUTPUT JSON
# block below is a schema sketch, not strictly parseable JSON.
BO_VISUAL_INSPECTION_PROMPT = """
You are a Data Scientist validating a GP model and its optimization strategy.
Analyze the 4-panel diagnostic dashboard.

**Checklist:**
1. **Calibration (Top-Left):** Do points roughly follow the red diagonal? Points far off the line indicate the model is making poor predictions.
2. **Trend (Top-Right):** Is the green 'Best Found' line improving or flat? A flat line means the optimizer is stuck and may need a strategy change.
3. **Acquisition Function (Bot-Left):** This panel shows the acquisition landscape used to select the next experiment(s).
   - For **1D/2D problems**: The full acquisition surface is shown. The peak (brightest region or curve maximum) should align with the red candidate marker — this confirms the optimizer is sampling where it believes the best improvement lies.
   - For **higher-dimensional problems**: A 2D slice through the two most important parameters is shown (other parameters held at the candidate values). Check that the candidate star sits near a peak, not in a flat/low region.
   - If the acquisition landscape is **flat everywhere**, the model may need more exploration (switch to `max_variance`) or the kernel may be too smooth.
   - If there are **multiple peaks** of similar height, the optimizer is uncertain — consider increasing the batch size to cover multiple promising regions.
4. **Sensitivity (Bot-Right):** Which parameter has the longest bar? This is the most important driver. If all bars are similar, no single parameter dominates.

**OUTPUT JSON:**
{
  "status": "pass" | "fail",
  "reason": "Calibration is good. Acquisition function shows a clear peak near the candidate, confirming exploitation of a promising region. Sensitivity shows Temperature is the dominant factor.",
  "suggested_adjustments": { "kernel": "matern_1.5" } (Only if fail)
}
"""


# Prompt text for visually reviewing multi-objective results from 2D scatter
# plots, where red points are Pareto-efficient and gray points are dominated.
# The requested answer is a JSON object with "status", "reason", and
# "suggested_adjustments" (only when points are clustered/stuck). The OUTPUT
# JSON block below is a schema sketch, not strictly parseable JSON.
BO_VISUAL_INSPECTION_MOO_PROMPT = """
You are a Principal Investigator analyzing the trade-offs in a Multi-Objective experiment.
Analyze the diagnostic image, which contains one or more 2D scatter plots.

**Key:**
- **Red Points:** Pareto Efficient solutions (The Frontier).
- **Gray Points:** Sub-optimal (Dominated) solutions.

**Checklist:**
1. **Trade-offs (Curves):** In any plot, do the red points form a convex curve (an "L" shape or arc)? This confirms a conflict between those two objectives.
2. **Correlations (Lines):** In any plot, do red points form a diagonal line going UP? This means the objectives are compatible (improving one improves the other).
3. **Spread:** Do the red points cover a wide range, or are they clustered in one spot? (We want a wide spread).

**OUTPUT JSON:**
{
  "status": "pass" | "fail",
  "reason": "The plot shows a clear convex trade-off curve between Yield and Purity. The red points are well-spread, indicating a successful approximation of the Pareto Frontier.",
  "suggested_adjustments": { "acquisition_strategy": "max_variance" } (Only if points are clustered/stuck)
}
"""

# Prompt text for designing a physically constrained experiment batch in a
# single-objective campaign. The model is asked to allocate experiments in
# proportion to acquisition value while respecting the stated constraints, and
# to answer with one JSON object holding "batch", "coverage_summary",
# "trade_offs", "allocation_strategy", and "validation_points".
# The example object must itself be strictly valid JSON (no trailing commas),
# because the prompt demands valid JSON and the model tends to imitate it.
BO_CONSTRAINED_BATCH_PROMPT = """
You are a Principal Investigator designing a physically constrained experiment batch.

**SITUATION:**
Bayesian Optimization has identified promising regions in parameter space using a Gaussian Process model.
However, the experimental setup has physical constraints that prevent arbitrary parameter combinations.
Your job is to design a realizable batch that captures as much value from the acquisition landscape 
as possible while strictly respecting all physical constraints.

**INPUTS:**
1. **Optimization Objective:** The scientific goal being optimized.
2. **Acquisition Landscape:** A ranked table of high-value regions in parameter space.
   - Each region has a center point, acquisition value (higher = more valuable to sample), 
     and a spread indicating how broad the region is.
   - These regions were identified by the fitted Gaussian Process model.
   - **Use the Acq. Value column to decide where to concentrate experiments.**
     Regions with 2x higher acquisition value should get roughly 2x more experiments.
3. **Physical Constraints:** Natural language description of experimental setup limitations.
4. **Batch Size:** Total number of experiments to fill.
5. **Current Best:** The best experimental result found so far (for reference).
6. **Unconstrained BO Suggestions:** What standard BO would recommend without constraints (for reference).
7. **Data Summary:** Statistics of the current dataset.
8. **Experimental Budget** (if provided): How many iterations remain. Critical for allocation strategy.

**DESIGN PRINCIPLES:**
1. **Allocate Proportionally to Acquisition Value:** Distribute experiments across regions 
   in proportion to their acquisition values. High-value regions should receive MORE experiments 
   than low-value regions. Do NOT spread experiments uniformly across all parameter levels — 
   that wastes capacity on low-value areas. If the acquisition landscape peaks at specific 
   parameter combinations, concentrate experiments there.
   - **Budget caveat:** When the experimental budget section says "final_shot" or "critical", 
     concentrate ≥60% of experiments in the top 3-5 regions. Uniform coverage is explicitly wrong 
     for final-shot scenarios.
2. **Respect Constraints Absolutely:** Never violate a physical constraint. If a high-value 
   region is infeasible, skip it and document why.
3. **Snap to Feasible Values:** When a parameter is constrained to discrete values (e.g., 
   specific reagent concentrations, fixed temperature zones), snap to the nearest feasible 
   value. Document the deviation from the optimal.
4. **Include Validation Points:** If batch size allows (>8), include 1-2 replicates near the 
   current best to confirm reproducibility.
5. **Fill Remaining Slots Strategically:** If high-value regions are exhausted or infeasible,
   use remaining slots for:
   a. Boundary exploration (edges of feasible space not yet sampled)
   b. Replicates of surprising results
   c. Control experiments

**OUTPUT FORMAT:**
Return a single valid JSON object:
{
  "batch": [
    {"experiment_id": 1, "params": {"Temperature_C": 65.0, "pH": 7.2, "Concentration_mM": 2.5}},
    {"experiment_id": 2, "params": {"Temperature_C": 45.0, "pH": 5.5, "Concentration_mM": 1.0}}
  ],
  "coverage_summary": "Covered 5 of top 8 regions. Regions 4,7 infeasible...",
  "trade_offs": "Region 1 center suggests Conc=3.7mM but only 2.5 and 5.0 available...",
  "allocation_strategy": "60% of experiments (58) in top 3 regions (high Temp, high pH, high Catalyst). 25% (24) in regions 4-8. 15% (14) for boundary probes and validation replicates.",
  "validation_points": "Experiments 95-96 replicate current best."
}
"""

# Prompt text for designing a physically constrained experiment batch in a
# multi-objective campaign. Batch entries are restricted to "experiment_id"
# and "params"; all reasoning goes into the top-level summary fields
# ("allocation_strategy", "coverage_summary", "trade_offs", "pareto_strategy",
# "validation_points").
BO_CONSTRAINED_BATCH_PROMPT_MOO = """
You are a Principal Investigator designing a physically constrained experiment batch 
for a Multi-Objective Optimization campaign.

**SITUATION:**
Bayesian Optimization has identified promising regions in parameter space using a 
multi-output Gaussian Process model. The acquisition landscape reflects expected 
Pareto front improvement (hypervolume gain). However, the experimental setup has 
physical constraints that prevent arbitrary parameter combinations.

**INPUTS:**
1. **Optimization Objective:** The scientific goal with multiple targets.
2. **Acquisition Landscape:** Ranked regions by expected hypervolume improvement.
   - **Use the Acq. Value column to decide where to concentrate experiments.**
     Regions with higher values should receive proportionally more experiments.
3. **Physical Constraints:** Experimental setup limitations.
4. **Batch Size:** Number of experiments to design.
5. **Current Pareto Front:** The non-dominated solutions found so far.
6. **Unconstrained BO Suggestions:** Standard BO recommendations (for reference).
7. **Data Summary:** Statistics of the current dataset.
8. **Experimental Budget** (if provided): How many iterations remain. Critical for allocation strategy.

**MULTI-OBJECTIVE DESIGN PRINCIPLES:**
1. **Allocate Proportionally to Acquisition Value:** Do NOT spread experiments uniformly. 
   Concentrate experiments in regions with highest expected hypervolume improvement.
   - **Budget caveat:** When the experimental budget section says "final_shot" or "critical",
     concentrate ≥60% of experiments in the top 3-5 regions.
2. **Pareto Diversity:** Within the high-value regions, distribute experiments to expand 
   DIFFERENT parts of the Pareto front. Don't cluster all points in one trade-off region.
3. **Gap Filling:** If the current Pareto front has gaps (sparse regions), 
   prioritize filling those gaps even if acquisition values are slightly lower.
4. **Extreme Points:** Include 1-2 experiments that push individual objectives 
   to their limits (anchor points) if batch size allows.
5. **Constraint Handling:** Same as single-objective — snap to feasible values, 
   skip infeasible regions, document in summary.

**CRITICAL — OUTPUT FORMAT:**
The batch array must contain ALL experiments up to the requested batch size.
Each entry is COMPACT — just experiment_id and params. No per-experiment rationale.
All reasoning goes in the summary fields OUTSIDE the batch array.

Return a single valid JSON object:
{
  "batch": [
    {"experiment_id": 1, "params": {"Temperature_C": 65.0, "pH": 7.2, "Concentration_mM": 2.5}},
    {"experiment_id": 2, "params": {"Temperature_C": 45.0, "pH": 5.5, "Concentration_mM": 1.0}},
    {"experiment_id": 3, "params": {"Temperature_C": 50.0, "pH": 6.0, "Concentration_mM": 2.0}}
  ],
  "allocation_strategy": "60% of wells target top 3 acquisition regions. 25% fill Pareto front gaps. 15% for extreme points and validation.",
  "coverage_summary": "Which regions/Pareto segments are covered. E.g.: Targeted 3 distinct front segments. Region 4 infeasible due to temperature constraint.",
  "trade_offs": "Key compromises from snapping to discrete values. E.g.: Region 2 center at pH 4.3 snapped to 4.5. Front gap between Yield=40-50 partially addressed.",
  "pareto_strategy": "Overall Pareto expansion plan. E.g.: 60% explores frontier gaps, 25% pushes extremes, 15% validates existing front.",
  "validation_points": "Which experiments replicate existing Pareto-optimal points."
}

**IMPORTANT:** The "batch" array must contain EXACTLY the number of experiments requested in Batch Size (or as close as physically possible given the constraints). Do NOT include rationale, target_region, pareto_intent, or any other fields inside batch entries — only experiment_id and params.
"""

# Prompt text asking the model to write a Python script that turns a raw
# experimental data file into scalar descriptors. The generated script must
# take the data path from sys.argv[1], save a diagnostic plot, and print JSON
# with "metrics" and "plot_path" to STDOUT. The model's own reply is a JSON
# object with "thought_process" and "implementation_code".
# "ORIGINAL_FILE_PATH" and "OUTPUT_DIR_PLACEHOLDER" are literal placeholder
# tokens in this text; the prompt itself says OUTPUT_DIR_PLACEHOLDER is
# replaced with the actual path. NOTE(review): the substitution code is not
# visible here — confirm where it happens.
# The "\\n" in the last example is deliberate: it yields a literal backslash-n
# so the example reads as a JSON-escaped string.
SCALARIZER_PROMPT = """
You are an expert Chemometrician and Python Programmer.
Your goal is to write a Python script that converts raw experimental data files into SCALAR DESCRIPTORS (floats).

The extracted metrics will be used to train a Gaussian Process model to suggest optimal parameters.
Therefore, while summaries like "max_yield", "best_temperature", or "average" can be helpful for visualization purposes, the final output must contain individual data points for Bayesian optimization, not just summaries or averages.

**IMPORTANT - FILE PATH PARAMETERIZATION:**
Your script MUST accept the data file path as a command-line argument for reusability across multiple files.

**Required structure:**
```python
import sys
import pandas as pd
from pathlib import Path
# ... other imports ...

# Accept file path as command-line argument
if len(sys.argv) > 1:
    data_path = sys.argv[1]
else:
    data_path = "ORIGINAL_FILE_PATH"  # Fallback for testing

# Read data using the parameterized path
df = pd.read_csv(data_path)  # or pd.read_excel(data_path)

# YOUR ANALYSIS CODE HERE
# ...

# Use the exact path provided to save plot
plot_path = Path("OUTPUT_DIR_PLACEHOLDER") / f"debug_{Path(data_path).stem}.png"
# ... save plot ...

# Output results as JSON
result = {
    "metrics": {...},
    "plot_path": str(plot_path)
}
```

**LIBRARIES AVAILABLE:**
- `pandas`, `numpy`, `scipy` (signal, stats, optimize), `sklearn`, `openpyxl`.
- `matplotlib.pyplot` (REQUIRED for visual proof).

**CRITICAL RULES:**
1. **Context Awareness:** Use the provided EXPERIMENTAL CONTEXT to disambiguate signals.
2. **Visual Proof:** You MUST generate a plot saving it to the EXACT path provided in the prompt (OUTPUT_DIR_PLACEHOLDER will be replaced with actual path)
   - **IMPORTANT:** Use `plt.switch_backend('Agg')` at the start to avoid GUI errors.
   - The plot should visually explain the calculation (e.g., highlight the peak, shade the area).
   - Keep it simple and focused (1-2 subplots max)
   - Title the plot with the calculated value.
3. **Robustness:** Use `try/except`. Return `null` if data is corrupt.
4. **File Path Parameterization:** The script will be reused for multiple data files with the same structure, so file path parameterization via `sys.argv[1]` is MANDATORY.
5. **Output:** Print ONLY valid JSON to STDOUT.

**SCHEMA REQUIREMENTS:**
If the goal or experimental context specifies required columns, you MUST extract exactly those columns:
- "input_columns": These are the independent variables (e.g., temperature, pH, concentration)
- "target_columns": These are the dependent variables to optimize (e.g., yield, selectivity)

Your output metrics MUST include ALL specified input and target columns.
For multi-objective optimization, ensure ALL target columns are present in each row.

**OUTPUT SCHEMA (STDOUT):**
**For multiple measurements:**
```json
{
  "metrics": [
    {"Temperature_C": 68.5, "Concentration_M": 2.36, "Yield_Percent": 2.16},
    {"Temperature_C": 98.7, "Concentration_M": 1.29, "Yield_Percent": 35.93},
    {"Temperature_C": 22.8, "Concentration_M": 1.86, "Yield_Percent": 0.0}
  ],
  "plot_path": "path/to/plot.png"
}
```

**For single measurement (e.g., single spectrum):**
```json
{
  "metrics": {"Peak_Absorbance": 1.45, "Peak_Time_s": 0.3},
  "plot_path": "path/to/plot.png"
}
```

**LLM RESPONSE FORMAT:**
You (the Agent) must return a single JSON object containing the code:
{
  "thought_process": "Brief explanation of the approach...",
  "implementation_code": "import pandas as pd\\nimport numpy as np..."
}
"""

# Prompt text for auditing the output of the scalarizer analysis: the reviewer
# checks the plot, the logic, and the physical plausibility of the metrics.
# The requested answer is a JSON object with "status" plus either "reasoning"
# (on pass) or "feedback" (on fail).
# NOTE(review): the task asks to "Check Logic" of the code, but the numbered
# input list does not mention the code — confirm the caller supplies it.
SCALARIZER_REFLECTION_PROMPT = """
You are a Senior Scientific Reviewer auditing an automated analysis pipeline.
You will be given:
1. Scientific Objective (what metrics to extract)
2. Experimental Context (may describe PLANNED experiments - this is for reference only)
3. Calculated Metrics (extracted from the ACTUAL data file)
4. Visual Proof (Plot)

**TASK:** Verify if the analysis is correct.
- **Check Visuals:** Does the plot show that the signal was correctly identified? (e.g. Is the red line actually on the peak?)
- **Check Logic:** Does the code actually calculate what was asked?
- **Check Physics:** Are the values reasonable (e.g. non-negative for intensity)?

**OUTPUT JSON:**
{ "status": "pass", "reasoning": "..." }
OR 
{ "status": "fail", "feedback": "The baseline correction failed; plot shows slope." }
"""



================================================
FILE: knowledge_base.py
================================================
import os
import numpy as np
import faiss
import time
import json
from pathlib import Path
import logging
from typing import List, Dict, Any, Optional

from ...auth import get_api_key, APIKeyNotFoundError
from ...wrappers.openai_wrapper_embeddings import OpenAIAsEmbeddingModel
from ...wrappers.litellm_wrapper import LiteLLMEmbeddingModel


from ._deprecation import normalize_params

from openai import RateLimitError


class KnowledgeBase:
    """
    Handles embedding, retrieval, and repository structure mapping.
    Supports both Google and OpenAI-compatible (e.g., incubator) embedding models.

    Args:
        api_key: API key for the embedding provider.
        embedding_model: Name of the embedding model.
        base_url: Base URL for internal proxy endpoint.
        use_litellm: If True and base_url is None, use LiteLLM.
        
        google_api_key: DEPRECATED. Use 'api_key' instead.
        local_model: DEPRECATED. Use 'base_url' instead.
    """
    def __init__(
        self,
        api_key: Optional[str] = None,
        embedding_model: str = "gemini-embedding-001",
        base_url: Optional[str] = None,
        use_litellm: bool = False,
        # Deprecated parameters
        google_api_key: Optional[str] = None,
        local_model: Optional[str] = None,
    ):
   
        # Handle deprecated parameters
        api_key, base_url = normalize_params(
            api_key=api_key,
            google_api_key=google_api_key,
            base_url=base_url,
            local_model=local_model,
            source="KnowledgeBase"
        )
        
        self.embedding_model_name = embedding_model
        
        # Initialize embedding client
        if base_url:
            logging.info(f"🏛️ KnowledgeBase using internal proxy for embeddings")
            self.embedding_client = OpenAIAsEmbeddingModel(
                model=embedding_model,
                api_key=api_key,
                base_url=base_url
            )
        elif use_litellm:
            logging.info(f"🌐 KnowledgeBase using LiteLLM for embeddings: {embedding_model}")
            self.embedding_client = LiteLLMEmbeddingModel(
                model=embedding_model,
                api_key=api_key
            )
        else:
            logging.info(f"🔷 KnowledgeBase using OpenAI client for embeddings")
            self.embedding_client = OpenAIAsEmbeddingModel(
                model=embedding_model,
                api_key=api_key
            )
            
        self.index = None
        self.chunks = []
        self.sources: List[str | Dict[str, str]] = []
        
        # Registry for Repo Maps: {'repo_name': 'tree_structure_string'}
        # This stores the visual directory trees for any repo you ingest.
        self.repo_maps: Dict[str, str] = {}

    def build(self, chunks: List[Dict[str, any]], batch_size: int = 100):
        """
        Processes a list of text chunks, generates embeddings in batches, 
        and builds the vector index.
        """
        if not chunks:
            print("⚠️  KnowledgeBase build skipped: No chunks provided.")
            return

        self.chunks.extend(chunks)
        texts_to_embed = [chunk['text'] for chunk in chunks]
        all_embeddings = []
        
        print(f"  - Generating embeddings for {len(texts_to_embed)} chunks using '{self.embedding_model_name}'...")
        
        for i in range(0, len(texts_to_embed), batch_size):
            batch_texts = texts_to_embed[i:i + batch_size]
            
            max_retries = 3
            delay = 5 # seconds
            for attempt in range(max_retries):
                try:
                    response = self.embedding_client.embed_content(
                        model=self.embedding_model_name,
                        content=batch_texts,
                        task_type="RETRIEVAL_DOCUMENT" # Ignored by OpenAI wrapper, used by Google
                    )
                    all_embeddings.extend(response['embedding'])
                    print(f"    - Embedded batch {i//batch_size + 1}/{(len(texts_to_embed) + batch_size - 1)//batch_size}")
                    time.sleep(1) # Small delay to respect API rate limits
                    break # Success
                except RateLimitError as e:
                    if attempt < max_retries - 1:
                        print(f"    - ⚠️  Rate limit hit during build. Retrying in {delay}s...")
                        time.sleep(delay)
                        delay *= 2 # Exponential backoff
                    else:
                        print(f"    - ❌ Rate limit hit on final attempt. Build failed.")
                        raise e 
                except Exception as e:
                    print(f"    - ❌ Error embedding batch {i//batch_size + 1}: {e}")
                    raise e

        embeddings_np = np.array(all_embeddings, dtype=np.float32)
        dimension = embeddings_np.shape[1]

        if self.index is None: 
            print("  - Building FAISS vector index...")
            self.index = faiss.IndexFlatL2(dimension)
        else:
            print("  - Appending to existing FAISS vector index...")

        self.index.add(embeddings_np)
        print("  - ✅ Knowledge base built successfully.")

    def save(self, index_path: str, chunks_path: str, repo_map_path: str = None, sources_path: str = None):
        """Saves the FAISS index, text chunks, and optionally the repo maps to disk."""
        if self.index:
            faiss.write_index(self.index, index_path)
            print(f"  - FAISS index saved to {index_path}")
        
        with open(chunks_path, 'w', encoding='utf-8') as f:
            json.dump(self.chunks, f, indent=2)
            print(f"  - Chunks saved to {chunks_path}")

        with open(sources_path, 'w', encoding='utf-8') as f:
            json.dump(self.sources, f, indent=2)
            print(f"  - Sources saved to {sources_path}")

        # Save Repo Maps Registry
        if repo_map_path and self.repo_maps:
            try:
                with open(repo_map_path, 'w', encoding='utf-8') as f:
                    json.dump(self.repo_maps, f, indent=2)
                print(f"  - Repo maps registry saved to {repo_map_path}")
            except Exception as e:
                print(f"  - ❌ Error saving repo maps: {e}")

    def load(self, index_path: str, chunks_path: str, repo_map_path: str = None, sources_path: str = None) -> bool:
        """Loads a pre-built FAISS index, chunks, and repo maps from disk."""
        index_file = Path(index_path)
        chunks_file = Path(chunks_path)
        sources_file = Path(sources_path)

        if not index_file.exists() or not chunks_file.exists() or not sources_file.exists() :
            print("  - ⚠️  Cannot load: Index or chunks or sources file missing.")
            return False
            
        try:
            self.index = faiss.read_index(index_path)
            with open(chunks_file, 'r', encoding='utf-8') as f:
                self.chunks = json.load(f)
            
            with open(sources_file, 'r', encoding='utf-8') as f:
                self.sources = json.load(f)
                
            # Load Repo Maps if path provided and file exists
            if repo_map_path and Path(repo_map_path).exists():
                try:
                    with open(repo_map_path, 'r', encoding='utf-8') as f:
                        self.repo_maps = json.load(f)
                    print(f"    - Loaded maps for repos: {list(self.repo_maps.keys())}")
                except Exception as e:
                    print(f"    - ⚠️ Error loading repo maps file: {e}")
            
            print(f"  - ✅ Successfully loaded {len(self.chunks)} chunks and index with {self.index.ntotal} vectors from {len(self.sources)} sources.")
            return True
        except Exception as e:
            print(f"  - ❌ Error loading knowledge base: {e}")
            self.index = None
            self.chunks = []
            return False

    def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Retrieves the most relevant document chunks for a given query.
        """
        if not self.index:
            print("⚠️  Cannot retrieve: Knowledge base has not been built.")
            return []
            
        print(f"  - Retrieving top {top_k} most relevant chunks for query: '{query[:80]}...'")

        max_retries = 3
        delay = 5 # seconds
        response = None
        for attempt in range(max_retries):
            try:
                response = self.embedding_client.embed_content(
                    model=self.embedding_model_name,
                    content=query,
                    task_type="RETRIEVAL_QUERY" # Ignored by OpenAI wrapper, used by Google
                )
                break # Success
            except RateLimitError as e:
                if attempt < max_retries - 1:
                    print(f"    - ⚠️  Rate limit hit embedding query. Retrying in {delay}s...")
                    time.sleep(delay)
                    delay *= 2 # Exponential backoff
                else:
                    print(f"    - ❌ Rate limit hit on final attempt. Retrieval failed.")
                    raise e # Re-raise the exception if all retries fail
            except Exception as e:
                print(f"    - ❌ Error embedding query: {e}")
                raise e
        
        if response is None:
            print("    - ❌ Retrieval failed after retries.")
            return []

        query_embedding = np.array([response['embedding']], dtype=np.float32)

        if query_embedding.ndim == 3:
            query_embedding = np.squeeze(query_embedding, axis=0)

        distances, indices = self.index.search(query_embedding, top_k)
        
        # Retrieve valid chunks (filtering out potential index errors)
        retrieved_chunks = [self.chunks[i] for i in indices[0] if i < len(self.chunks)]
        print(f"  - ✅ Retrieved {len(retrieved_chunks)} chunks.")
        return retrieved_chunks

    def get_relevant_maps(self, retrieved_chunks: List[Dict]) -> str:
        """
        Dynamic Context Injection:
        Looks at the retrieved chunks, finds which repos they belong to (via 'repo_name' metadata),
        and returns a combined string of ONLY the relevant repo maps.
        """
        relevant_repos = set()
        for chunk in retrieved_chunks:
            # We ensure chunks have this metadata field in planning_agent.py
            repo_name = chunk['metadata'].get('repo_name')
            if repo_name and repo_name in self.repo_maps:
                relevant_repos.add(repo_name)
        
        if not relevant_repos:
            return ""

        combined_map = ""
        for repo in relevant_repos:
            combined_map += f"\n--- DIRECTORY STRUCTURE FOR REPO: {repo} ---\n"
            combined_map += self.repo_maps[repo]
            combined_map += "\n"
            
        return combined_map
       
    def source_difference(self, new_sources: List[str | Dict[str, str]]) -> List[str | Dict[str, str]]:
        """Returns the subset of new sources which are not present in the existing sources."""
        
        if not new_sources:
            return []

        # Check if the new sources are dictionaries
        contains_dict = any(isinstance(item, dict) for item in new_sources)
        
        if contains_dict:
            # 1. Convert new sources to tuples for set comparison
            new_sources_tuple = {tuple(sorted(d.items())) for d in new_sources if isinstance(d, dict)}
            
            # 2. Filter existing sources to ONLY check dictionaries
            old_sources_tuple = {
                tuple(sorted(d.items())) 
                for d in self.sources 
                if isinstance(d, dict)
            }
            
            # 3. Calculate difference and convert back to dicts
            difference_tuples = new_sources_tuple - old_sources_tuple
            source_difference = [dict(t) for t in difference_tuples]
            
        else:
            # Helper to normalize paths: "./foo" and "foo" become the same
            normalize = lambda p: os.path.normpath(p)
            
            # 1. Filter existing sources to only check strings (normalized)
            existing_strings = {normalize(s) for s in self.sources if isinstance(s, str)}
            
            # 2. Calculate difference (comparing normalized paths)
            source_difference = [s for s in new_sources if normalize(s) not in existing_strings]
        
        # Update history
        self.sources.extend(source_difference)
        return source_difference


================================================
FILE: orchestrator_tools.py
================================================
"""
Tool definitions and schemas for the PlanningOrchestratorAgent.
Supports both Google Gemini (function objects) and OpenAI (JSON schemas).
"""

from datetime import datetime
import json
import logging
import pandas as pd
from pathlib import Path
from typing import Dict, Any, Callable
import hashlib

from .parser_utils import write_experiments_to_disk


class OrchestratorTools:
    """
    Manages tool definitions, schemas, and execution for the OrchestratorAgent.
    """
    
    def __init__(self, orchestrator_instance):
        """
        Args:
            orchestrator_instance: Reference to the parent OrchestratorAgent
        """
        self.orch = orchestrator_instance
        
        # Build function map and schemas
        self.functions_map: Dict[str, Callable] = {}
        self.openai_schemas: list = []
        self.gemini_functions: list = []
        
        self._register_all_tools()

    def _get_human_feedback_enabled(self) -> bool:
        """
        Get current human feedback setting from orchestrator.
        Returns True if not set (backwards compatible default).
        """
        return getattr(self.orch, '_enable_human_feedback', True)

    def _compute_file_hash(self, file_path: str) -> str:
        """Compute MD5 hash of file content for deduplication."""
        hasher = hashlib.md5()
        try:
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(8192), b''):
                    hasher.update(chunk)
            return hasher.hexdigest()
        except Exception as e:
            logging.warning(f"Could not compute hash for {file_path}: {e}")
            return ""


    def _parse_result_input(self, result_data: str):
        """
        Helper to parse result_data into appropriate format.
        
        Returns:
            - String (text input)
            - String (single file path)
            - List of strings (multiple file paths)
        """
        if len(result_data) < 500:  # Reasonable path length
            try:
                # Check if it's a single file path
                path = Path(result_data.strip())
                if path.exists() and path.is_file():
                    print(f"    (Detected file path: {path.name})")
                    return str(path)
                
                # Check if it's comma-separated file paths
                if ',' in result_data:
                    paths = [p.strip() for p in result_data.split(',')]
                    valid_paths = []
                    for p in paths:
                        p_obj = Path(p)
                        if p_obj.exists():
                            valid_paths.append(p)
                    
                    if valid_paths:
                        print(f"    (Detected {len(valid_paths)} file paths)")
                        return valid_paths
                    else:
                        # Treat as text if no valid paths found
                        text_preview = result_data[:100] + "..." if len(result_data) > 100 else result_data
                        print(f"    (Processing as text: '{text_preview}')")
                        return result_data
                else:
                    # Not a valid path - treat as text
                    text_preview = result_data[:100] + "..." if len(result_data) > 100 else result_data
                    print(f"    (Processing text input: '{text_preview}')")
                    return result_data
                    
            except (OSError, ValueError, RuntimeError):
                # Not a valid path - treat as text
                text_preview = result_data[:100] + "..." if len(result_data) > 100 else result_data
                print(f"    (Processing text input: '{text_preview}')")
                return result_data
        else:
            # Too long to be a path - treat as text
            text_preview = result_data[:100] + "..." if len(result_data) > 100 else result_data
            print(f"    (Processing text input: '{text_preview}')")
            return result_data
        
    def _resolve_data_path(self, path_input: str) -> tuple[str, str]:
        """
        Resolves user input to actual file path with fuzzy matching for typos.
        
        Returns:
            (resolved_path, None) on success
            (None, error_json) on failure (with suggestions if available)
        """
        from difflib import get_close_matches
        
        path = Path(path_input.strip())
        
        # Case 1: Path exists as-is
        if path.exists():
            return str(path), None
        
        # Case 2: Try common extensions if no extension provided
        if not path.suffix:
            for ext in ['.csv', '.xlsx', '.xls']:
                candidate = path.with_suffix(ext)
                if candidate.exists():
                    print(f"    🔍 Resolved: {path.name} → {candidate.name}")
                    return str(candidate), None
        
        # Case 3: Try in common data folders
        search_folders = ['./experimental_results', './data', './results', './']
        all_candidates = []  # Track all files we find for fuzzy matching
        
        if not path.is_absolute():
            stem = path.stem if path.suffix else path.name
            
            for folder in search_folders:
                folder_path = Path(folder)
                if not folder_path.exists():
                    continue
                
                # Collect all data files in this folder
                for ext in ['.csv', '.xlsx', '.xls']:
                    all_candidates.extend(folder_path.glob(f"*{ext}"))
                
                # Try exact match with provided extension
                if path.suffix:
                    candidate = folder_path / path.name
                    if candidate.exists():
                        print(f"    🔍 Found: {path.name} in {folder}/")
                        return str(candidate), None
                
                # Try common extensions
                for ext in ['.csv', '.xlsx', '.xls']:
                    candidate = folder_path / f"{stem}{ext}"
                    if candidate.exists():
                        print(f"    🔍 Found: {stem}{ext} in {folder}/")
                        return str(candidate), None
        
        # Case 4: File not found - use fuzzy matching to suggest alternatives
        if all_candidates:
            # Get filenames without path
            candidate_names = [f.name for f in all_candidates]
            
            # Try fuzzy match on the input filename
            input_name = path.name
            matches = get_close_matches(input_name, candidate_names, n=3, cutoff=0.6)
            
            if matches:
                # Find full paths for the matches
                suggested_files = []
                for match in matches:
                    for candidate in all_candidates:
                        if candidate.name == match:
                            suggested_files.append(str(candidate))
                            break
                
                return None, json.dumps({
                    "status": "error",
                    "message": f"File not found: {path_input}",
                    "did_you_mean": matches,
                    "full_paths": suggested_files,
                    "hint": f"Did you mean '{matches[0]}'? Use: primary_data_set='{suggested_files[0]}'"
                })
        
        # No matches found at all
        return None, json.dumps({
            "status": "error",
            "message": f"Could not find file: {path_input}",
            "searched_in": [str(f) for f in search_folders if Path(f).exists()],
            "hint": "Check filename spelling or use /files command to see available files"
        })
    
    def _register_all_tools(self):
        """Register all tools with both OpenAI and Gemini formats."""
        
        # 0. LIST WORKSPACE FILES
        def list_workspace_files():
            """
            Lists files in the campaign directory including analysis artifacts.

            Returns:
                JSON string with the top-level file names of the base
                directory, the files under analysis_artifacts/ (prefixed with
                that folder name), the row count of the BO data CSV,
                whether that count is at least 3, and the file name of the
                active scalarizer script (or null).
            """
            base_dir = self.orch.base_dir
            print(f"  ⚡ Tool: Listing files in {base_dir}...")
            files = [f.name for f in base_dir.iterdir() if f.is_file()]

            artifacts_dir = base_dir / "analysis_artifacts"
            artifact_names = []
            if artifacts_dir.exists():
                artifact_names = [
                    f"analysis_artifacts/{f.name}"
                    for f in artifacts_dir.iterdir()
                    if f.is_file()
                ]

            all_files = files + artifact_names

            # Include data point count for optimization readiness
            data_count = 0
            bo_data_path = self.orch.bo_data_path
            if bo_data_path.exists():
                try:
                    data_count = len(pd.read_csv(bo_data_path))
                except Exception as e:
                    # Best effort: an unreadable CSV counts as zero points,
                    # but the failure is logged instead of silently dropped
                    # (and KeyboardInterrupt/SystemExit are no longer caught).
                    logging.warning(f"Could not read data file {bo_data_path}: {e}")

            active_script = self.orch.active_scalarizer_script
            return json.dumps({
                "status": "success",
                "files": all_files,
                "data_points_collected": data_count,
                "optimization_ready": data_count >= 3,
                "active_analysis_script": Path(active_script).name if active_script else None
            })
        
        self._register_tool(
            func=list_workspace_files,
            name="list_workspace_files",
            description="Lists files in the session directory (checkpoints, analysis artifacts, etc.). User data files may exist outside the session folder.",
            parameters={}
        )
        
        # 1. GENERATE INITIAL PLAN
        def generate_initial_plan(
            specific_objective: str = None,
            knowledge_paths: str = None,
            primary_data_set: str = None,
            additional_context: str = None
        ):
            """
            Generates experimental plan (science strategy only, no code).

            Implementation code is a separate step: call
            generate_implementation_code() after the plan is approved.

            Args:
                specific_objective: Objective to plan for; the orchestrator's
                    own objective is used when omitted.
                knowledge_paths: Comma-separated knowledge source paths. Every
                    path must exist, otherwise an error is returned.
                primary_data_set: Data file or folder, resolved through
                    _resolve_data_path(). A folder must contain exactly one
                    .csv/.xlsx/.xls file to be usable.
                additional_context: Free-text user requirements passed to the
                    planner together with any stored TEA summary.

            Returns:
                JSON string whose "status" is "success" or "error". On success
                plan.json and plan.html have been written under the
                orchestrator's base directory.
            """
            obj = specific_objective if specific_objective else self.orch.objective
            print(f"  ⚡ Tool: Generating Initial Plan for '{obj}'...")

            # Parse knowledge paths
            knowledge_list = None
            if knowledge_paths:
                knowledge_list = [p.strip() for p in knowledge_paths.split(',') if p.strip()]

                # Validate paths
                invalid_paths = [p for p in knowledge_list if not Path(p).exists()]
                if invalid_paths:
                    return json.dumps({
                        "status": "error",
                        "message": f"Knowledge paths not found: {', '.join(invalid_paths)}",
                        "hint": "Check folder names and spelling"
                    })

                print(f"    📚 Knowledge sources: {knowledge_list}")

            # Parse primary dataset
            primary_dataset = None
            if primary_data_set:
                # Try to resolve the path
                resolved_path, error = self._resolve_data_path(primary_data_set)

                if error:
                    return error  # Return the error JSON with suggestions

                path = Path(resolved_path)

                # Now handle resolved path
                if path.is_file():
                    primary_dataset = {"file_path": str(path)}
                    print(f"    📊 Primary data: {path.name}")

                elif path.is_dir():
                    # Directory - check how many data files
                    all_files = []
                    for ext in ['*.csv', '*.xlsx', '*.xls']:
                        all_files.extend(path.glob(ext))

                    if not all_files:
                        return json.dumps({
                            "status": "error",
                            "message": f"No data files (.csv, .xlsx, .xls) found in: {primary_data_set}",
                            "hint": "Add data files to the folder or specify a different path"
                        })

                    elif len(all_files) == 1:
                        # Only one file - use it automatically
                        primary_dataset = {"file_path": str(all_files[0])}
                        print(f"    📊 Primary data (auto-selected): {all_files[0].name}")

                    else:
                        # Multiple files - require user to specify
                        file_list = sorted([f.name for f in all_files])
                        # Build the example from the folder that was actually
                        # searched so the suggested path really exists.
                        example_path = path / file_list[0]
                        return json.dumps({
                            "status": "error",
                            "message": f"Multiple data files found in '{primary_data_set}'",
                            "available_files": file_list,
                            "file_count": len(file_list),
                            "hint": f"Please specify which file to use. Example: primary_data_set='{example_path}'"
                        })

            # Build context
            context_parts = []

            if additional_context:
                context_parts.append(f"User Requirements: {additional_context}")
                print(f"    ℹ️  User context: {additional_context[:60]}...")

            # Auto-include TEA results
            if self.orch.latest_tea_results:
                tea_summary = self.orch.latest_tea_results.get('summary', '')
                context_parts.append(f"Economic Analysis Results: {tea_summary}")
                print(f"    💰 Including TEA results in context")

            context_dict = None
            if context_parts:
                context_dict = {"user_context": "\n\n".join(context_parts)}

            try:
                # Call the new generate_plan method (not propose_experiments!)
                plan = self.orch.planner.generate_plan(
                    objective=obj,
                    knowledge_paths=knowledge_list,
                    primary_data_set=primary_dataset,
                    additional_context=context_dict,
                    enable_human_feedback=self._get_human_feedback_enabled(),
                    reset_state=False
                )

                if plan.get("error"):
                    return json.dumps({
                        "status": "error",
                        "message": plan.get("error")
                    })

                # Save
                output_path = self.orch.base_dir / "plan.json"
                with open(output_path, 'w') as f:
                    json.dump(plan, f, indent=2)

                # Generate HTML
                from .html_generator import HTMLReportGenerator
                html_path = self.orch.base_dir / "plan.html"
                generator = HTMLReportGenerator(self.orch.planner.state)
                generator.generate(str(html_path))

                num_experiments = len(plan.get('proposed_experiments', []))

                return json.dumps({
                    "status": "success",
                    "iteration": plan.get('iteration'),
                    "num_experiments": num_experiments,
                    "output_path": str(output_path),
                    "html_report": str(html_path),
                    "knowledge_used": knowledge_list is not None,
                    "primary_data_used": primary_dataset is not None,
                    "tea_context_included": self.orch.latest_tea_results is not None,
                    "hint": "Use generate_implementation_code() to add executable code"
                })

            except Exception as e:
                # Tool boundary: report the failure to the caller as JSON.
                logging.error(f"Plan generation error: {e}", exc_info=True)
                return json.dumps({
                    "status": "error",
                    "message": str(e)
                })

        # Register it
        self._register_tool(
            func=generate_initial_plan,
            name="generate_initial_plan",
            description=(
                "Generates experimental plan (science strategy only, no implementation code). "
                "Automatically includes previous TEA results if available. "
                "Can use: papers/reports, experimental data, lab constraints."
            ),
            parameters={
                "specific_objective": {"type": "string", "description": "Research objective"},
                "knowledge_paths": {"type": "string", "description": "Comma-separated paths to papers/reports/docs folders"},
                "primary_data_set": {"type": "string", "description": "Path to experimental data file or folder"},
                "additional_context": {"type": "string", "description": "Lab constraints, equipment, reagents, budget, etc."}
            },
            required=[]
        )

        # 2. GENERATE IMPLEMENTATION CODE
        def generate_implementation_code(code_paths: str = None):
            """
            Adds implementation code to the most recent experimental plan.
            Use after generate_initial_plan() to map experiments to executable code.

            Args:
                code_paths: Comma-separated paths to code folders.
                        Optional if Code KB already loaded at startup.

            Returns:
                JSON string with "status" of "success", "warning" (the plan
                already carries implementation code) or "error". On success
                plan.json and plan.html are rewritten and the scripts are
                saved under output_scripts/ in the base directory.
            """

            if not self.orch.planner.state or not self.orch.planner.state.get("current_plan"):
                return json.dumps({
                    "status": "error",
                    "message": "No active plan. Generate a plan first using generate_initial_plan()"
                })

            current_plan = self.orch.planner.state["current_plan"]

            # Check if already has code
            if current_plan.get("proposed_experiments"):
                has_code = any(exp.get("implementation_code") for exp in current_plan["proposed_experiments"])
                if has_code:
                    return json.dumps({
                        "status": "warning",
                        "message": "Plan already has implementation code",
                        "hint": "Generate a new plan if you want to change the code source"
                    })

            print(f"  ⚡ Tool: Generating implementation code for existing plan...")

            kb_available = (self.orch.planner.kb_code.index and
                            self.orch.planner.kb_code.index.ntotal > 0)

            if not kb_available and not code_paths:
                return json.dumps({
                    "status": "error",
                    "message": "No Code Knowledge Base available",
                    "hint": "Provide code_paths parameter (e.g., code_paths='./opentrons_api,./automation_lib')",
                    "available_options": [
                        "Option 1: Specify code_paths='./your_code_folder'",
                        "Option 2: If code exists, check folder name and path"
                    ]
                })

            # Parse code paths
            code_list = []
            if code_paths:
                code_list = [p.strip() for p in code_paths.split(',') if p.strip()]

                # Validate paths (only if code_paths was provided)
                invalid_paths = [p for p in code_list if not Path(p).exists()]

                if invalid_paths:
                    # Check for common typos
                    suggestions = []
                    for invalid in invalid_paths:
                        invalid_path = Path(invalid)
                        parent = invalid_path.parent
                        if not parent.is_dir():
                            continue
                        # Compare only the final path component: the typed
                        # string may carry a './' or folder prefix that can
                        # never occur inside a bare directory name.
                        wanted = invalid_path.name.lower()
                        if not wanted:
                            continue
                        similar = [f.name for f in parent.iterdir()
                                   if f.is_dir() and wanted in f.name.lower()]
                        if similar:
                            # Keep the parent folder so the suggestion is a
                            # usable path, not just a directory name.
                            suggestions.append(f"Did you mean '{parent / similar[0]}'?")

                    hint = "Check folder names and spelling."
                    if suggestions:
                        hint += " " + " ".join(suggestions)

                    return json.dumps({
                        "status": "error",
                        "message": f"Code paths not found: {', '.join(invalid_paths)}",
                        "hint": hint
                    })

                print(f"    💻 Code sources: {code_list}")
            elif kb_available:
                print(f"    💻 Using existing Code KB ({self.orch.planner.kb_code.index.ntotal} vectors)")

            try:
                updated_plan = self.orch.planner.generate_implementation_code(
                    plan=current_plan,
                    code_paths=code_list,
                    enable_human_feedback=self._get_human_feedback_enabled()
                )

                if updated_plan.get("error"):
                    return json.dumps({
                        "status": "error",
                        "message": updated_plan.get("error")
                    })

                # Save
                output_path = self.orch.base_dir / "plan.json"
                with open(output_path, 'w') as f:
                    json.dump(updated_plan, f, indent=2)

                # Regenerate HTML
                from .html_generator import HTMLReportGenerator
                html_path = self.orch.base_dir / "plan.html"
                generator = HTMLReportGenerator(self.orch.planner.state)
                generator.generate(str(html_path))

                # Save scripts to output folder
                final_out = str(self.orch.base_dir / "output_scripts")
                print(f"\n--- Saving Scripts to: {final_out} ---")
                write_experiments_to_disk(updated_plan, final_out)

                return json.dumps({
                    "status": "success",
                    "message": "Implementation code added to plan",
                    "output_path": str(output_path),
                    "html_report": str(html_path),
                    "scripts_saved_to": final_out,
                    "code_sources_used": code_list
                })

            except Exception as e:
                # Tool boundary: report the failure to the caller as JSON.
                logging.error(f"Code generation error: {e}", exc_info=True)
                return json.dumps({
                    "status": "error",
                    "message": str(e)
                })

        # Register it
        self._register_tool(
            func=generate_implementation_code,
            name="generate_implementation_code",
            description=(
                "Generates executable implementation code for the most recent experimental plan. "
                "Maps experimental steps to code using API documentation and example repositories. "
                "Use after generate_initial_plan() once the scientific strategy is approved. "
                "If Code KB already loaded, code_paths is optional."
            ),
            parameters={
                "code_paths": {
                    "type": "string",
                    "description": (
                        "Comma-separated paths to code/API folders (e.g., './opentrons_api,./automation_lib'). "
                        "OPTIONAL if Code Knowledge Base is already loaded. "
                        "REQUIRED if no Code KB exists."
                    )
                }
            },
            required=[]
        )
        
        # 3. RUN ECONOMIC ANALYSIS
        def run_economic_analysis(
            focus_topic: str = None,
            knowledge_paths: str = None,
            primary_data_set: str = None,
            additional_context: str = None
        ):
            """
            Performs Technoeconomic Analysis (TEA).

            Args:
                focus_topic: Subject of the analysis; the orchestrator's
                    objective is used when omitted.
                knowledge_paths: Comma-separated knowledge source paths.
                primary_data_set: Data file or folder, resolved through
                    _resolve_data_path(). A folder must contain exactly one
                    .csv/.xlsx/.xls file to be usable.
                additional_context: Accepted for schema compatibility; it is
                    not currently forwarded to the planner.

            Returns:
                JSON string whose "status" is "success" or "error". On success
                the summary is also stored on the orchestrator as
                latest_tea_results.
            """
            # NOTE(review): additional_context is advertised in the tool schema
            # but never used below - confirm whether
            # perform_technoeconomic_analysis() can accept it.
            obj = focus_topic if focus_topic else self.orch.objective
            print(f"  ⚡ Tool: Running TEA for '{obj}'...")

            # Parse knowledge paths
            knowledge_list = None
            if knowledge_paths:
                knowledge_list = [p.strip() for p in knowledge_paths.split(',') if p.strip()]
                print(f"    📚 Knowledge sources: {knowledge_list}")

            # Parse primary dataset
            primary_dataset = None
            if primary_data_set:
                # Try to resolve the path
                resolved_path, error = self._resolve_data_path(primary_data_set)

                if error:
                    return error  # Return the error JSON with suggestions

                path = Path(resolved_path)

                # Now handle resolved path
                if path.is_file():
                    primary_dataset = {"file_path": str(path)}
                    print(f"    📊 Primary data: {path.name}")

                elif path.is_dir():
                    # Directory - check how many data files
                    all_files = []
                    for ext in ['*.csv', '*.xlsx', '*.xls']:
                        all_files.extend(path.glob(ext))

                    if not all_files:
                        return json.dumps({
                            "status": "error",
                            "message": f"No data files (.csv, .xlsx, .xls) found in: {primary_data_set}",
                            "hint": "Add data files to the folder or specify a different path"
                        })

                    elif len(all_files) == 1:
                        # Only one file - use it automatically
                        primary_dataset = {"file_path": str(all_files[0])}
                        print(f"    📊 Primary data (auto-selected): {all_files[0].name}")

                    else:
                        # Multiple files - require user to specify
                        file_list = sorted([f.name for f in all_files])
                        # Build the example from the folder that was actually
                        # searched so the suggested path really exists.
                        example_path = path / file_list[0]
                        return json.dumps({
                            "status": "error",
                            "message": f"Multiple data files found in '{primary_data_set}'",
                            "available_files": file_list,
                            "file_count": len(file_list),
                            "hint": f"Please specify which file to use. Example: primary_data_set='{example_path}'"
                        })

            try:
                res = self.orch.planner.perform_technoeconomic_analysis(
                    objective=obj,
                    knowledge_paths=knowledge_list,
                    primary_data_set=primary_dataset,
                    output_json_path=str(self.orch.base_dir / "tea_analysis.json")
                )

                if res.get("error"):
                    return json.dumps({
                        "status": "error",
                        "message": res.get("error")
                    })

                summary = res.get('technoeconomic_assessment', {}).get('summary', 'No summary')

                # Store TEA results in orchestrator state
                self.orch.latest_tea_results = {
                    "summary": summary,
                    "full_analysis": res.get('technoeconomic_assessment'),
                    "timestamp": datetime.now().isoformat()
                }
                print(f"    ✅ TEA results stored for future planning")

                # The HTML path is reported but not written here; presumably
                # the planner creates it next to the JSON - TODO confirm.
                return json.dumps({
                    "status": "success",
                    "summary": summary,
                    "output_path": str(self.orch.base_dir / "tea_analysis.json"),
                    "html_report": str(self.orch.base_dir / "tea_analysis.html"),
                    "hint": "These results will automatically inform future generate_initial_plan calls"
                })

            except Exception as e:
                # Tool boundary: report the failure to the caller as JSON.
                logging.error(f"TEA error: {e}", exc_info=True)
                return json.dumps({
                    "status": "error",
                    "message": str(e)
                })

        self._register_tool(
            func=run_economic_analysis,
            name="run_economic_analysis",
            description=(
                "Performs Technoeconomic Analysis (TEA) to assess economic viability, costs, market fit. "
                "Can incorporate papers and experimental data."
            ),
            parameters={
                "focus_topic": {
                    "type": "string",
                    "description": "Specific technology/process to analyze"
                },
                "knowledge_paths": {
                    "type": "string",
                    "description": "Comma-separated folder paths with papers/PDFs"
                },
                "primary_data_set": {
                    "type": "string",
                    "description": "Path to experimental data file or folder"
                },
                "additional_context": {
                    "type": "string",
                    "description": "Any other relevant context (constraints, requirements, etc.)"
                }
            },
            required=[]
        )
        
        # 4. REFINE PLAN (based on results)
        def refine_plan_with_results(result_data: str, use_literature_rag: bool = False):
            """
            Revise the current experimental plan (strategy only) from results.

            Typical uses: strategic pivots or failures, qualitative
            observations, visual analysis of plots/images, or experiments that
            did not go as expected.

            ``result_data`` may be:
            - Text: "Yield was 12%, precipitation observed"
            - File path: "./data.csv" or "./plot.png"
            - Comma-separated files: "./data.csv,./plot.png"

            Returns a JSON string with "status" of "success" or "error"; on
            success plan_refined.json and plan_refined.html are written under
            the orchestrator's base directory.
            """
            print(f"  ⚡ Tool: Refining Plan based on Results...")

            def _failure(message):
                # Uniform error envelope for every failure path below.
                return json.dumps({
                    "status": "error",
                    "message": message
                })

            # Text stays a string; path input becomes a path or list of paths.
            parsed_results = self._parse_result_input(result_data)

            try:
                refined = self.orch.planner.refine_plan(
                    results=parsed_results,
                    enable_human_feedback=self._get_human_feedback_enabled(),
                    use_literature_rag=use_literature_rag
                )

                if refined.get("error"):
                    return _failure(refined.get("error"))

                # Persist the refined plan as JSON
                json_path = self.orch.base_dir / "plan_refined.json"
                with open(json_path, 'w') as handle:
                    json.dump(refined, handle, indent=2)

                # Render the HTML report from the planner's state
                from .html_generator import HTMLReportGenerator
                report_path = self.orch.base_dir / "plan_refined.html"
                HTMLReportGenerator(self.orch.planner.state).generate(str(report_path))

                experiments = refined.get('proposed_experiments', [])
                return json.dumps({
                    "status": "success",
                    "iteration": refined.get('iteration'),
                    "num_experiments": len(experiments),
                    "output_path": str(json_path),
                    "html_report": str(report_path),
                    "hint": "Use refine_implementation_code() to update executable code"
                })

            except Exception as e:
                logging.error(f"Plan refinement error: {e}", exc_info=True)
                return _failure(str(e))
        
        # Register the refine_plan_with_results closure defined above as a tool.
        # Only "result_data" is listed as required; "use_literature_rag" is
        # optional and described to the caller as defaulting to false.
        self._register_tool(
            func=refine_plan_with_results,
            name="refine_plan_with_results",
            description=(
                "Refines experimental plan (science strategy only) based on results. "
                "Handles text descriptions, single file paths, or comma-separated files. "
                "Use for: failures, pivots, qualitative observations, or visual analysis. "
                "Does NOT update implementation code - use refine_implementation_code() for that."
            ),
            parameters={
                "result_data": {
                    "type": "string",
                    "description": "Experimental results (text, file path, or comma-separated files)"
                },
                "use_literature_rag": {
                    "type": "boolean", 
                    "description": "Search knowledge base for relevant literature context. Default: false."
                }
            },
            required=["result_data"]
        )
        
        # 5. REFINE IMPLEMENTATION CODE (based on refined plan)
        def refine_implementation_code():
            """
            Regenerate executable code for the planner's current plan.

            Intended to be called after refine_plan_with_results(). Reads
            ``current_plan`` from the planner state, asks the planner to refine
            its implementation code, then writes ``plan_refined.json``,
            ``plan_refined.html`` and the experiment scripts (under
            ``output_scripts``) inside the orchestrator base directory.

            Returns:
                A JSON string whose ``status`` is ``"success"`` (with the
                output locations) or ``"error"`` (with a ``message``).
            """
            planner = self.orch.planner

            # Guard clause: nothing to refine without an active plan.
            active_plan = planner.state.get("current_plan") if planner.state else None
            if not active_plan:
                return json.dumps({
                    "status": "error",
                    "message": "No active plan. Refine a plan first using refine_plan_with_results()"
                })

            iteration = active_plan.get('iteration')
            print(f"  ⚡ Tool: Refining implementation code for iteration {iteration}...")

            try:
                refreshed = planner.refine_implementation_code(
                    plan=active_plan,
                    enable_human_feedback=self._get_human_feedback_enabled()
                )

                # The planner reports failures through an "error" key.
                failure = refreshed.get("error")
                if failure:
                    return json.dumps({"status": "error", "message": failure})

                base_dir = self.orch.base_dir

                # Persist the refreshed plan as JSON.
                json_target = base_dir / "plan_refined.json"
                with open(json_target, 'w') as handle:
                    json.dump(refreshed, handle, indent=2)

                # Rebuild the HTML report from the planner state.
                from .html_generator import HTMLReportGenerator
                html_target = base_dir / "plan_refined.html"
                HTMLReportGenerator(planner.state).generate(str(html_target))

                # Write the executable scripts to disk.
                scripts_dir = str(base_dir / "output_scripts")
                print(f"\n--- Saving Scripts to: {scripts_dir} ---")
                write_experiments_to_disk(refreshed, scripts_dir)

                return json.dumps({
                    "status": "success",
                    "message": "Implementation code updated",
                    "output_path": str(json_target),
                    "html_report": str(html_target),
                    "scripts_saved_to": scripts_dir
                })

            except Exception as exc:
                # Tool boundary: report any failure to the caller as JSON.
                logging.error(f"Code refinement error: {exc}", exc_info=True)
                return json.dumps({"status": "error", "message": str(exc)})
        
        # Register the refine_implementation_code closure as a tool. It takes
        # no arguments, so both the parameter schema and required list are empty.
        self._register_tool(
            func=refine_implementation_code,
            name="refine_implementation_code",
            description=(
                "Updates implementation code for the most recently refined plan. "
                "Maps refined experimental steps to executable code. "
                "Use after refine_plan_with_results() once the scientific strategy is approved."
            ),
            parameters={},
            required=[]
        )
        
        # 6. ANALYZE FILE
        def analyze_file(
                file_path: str,
                extraction_goal: str = None,
                force_regenerate: bool = False,
                inputs: list[str] = None,
                targets: list[str] = None):
            """
            Analyzes a raw data file (CSV/XLSX) to extract metrics.

            Runs the scalarizer on the file, de-duplicates the extracted rows
            against previously analyzed files (tracked by absolute path and
            content hash), settles the input/target schema, and appends the new
            rows to the optimization dataset at ``self.orch.bo_data_path``.

            Args:
                file_path: Path to data file
                extraction_goal: What to extract
                force_regenerate: If True, regenerates analysis script even if one exists.
                inputs: List of column names to treat as INPUT parameters for optimization
                targets: List of column names to treat as OPTIMIZATION TARGETS

            Note:
                An explicit schema is only applied when BOTH ``inputs`` and
                ``targets`` are provided.

            Returns:
                A JSON string with ``status`` of ``"success"``, ``"warning"``
                (duplicate content found under another filename) or ``"error"``.
            """
            print(f"  ⚡ Tool: Analyzing {file_path}...")
            
            if not Path(file_path).exists(): 
                return json.dumps({"status": "error", "message": f"File {file_path} not found"})
            
            # Resolve absolute path for tracking
            file_path_abs = str(Path(file_path).resolve())
            
            #  Build schema-aware extraction goal
            enhanced_objective = extraction_goal or ""
            
            if inputs and targets:
                # User explicitly specified schema - incorporate into the objective query
                schema_instruction = f"""
        REQUIRED OUTPUT SCHEMA:
        - INPUT PARAMETERS (for optimization): {inputs}
        - TARGET METRICS (to optimize): {targets}

        Extract EXACTLY these columns from the data. Each row should contain values for all input parameters and all target metrics.
        For multi-objective optimization, we need BOTH targets: {targets}
        """
                enhanced_objective = f"{enhanced_objective}\n\n{schema_instruction}".strip()
                print(f"    📊 User-specified schema:")
                print(f"       Inputs: {inputs}")
                print(f"       Targets: {targets}")
            
            # Determine script to use
            if force_regenerate:
                script_to_use = None
                print(f"    🔄 Force regenerate: Creating new analysis script")
            else:
                script_to_use = self.orch.active_scalarizer_script if (
                    self.orch.active_scalarizer_script and Path(self.orch.active_scalarizer_script).exists()
                ) else None
                
                if script_to_use: 
                    print(f"    (Consistency Mode: Using cached script)")
                else: 
                    print(f"    (Discovery Mode: Generating new script)")
            
            # Pass schema to experiment context.
            # Tolerate a missing planner state and an empty/None
            # "proposed_experiments" list: indexing [0] on an empty list used to
            # raise IndexError here, outside the try block below.
            planner_state = self.orch.planner.state or {}
            current_plan = planner_state.get("current_plan", {})
            experiments = current_plan.get("proposed_experiments") if current_plan else None
            exp_context = experiments[0] if experiments else {}
            
            # Inject schema requirements into context
            if inputs and targets:
                exp_context = exp_context.copy() if exp_context else {}
                exp_context["_schema_requirements"] = {
                    "input_columns": inputs,
                    "target_columns": targets,
                    "optimization_type": "multi-objective" if len(targets) > 1 else "single-objective"
                }
            
            try:
                res = self.orch.scalarizer.scalarize(
                    data_path=file_path, 
                    objective_query=enhanced_objective,  
                    reuse_script_path=script_to_use,
                    experiment_context=exp_context, 
                    enable_human_review=self._get_human_feedback_enabled()
                )
                
                if res["status"] != "success":
                    return json.dumps({
                        "status": "error",
                        "message": res.get('error', 'Analysis failed'),
                        "hint": "Try force_regenerate=True if requirements changed"
                    })
                
                # Lock the analysis script on first success (or when regenerated)
                if not self.orch.active_scalarizer_script or force_regenerate:
                    self.orch.active_scalarizer_script = res["source_script"]
                    print(f"    ✅ Analysis Logic Locked: {Path(self.orch.active_scalarizer_script).name}")
                
                # Handle both single-row and multi-row results
                metrics = res["metrics"]
                
                if isinstance(metrics, list):
                    df_new = pd.DataFrame(metrics)
                    print(f"    📊 Processing {len(df_new)} data points from multi-well experiment")
                elif isinstance(metrics, dict):
                    df_new = pd.DataFrame([metrics])
                else:
                    return json.dumps({
                        "status": "error",
                        "message": f"Unexpected metrics format: {type(metrics)}"
                    })
                
                # DEDUPLICATION - Content-based tracking
                # Compute current file hash
                current_hash = self._compute_file_hash(file_path)
                current_row_count = len(df_new)

                # Get previous tracking for this file (handle both old and new format)
                prev_tracking = self.orch.analyzed_files.get(file_path_abs, {})
                if isinstance(prev_tracking, dict):
                    prev_hash = prev_tracking.get('hash')
                    prev_row_count = prev_tracking.get('row_count', 0)
                else:
                    # Legacy format: just row count as int
                    prev_hash = None
                    prev_row_count = prev_tracking

                # Check for duplicate content across different filenames
                for tracked_path, tracking_info in self.orch.analyzed_files.items():
                    if tracked_path == file_path_abs:
                        continue  # Skip self
                    tracked_hash = tracking_info.get('hash') if isinstance(tracking_info, dict) else None
                    if tracked_hash and tracked_hash == current_hash:
                        print(f"    ⚠️  Duplicate content detected - matches: {Path(tracked_path).name}")
                        df_final = pd.read_csv(self.orch.bo_data_path) if self.orch.bo_data_path.exists() else pd.DataFrame()
                        return json.dumps({
                            "status": "warning",
                            "message": f"This file's content was already analyzed from '{Path(tracked_path).name}'",
                            "data_points_collected": len(df_final),
                            "rows_added": 0,
                            "optimization_ready": len(df_final) >= 3,
                            "hint": "Data already in optimization set. No action needed unless this is different data with identical content."
                        })

                # Number of trailing rows in the optimization CSV that belong to
                # an older version of this file and must be dropped. The actual
                # removal is deferred until schema validation has passed, so a
                # validation failure cannot leave the dataset truncated while
                # the tracking file still records the old row count.
                stale_rows = 0

                # Determine what to process based on hash and row count
                if prev_hash is None:
                    # FIRST TIME analyzing this file
                    print(f"    ✨ First time analyzing this file")
                    df_to_append = df_new
                    num_new = len(df_new)

                elif prev_hash != current_hash:
                    # FILE CONTENT CHANGED - reprocess entirely
                    print(f"    🔄 File content changed (hash mismatch) - reprocessing entirely")
                    stale_rows = prev_row_count
                    df_to_append = df_new
                    num_new = len(df_new)

                elif current_row_count > prev_row_count:
                    # ROWS APPENDED - process only new rows
                    df_new_only = df_new.iloc[prev_row_count:]
                    num_skipped = prev_row_count
                    num_new = len(df_new_only)
                    
                    if num_skipped > 0:
                        print(f"    🔍 Skipped {num_skipped} previously analyzed row(s)")
                    print(f"    ✅ Adding {num_new} NEW row(s)")
                    
                    df_to_append = df_new_only

                elif current_row_count == prev_row_count:
                    # prev_hash == current_hash (guaranteed by earlier elif)
                    # TRULY UNCHANGED
                    print(f"    ℹ️  File unchanged (same content hash)")
                    df_final = pd.read_csv(self.orch.bo_data_path) if self.orch.bo_data_path.exists() else pd.DataFrame()
                    
                    return json.dumps({
                        "status": "success",
                        "message": "File already analyzed - no changes detected",
                        "data_points_collected": len(df_final),
                        "rows_added": 0,
                        "optimization_ready": len(df_final) >= 3
                    })

                else:
                    # FEWER ROWS - file was truncated/replaced
                    print(f"    ⚠️  File has fewer rows ({current_row_count} < {prev_row_count}) - reprocessing")
                    stale_rows = prev_row_count
                    df_to_append = df_new
                    num_new = len(df_new)

                # Schema enforcement BEFORE saving
                all_cols = list(df_to_append.columns)

                # Case 1: Agent explicitly provided schema (Enables MOO)
                if inputs and targets:
                    # Validate that requested columns exist in the extracted data
                    missing_inputs = [c for c in inputs if c not in all_cols]
                    missing_targets = [t for t in targets if t not in all_cols]
                    
                    if missing_inputs or missing_targets:
                        # Try fuzzy matching for column names
                        available_cols = all_cols
                        suggestions = {}
                        
                        for missing in missing_inputs + missing_targets:
                            # Simple fuzzy match: find columns containing similar substrings
                            matches = [c for c in available_cols if missing.lower().replace('_', '') in c.lower().replace('_', '') 
                                    or c.lower().replace('_', '') in missing.lower().replace('_', '')]
                            if matches:
                                suggestions[missing] = matches
                        
                        return json.dumps({
                            "status": "error",
                            "message": "Requested columns not found in extracted metrics",
                            "missing_inputs": missing_inputs,
                            "missing_targets": missing_targets,
                            "available_columns": all_cols,
                            "suggestions": suggestions if suggestions else None,
                            "hint": "Column names may differ slightly. Check available_columns and retry with correct names, or use force_regenerate=True with updated extraction_goal."
                        })
                    
                    self.orch.expected_input_columns = inputs
                    self.orch.expected_target_columns = targets
                    print(f"    📊 Schema Enforced (User-Specified):")
                    print(f"       Inputs: {self.orch.expected_input_columns}")
                    print(f"       Targets: {self.orch.expected_target_columns}")
                
                # Case 2: Schema already established from previous analysis
                elif self.orch.expected_input_columns and self.orch.expected_target_columns:
                    print(f"    📊 Schema Enforced (From Previous Analysis):")
                    print(f"       Inputs: {self.orch.expected_input_columns}")
                    print(f"       Targets: {self.orch.expected_target_columns}")
                
                # Case 3: Fallback - Auto-detect (Single-Objective default)
                else:
                    # Without any column there is nothing to auto-detect; report
                    # that explicitly instead of an opaque IndexError below.
                    if not all_cols:
                        return json.dumps({
                            "status": "error",
                            "message": "No columns found in extracted metrics; cannot auto-detect schema",
                            "hint": "Try force_regenerate=True with a more specific extraction_goal"
                        })

                    # Heuristic: the last column is the target, all others are inputs.
                    # This is a last resort - prefer explicit schema
                    self.orch.expected_target_columns = [all_cols[-1]]
                    self.orch.expected_input_columns = [c for c in all_cols if c != all_cols[-1]] 
                    print(f"    📊 Schema Auto-Detected (Default Single-Objective):")
                    print(f"       Inputs: {self.orch.expected_input_columns}")
                    print(f"       Targets: {self.orch.expected_target_columns}")
                    print(f"    ⚠️  Warning: Using auto-detected schema. For multi-objective optimization, specify inputs and targets explicitly.")
                
                # SCHEMA ENFORCEMENT ON SAVE
                if self.orch.bo_data_path.exists():
                    df_existing = pd.read_csv(self.orch.bo_data_path)
                    
                    if set(df_to_append.columns) != set(df_existing.columns):
                        return json.dumps({
                            "status": "error",
                            "message": "Schema mismatch detected",
                            "expected_columns": list(df_existing.columns),
                            "received_columns": list(df_to_append.columns),
                            "hint": "All data must have same structure. Use reset_analysis_logic to start fresh."
                        })
                    
                    # All validation passed: now drop the rows contributed by the
                    # previous version of this file (assumes they are the last
                    # stale_rows rows of the dataset). Best-effort, as before.
                    if stale_rows > 0 and len(df_existing) >= stale_rows:
                        try:
                            df_existing.iloc[:-stale_rows].to_csv(self.orch.bo_data_path, index=False)
                            print(f"    🗑️  Removed {stale_rows} old rows from optimization data")
                        except Exception as e:
                            logging.warning(f"Could not clean old data: {e}")
                    
                    # Align column order with the file on disk before appending
                    df_to_append = df_to_append[df_existing.columns]
                    df_to_append.to_csv(self.orch.bo_data_path, mode='a', header=False, index=False)
                else:
                    df_to_append.to_csv(self.orch.bo_data_path, mode='w', header=True, index=False)
                
                # Update tracking
                self.orch.analyzed_files[file_path_abs] = {
                    'row_count': current_row_count,
                    'hash': current_hash,
                    'timestamp': datetime.now().isoformat()
                }
                with open(self.orch.analyzed_files_path, 'w') as f:
                    json.dump(self.orch.analyzed_files, f, indent=2)
                
                df_final = pd.read_csv(self.orch.bo_data_path)
                data_count = len(df_final)
                
                return json.dumps({
                    "status": "success",
                    "metrics": metrics if isinstance(metrics, dict) else f"{len(metrics)} data points",
                    "data_points_collected": data_count,
                    "rows_added": num_new,
                    "optimization_ready": data_count >= 3,
                    "schema": {
                        "inputs": self.orch.expected_input_columns,
                        "targets": self.orch.expected_target_columns
                    }
                })
                
            except Exception as e:
                # Tool boundary: report any failure to the caller as JSON.
                logging.error(f"Analyze file error: {e}", exc_info=True)
                return json.dumps({
                    "status": "error",
                    "message": str(e)
                })
        
        # Register the analyze_file closure as a tool; only "file_path" is required.
        # NOTE(review): the description advertises TXT support while the
        # analyze_file docstring lists CSV/XLSX only - confirm which is accurate.
        # NOTE(review): analyze_file applies an explicit schema only when BOTH
        # "inputs" and "targets" are supplied; the parameter descriptions below
        # do not say so.
        self._register_tool(
            func=analyze_file,
            name="analyze_file",
            description=(
                "Analyzes raw data files (CSV/XLSX/TXT) to extract scalar metrics. "
                "Automatically generates analysis code on first use, then reuses it for consistency. "
                "Results are appended to optimization dataset."
            ),
            parameters={
                "file_path": {
                    "type": "string",
                    "description": "Path to the data file to analyze (e.g., 'results/run_001.csv')"
                },
                "extraction_goal": {
                    "type": "string",
                    "description": "Natural language description of what to extract (e.g., 'Calculate peak area and retention time')"
                },
                "force_regenerate": {
                    "type": "boolean",
                    "description": (
                        "If true, generates new analysis script even if one exists. "
                        "Use when analysis requirements change (e.g., switching from single-row to multi-row extraction, "
                        "or changing which metrics to extract). Default: false"
                    )
                },
                "inputs": {
                    "type": "array", 
                    "items": {"type": "string"},
                    "description": "List of column names to treat as INPUT parameters"
                },
                "targets": {
                    "type": "array", 
                    "items": {"type": "string"}, 
                    "description": "List of column names to treat as OPTIMIZATION TARGETS"
                }
            },
            required=["file_path"]
        )
        
        # 7. RESET ANALYSIS LOGIC
        def reset_analysis_logic():
            """
            Resets the analysis script, optimization data, AND file tracking.

            Clears the locked scalarizer script and the expected schema, empties
            the in-memory file-tracking map and deletes its on-disk copy
            (best-effort), and moves the optimization CSV aside to a
            ``.csv.backup`` file next to it.

            Returns:
                A JSON string with ``status`` ``"success"``.
            """
            self.orch.active_scalarizer_script = None
            self.orch.expected_input_columns = None
            self.orch.expected_target_columns = []
            
            # Clear file tracking completely
            self.orch.analyzed_files = {}
            if self.orch.analyzed_files_path.exists():
                try:
                    self.orch.analyzed_files_path.unlink()
                    print(f"    🗑️  Cleared file tracking history")
                except Exception as e:
                    logging.warning(f"Could not delete analyzed_files.json: {e}")
            
            if self.orch.bo_data_path.exists():
                backup_path = self.orch.bo_data_path.with_suffix('.csv.backup')
                # Use replace() rather than rename(): rename() raises
                # FileExistsError on Windows when a backup from an earlier reset
                # is already present, whereas replace() overwrites it on every
                # platform (which is what rename() already did on POSIX).
                self.orch.bo_data_path.replace(backup_path)
                print(f"    ⚠️  Old data backed up to: {backup_path.name}")
            
            return json.dumps({
                "status": "success",
                "message": "Analysis logic reset. All files will be reprocessed fresh on next analyze_file call.",
                "hint": "Previous optimization data was backed up"
            })
        
        # Register the reset_analysis_logic closure as a tool. It takes no
        # arguments, so both the parameter schema and required list are empty.
        self._register_tool(
            func=reset_analysis_logic,
            name="reset_analysis_logic",
            description=(
                "Resets the locked analysis script and clears optimization data. "
                "Use this when the current analysis approach is fundamentally wrong. "
                "Previous data is backed up before deletion."
            ),
            parameters={},
            required=[]
        )
        
        # 8. RUN OPTIMIZATION
        def run_optimization(
            parallel_capable: bool = False, 
            batch_size: int = None,
            physical_constraints: str = None,
            experimental_budget: int = None
        ):
            """
            Runs Bayesian Optimization to suggest next parameters.
            Supports optional physical constraints for realizable batch design
            and optional experimental budget for exploration/exploitation control.
            """
            print(f"  ⚡ Tool: Running Bayesian Optimization...")
            
            # --- PRE-FLIGHT CHECKS --- 
            if not self.orch.active_scalarizer_script:
                return json.dumps({
                    "status": "error",
                    "message": "No analysis script locked yet",
                    "hint": "Run analyze_file on at least 3 data files first",
                    "workflow": "analyze_file (×3) → run_optimization"
                })
            
            if not self.orch.bo_data_path.exists():
                return json.dumps({
                    "status": "error",
                    "message": "No optimization_data.csv found",
                    "hint": "Run analyze_file to collect data points first"
                })
            
            try:
                df = pd.read_csv(self.orch.bo_data_path)
            except Exception as e:
                return json.dumps({
                    "status": "error",
                    "message": f"Failed to read optimization data: {e}",
                    "hint": "CSV may be corrupted. Check optimization_data.csv"
                })
            
            if len(df) < 3:
                return json.dumps({
                    "status": "error", 
                    "message": f"Insufficient data points: {len(df)}/3",
                    "hint": "Collect at least 3 experimental results before optimizing",
                    "current_data_count": len(df)
                })
            
            if not self.orch.expected_target_columns or not self.orch.expected_input_columns:
                return json.dumps({
                    "status": "error", 
                    "message": "Schema not established",
                    "hint": "This shouldn't happen. Try reset_analysis_logic."
                })
            
            # SCHEMA VALIDATION 
            missing_targets = [t for t in self.orch.expected_target_columns if t not in df.columns]
            if missing_targets:
                return json.dumps({
                    "status": "error",
                    "message": f"Target columns missing from data: {missing_targets}",
                    "available_columns": list(df.columns)
                })
            
            missing_inputs = [c for c in self.orch.expected_input_columns if c not in df.columns]
            if missing_inputs:
                return json.dumps({
                    "status": "error",
                    "message": f"Input columns missing: {missing_inputs}",
                    "available_columns": list(df.columns)
                })
            
            critical_cols = self.orch.expected_input_columns + self.orch.expected_target_columns
            
            if df[critical_cols].isnull().any().any():
                return json.dumps({
                    "status": "error",
                    "message": "Missing values detected in optimization data",
                    "hint": "Ensure all data files were analyzed successfully",
                    "affected_rows": df[df[critical_cols].isnull().any(axis=1)].index.tolist()
                })
            
            # ============================================
            # BOUNDS & CONSTRAINTS CALCULATION 
            # ============================================
            scientific_bounds = {}
            current_plan = self.orch.planner.state.get("current_plan", {})
            
            if current_plan and "proposed_experiments" in current_plan:
                for exp in current_plan["proposed_experiments"]:
                    for param in exp.get("optimization_params", []):
                        name = param.get("parameter_name")
                        min_v = param.get("min_value")
                        max_v = param.get("max_value")
                        
                        if name and min_v is not None and max_v is not None:
                            scientific_bounds[name] = (float(min_v), float(max_v))
                            print(f"  🔬 Scientific Constraint Found: {name} must be between {min_v} and {max_v}")

            numeric_inputs = []
            for col in self.orch.expected_input_columns:
                if col in df.columns and pd.api.types.is_numeric_dtype(df[col]):
                    numeric_inputs.append(col)
                else:
                    print(f"  ⚠️ Skipping non-numeric input column: {col}")

            if not numeric_inputs:
                return json.dumps({
                    "status": "error", 
                    "message": "No numeric input parameters found."
                })

            self.orch.expected_input_columns = numeric_inputs

            input_bounds = []
            for col in numeric_inputs:
                if col in scientific_bounds:
                    sci_min, sci_max = scientific_bounds[col]
                    input_bounds.append([sci_min, sci_max])
                    print(f"     -> Bound for '{col}': [{sci_min}, {sci_max}] (Source: PLANNER)")
                else:
                    data_min = float(df[col].min())
                    data_max = float(df[col].max())
                    
                    if data_min == data_max:
                        margin = 1.0 if data_min == 0 else abs(data_min * 0.1)
                    else:
                        margin = (data_max - data_min) * 0.1
                        
                    safe_min = data_min - margin
                    safe_max = data_max + margin
                    
                    input_bounds.append([safe_min, safe_max])
                    print(f"     -> Bound for '{col}': [{safe_min:.2f}, {safe_max:.2f}] (Source: DATA)")
            
            # ============================================
            # BATCH SIZE DETERMINATION
            # ============================================
            if not parallel_capable:
                final_batch_size = 1
                mode_desc = "sequential (single experiment)"
            else:
                if batch_size is None:
                    return json.dumps({
                        "status": "batch_size_required",
                        "message": "Batch size must be specified for parallel optimization.",
                        "instruction": (
                            "Analyze the experimental plan to determine appropriate batch_size "
                            "(e.g., plate format, number of conditions, equipment capacity), "
                            "then call: run_optimization(parallel_capable=True, batch_size=N)"
                        ),
                        "hint": "Common values: 8, 12, 24, 96, 384 for plate-based experiments"
                    })
                
                if batch_size < 1:
                    return json.dumps({
                        "status": "error", 
                        "message": f"Invalid batch_size: {batch_size}. Must be at least 1."
                    })
                
                final_batch_size = batch_size
                mode_desc = f"parallel (batch of {batch_size})"
                print(f"    ℹ️  Using batch_size: {batch_size}")
            
            # ============================================
            # CONSTRAINT-AWARE & BUDGET-AWARE LOGGING
            # ============================================
            if physical_constraints:
                mode_desc += " + constraint-aware"
                print(f"    📐 Physical constraints provided — will use LLM-guided batch design")
            
            if experimental_budget is not None:
                mode_desc += f" + budget={experimental_budget}"
                print(f"    💰 Experimental budget: {experimental_budget} iteration(s) remaining")
            
            print(f"    📊 Optimization Setup:")
            print(f"       Mode: {mode_desc}")
            print(f"       Data points: {len(df)}")
            print(f"       Inputs: {self.orch.expected_input_columns}")
            print(f"       Targets: {self.orch.expected_target_columns}")
            print(f"       Bounds: {input_bounds}")
            if physical_constraints:
                print(f"       Constraints: {physical_constraints[:100]}...")
            
            try:
                # ============================================
                # CALL BO
                # ============================================
                res = self.orch.bo.run_optimization_loop(
                    data_path=str(self.orch.bo_data_path),
                    objective_text=self.orch.objective,
                    input_cols=self.orch.expected_input_columns,
                    input_bounds=input_bounds,                    
                    target_cols=self.orch.expected_target_columns,
                    output_dir=str(self.orch.base_dir / "bo_artifacts"),
                    batch_size=int(final_batch_size),
                    physical_constraints=physical_constraints,
                    experimental_budget=experimental_budget,
                    plot_acq=True,
                    save_acq=True,
                )
                
                if res.get("status") != "success":
                    return json.dumps({
                        "status": "error", 
                        "message": res.get("error", "Optimization failed"),
                        "bo_output": res
                    })
                
                # Format response
                next_params = res.get('next_parameters')
                
                if parallel_capable:
                    hint = f"Run all {final_batch_size} experiments in parallel, then use analyze_file on each result file."
                    params_summary = f"Generated {final_batch_size} parameter sets"
                else:
                    hint = "Run this experiment, then use analyze_file on the result to continue."
                    params_summary = "Generated next experiment parameters"
                
                response = {
                    "status": "success",
                    "mode": "parallel" if parallel_capable else "sequential",
                    "batch_size": final_batch_size,
                    "recommended_parameters": next_params,
                    "params_summary": params_summary,
                    "strategy_used": res.get('strategy', {}).get('acquisition_strategy', {}).get('type'),
                    "plot_path": res.get('plot_path'),
                    "hint": hint
                }
                if res.get("acq_plot_path"):
                    response["acq_plot_path"] = res["acq_plot_path"]
                if res.get("acq_data_path"):
                    response["acq_data_path"] = res["acq_data_path"]
                
                # Include constrained planning metadata
                if res.get("constrained_planning"):
                    cp = res["constrained_planning"]
                    response["constraint_aware"] = True
                    response["coverage_summary"] = cp.get("coverage_summary", "")
                    response["trade_offs"] = cp.get("trade_offs", "")
                    if cp.get("validation_errors"):
                        response["constraint_warnings"] = cp["validation_errors"]
                
                # Include budget context
                if res.get("budget"):
                    response["budget"] = res["budget"]
                
                return json.dumps(response)
                
            except Exception as e:
                logging.error(f"Optimization error: {e}")
                return json.dumps({
                    "status": "error",
                    "message": str(e)
                })
        
        self._register_tool(
            func=run_optimization,
            name="run_optimization",
            description=(
                "Runs Bayesian Optimization to suggest next experimental parameters. "
                "Requires at least 3 data points from analyze_file. "
                "For parallel mode, batch_size must be specified. "
                "Supports optional physical_constraints for constraint-aware batch design — "
                "when provided, the agent evaluates the acquisition landscape and uses LLM "
                "reasoning to design a batch that maximizes information gain while respecting "
                "physical experimental limitations (e.g., plate layouts, discrete reagent stocks, "
                "shared equipment channels). "
                "Supports optional experimental_budget for exploration/exploitation control — "
                "pass the number of remaining optimization iterations to shift strategy from "
                "exploration (high budget) to exploitation (low budget)."
            ),
            parameters={
                "parallel_capable": {
                    "type": "boolean",
                    "description": "True if experiments can run in parallel. False for sequential (default)."
                },
                "batch_size": {
                    "type": "integer",
                    "description": (
                        "Number of parallel experiments (required if parallel_capable=True). "
                        "Infer from experimental plan (e.g., plate format, grid size, equipment capacity)."
                    )
                },
                "physical_constraints": {
                    "type": "string",
                    "description": (
                        "Natural language description of physical experimental constraints that "
                        "prevent arbitrary parameter combinations. When provided, the optimizer "
                        "evaluates the full acquisition landscape and uses LLM reasoning to design "
                        "a realizable batch. Examples:\n"
                        "- '96-well plate: rows share temperature (8 values), columns share pH (12 values)'\n"
                        "- 'Only 5 catalyst concentrations available: 0.1, 0.5, 1.0, 2.0, 5.0 mM'\n"
                        "- 'Reactor has 4 zones with independent temp but shared pressure'\n"
                        "- 'Gradient limited to linear ramp: min at well A1, max at well H12'\n"
                        "If not provided, standard unconstrained BO is used."
                    )
                },
                "experimental_budget": {
                    "type": "integer",
                    "description": (
                        "Number of remaining optimization iterations (including this one). "
                        "Controls exploration-vs-exploitation balance:\n"
                        "- 1 = final shot (pure exploitation, no exploration)\n"
                        "- 2-3 = critical budget (strongly favor exploitation)\n"
                        "- Higher values = scaled based on campaign progress\n"
                        "- Omit for no budget constraint (default behavior).\n"
                        "Pass when user mentions remaining experiments, budget, 'last round', "
                        "or 'N more tries'. This counts iterations (calls to run_optimization), "
                        "not individual experiments within a batch."
                    )
                }
            },
            required=[]
        )


        # 9. SAVE CHECKPOINT
        def save_checkpoint():
            """
            Write a JSON snapshot of the orchestrator state to ``checkpoint.json``.

            The snapshot contains campaign metadata (objective, expected input and
            target columns, active scalarizer script, planner state, latest TEA
            results, autonomy level, configured directories) plus two counters:
            the number of rows in the BO data CSV and the number of conversation
            messages. Only the message *count* is stored, not the messages.

            Returns:
                JSON string with ``status`` "success" (checkpoint path, counters,
                timestamp) or ``status`` "error" (failure message).
            """
            checkpoint_path = self.orch.base_dir / "checkpoint.json"
            
            # Row count of the accumulated BO dataset; best-effort, stays 0 if unreadable
            data_points = 0
            if self.orch.bo_data_path.exists():
                try:
                    data_points = len(pd.read_csv(self.orch.bo_data_path))
                except Exception as e:
                    logging.warning(f"Could not count data points for checkpoint: {e}")
            
            # Message count: OpenAI keeps a plain list, Gemini keeps chat_session.history
            if self.orch.use_openai:
                message_count = len(self.orch.messages)
            else:
                try:
                    message_count = len(self.orch.chat_session.history) if hasattr(self.orch.chat_session, 'history') else 0
                except Exception as e:
                    logging.warning(f"Could not count chat messages for checkpoint: {e}")
                    message_count = 0
            
            state = {
                "timestamp": datetime.now().isoformat(),
                "objective": self.orch.objective,
                "active_scalarizer_script": self.orch.active_scalarizer_script,
                "expected_input_columns": self.orch.expected_input_columns,
                "expected_target_columns": self.orch.expected_target_columns,
                "data_points_collected": data_points,
                "message_count": message_count,
                "planner_state": self.orch.planner.state if hasattr(self.orch.planner, 'state') else None,
                "latest_tea_results": self.orch.latest_tea_results,
                "autonomy_level": self.orch.autonomy_level.value if hasattr(self.orch, 'autonomy_level') and self.orch.autonomy_level else None,
                "data_dir": str(self.orch.data_dir) if self.orch.data_dir else None,
                "knowledge_dir": str(self.orch.knowledge_dir) if self.orch.knowledge_dir else None,
                "code_dir": str(self.orch.code_dir) if self.orch.code_dir else None,
            }
            
            try:
                # Serialize before opening the file: if a value is not JSON-serializable
                # the previous checkpoint on disk is left intact instead of truncated.
                serialized = json.dumps(state, indent=2)
                with open(checkpoint_path, 'w', encoding='utf-8') as f:
                    f.write(serialized)
                
                print(f"    💾 Checkpoint saved: {checkpoint_path}")
                
                return json.dumps({
                    "status": "success",
                    "checkpoint_path": str(checkpoint_path),
                    "data_points": data_points,
                    "message_count": message_count,
                    "timestamp": state["timestamp"]
                })
                
            except Exception as e:
                logging.error(f"Checkpoint save failed: {e}")
                return json.dumps({
                    "status": "error",
                    "message": f"Failed to save checkpoint: {e}"
                })
        
        # Register the tool
        self._register_tool(
            func=save_checkpoint,
            name="save_checkpoint",
            description=(
                "Saves complete campaign state including conversation history, "
                "analysis scripts, and optimization data. Use this periodically "
                "during long campaigns (every 3-5 experiments) to enable resumption "
                "after crashes or breaks."
            ),
            parameters={},
            required=[]
        )

        # 10. DISCARD PLAN
        def discard_plan(reason: str = ""):
            """
            Flag the newest still-active experimental plan as superseded.

            The plan history is scanned from newest to oldest; the first entry that
            is neither a techno-economic analysis nor already superseded is marked
            in place (status, reason, timestamp). Nothing is removed from history.

            Args:
                reason: Why the plan is being discarded; a generic reason is
                    recorded when empty.

            Returns:
                JSON string with ``status`` "success" or "error".
            """
            planner_state = self.orch.planner.state
            if not planner_state:
                return json.dumps({
                    "status": "error",
                    "message": "No active planning session"
                })
            
            plan_log = planner_state.get("plan_history", [])
            if not plan_log:
                return json.dumps({
                    "status": "error",
                    "message": "No plans in history to discard"
                })
            
            def _is_active_experiment(entry):
                # TEA entries and already-superseded plans are never discard targets
                return not (
                    entry.get("type") == "technoeconomic_analysis"
                    or entry.get("status") == "superseded"
                )
            
            target = next(
                (entry for entry in reversed(plan_log) if _is_active_experiment(entry)),
                None
            )
            if target is None:
                return json.dumps({
                    "status": "error",
                    "message": "No active experimental plans to discard"
                })
            
            # Keep the entry for transparency; only its status changes
            target["status"] = "superseded"
            target["superseded_reason"] = reason or "Plan replaced with corrected version"
            target["superseded_at"] = datetime.now().isoformat()
            
            print(f"    🗑️  Discarded plan: iteration {target.get('iteration')}")
            if reason:
                print(f"       Reason: {reason}")
            
            return json.dumps({
                "status": "success",
                "message": f"Plan from iteration {target.get('iteration')} discarded",
                "reason": target["superseded_reason"],
                "hint": "The discarded plan remains in history for transparency"
            })

        # Register the tool
        self._register_tool(
            func=discard_plan,
            name="discard_plan",
            description=(
                "Discards the most recent experimental plan (marks it as superseded). "
                "The plan remains in full history for transparency but won't appear in final reports. "
                "Use when correcting a wrong plan before generating the corrected version."
            ),
            parameters={
                "reason": {
                    "type": "string",
                    "description": (
                        "Why the plan is being discarded. Be specific about the mismatch. "
                        "Examples: 'Wrong material - data has Mg not Mn', "
                        "'User requested different equipment', 'Incorrect objective interpretation'"
                    )
                }
            },
            required=["reason"]
        )

        def show_directory_guide():
            """
            Print the recommended project layout and return a JSON summary of it.

            Returns:
                JSON string listing the folders the user should provide and the
                folders that are created automatically.
            """
            # Machine-readable summary handed back to the LLM
            summary = {
                "status": "success",
                "message": "Directory structure guide displayed",
                "recommended_folders": ["papers/", "experimental_results/", "code/"],
                "auto_created_folders": ["campaign_session/", "kb_storage/"]
            }
            
            # Human-readable guide shown on the console
            layout_text = """
        ╔══════════════════════════════════════════════════════════════════════════╗
        ║                  RECOMMENDED DIRECTORY STRUCTURE                         ║
        ╚══════════════════════════════════════════════════════════════════════════╝

        📁 my_research_project/          ← Run orchestrator from here
        │
        ├── 📚 papers/                    ← Scientific papers & literature
        │   ├── separation_methods_2024.pdf
        │   ├── lithium_extraction_review.pdf
        │   └── rare_earth_recovery.pdf
        │
        ├── 📊 experimental_results/      ← Raw experimental data files
        │   ├── batch_001.csv
        │   ├── batch_002.csv
        │   ├── batch_003.csv
        │   └── pilot_run_*.xlsx
        │
        ├── 💻 code/                      ← Analysis scripts & API docs (optional)
        │   ├── analysis_pipeline.py
        │   ├── visualization.py
        │   └── api_documentation/
        │
        ├── 📁 campaign_session/          ← Created automatically by orchestrator
        │   ├── optimization_data.csv    (collected metrics)
        │   ├── analysis_artifacts/      (generated analysis scripts)
        │   ├── bo_artifacts/            (optimization plots)
        │   ├── plan.json                (experimental plans)
        │   └── checkpoint.json          (saved state)
        │
        └── 🗂️ kb_storage/                ← Created automatically
            ├── default_kb_docs/         (knowledge base from papers)
            └── default_kb_code/         (knowledge base from code)

        ╔══════════════════════════════════════════════════════════════════════════╗
        ║                           QUICK START GUIDE                              ║
        ╚══════════════════════════════════════════════════════════════════════════╝

        CHAT EXAMPLES:

        📋 Generate plan with papers:
        "Generate a plan for lithium extraction using ./papers/ and ./code/"

        📊 Analyze experimental data:
        "Analyze ./experimental_results/batch_001.csv and extract yield"

        🔬 Run optimization:
        "Run optimization to suggest next experiments"

        💾 Save progress:
        "Save checkpoint"
        """
            
            print(layout_text)
            return json.dumps(summary)

        # Register the tool
        self._register_tool(
            func=show_directory_guide,
            name="show_directory_guide",
            description=(
                "Shows recommended directory structure for optimal agent performance. "
                "Use when user asks about setup, organization, or how to structure their project."
            ),
            parameters={},
            required=[]
        )
    
    def _register_tool(self, func: Callable, name: str, description: str, 
                      parameters: Dict[str, Any], required: list = None):
        """
        Register a tool in both OpenAI and Gemini formats.
        
        Args:
            func: The Python function to call
            name: Function name
            description: What the function does
            parameters: Dict of parameter definitions
            required: List of required parameter names
        """
        # Add to function map for execution
        self.functions_map[name] = func
        
        # Add to Gemini format (just the function object)
        self.gemini_functions.append(func)
        
        # Build OpenAI schema
        openai_schema = {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": parameters,
                    "required": required or []
                }
            }
        }
        self.openai_schemas.append(openai_schema)
    
    def execute_tool(self, tool_name: str, **kwargs) -> str:
        """
        Look up a registered tool and invoke it with keyword arguments.

        Args:
            tool_name: Key of the tool in ``functions_map``
            **kwargs: Forwarded unchanged to the tool

        Returns:
            Whatever the tool returns, or a JSON error string when the tool is
            unknown or raises.
        """
        if tool_name in self.functions_map:
            try:
                tool = self.functions_map[tool_name]
                return tool(**kwargs)
            except Exception as exc:
                # Convert any tool failure into an error payload for the caller
                logging.error(f"Tool execution error ({tool_name}): {exc}", exc_info=True)
                failure = {
                    "status": "error",
                    "message": str(exc),
                    "tool": tool_name
                }
                return json.dumps(failure)
        
        return json.dumps({
            "status": "error",
            "message": f"Tool '{tool_name}' not found in registry"
        })





================================================
FILE: parser_utils.py
================================================
import os
from typing import List, Dict, Any
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional, Union
import logging

import json
import pandas as pd
import PIL.Image as PIL_Image

from .excel_parser import parse_adaptive_excel


# File suffixes accepted by get_files_from_directory() and generate_repo_map().
# Both compare against Path.suffix.lower(), so entries must be lowercase and
# include the leading dot.
# Match these to the extensions you check in planning_agent.py
SUPPORTED_EXTENSIONS = {
    '.py', '.java', '.r', '.cpp', '.h', '.js', '.json', 
    '.csv', '.txt', '.md', '.pdf'
}

def get_files_from_directory(directory_path: str) -> List[str]:
    """
    Collect the paths of all supported files beneath a directory tree.

    Hidden files and directories (leading '.') and common cache/dependency
    directories are skipped. A file qualifies when its lower-cased suffix is
    listed in SUPPORTED_EXTENSIONS.

    Args:
        directory_path: Root directory to scan.

    Returns:
        Matching file paths as strings; an empty list when the directory
        does not exist.
    """
    base = Path(directory_path)

    if not base.exists():
        print(f"  - ⚠️ Directory not found: {directory_path}")
        return []

    print(f"  - 📂 Scanning directory: {base.name}...")

    skipped_dirs = ('__pycache__', 'venv', 'env', 'node_modules', '.git')
    matches = []

    for current_dir, subdirs, filenames in os.walk(base):
        # Slice-assign so os.walk itself stops descending into pruned directories
        subdirs[:] = [
            name for name in subdirs
            if not (name.startswith('.') or name in skipped_dirs)
        ]

        candidates = (
            Path(current_dir) / name
            for name in filenames
            if not name.startswith('.')
        )
        matches.extend(
            str(candidate)
            for candidate in candidates
            if candidate.suffix.lower() in SUPPORTED_EXTENSIONS
        )

    print(f"    -> Found {len(matches)} files in directory.")
    return matches

def generate_repo_map(root_dir: str) -> str:
    """
    Generates a visual tree listing of the supported files in a repository.
    Useful for giving the LLM context on where files live for imports.

    Only files whose lower-cased suffix is in SUPPORTED_EXTENSIONS are listed;
    each is indented by its depth below the root. Hidden entries (leading '.')
    and '__pycache__' / 'venv' / 'env' directories are skipped.

    Args:
        root_dir: Repository root directory.

    Returns:
        Newline-joined tree text starting with "<root name>/", or "" when the
        root does not exist.
    """
    root = Path(root_dir)
    if not root.exists():
        return ""

    excluded_dirs = ('__pycache__', 'venv', 'env')
    tree_lines = [f"{root.name}/"]

    for path in sorted(root.rglob('*')):
        # Filter on the parts *below* the root only. Checking the full path would
        # also test the root's own ancestors, so a root such as "../repo" or one
        # located under ".cache" / "venv" would have every file skipped.
        rel_parts = path.relative_to(root).parts
        if any(part.startswith('.') or part in excluded_dirs for part in rel_parts):
            continue

        if path.is_file() and path.suffix.lower() in SUPPORTED_EXTENSIONS:
            indent = '    ' * (len(rel_parts) - 1)
            tree_lines.append(f"{indent}├── {path.name}")

    return "\n".join(tree_lines)

def table_to_markdown(table: List[List[str]]) -> str:
    """
    Converts a 2D list representation of a table into Markdown format.

    The first row is the header. Every cell is stringified and stripped
    (None becomes ""). Rows shorter than the header are padded with empty
    cells and longer rows are truncated. The input lists are not modified.

    Args:
        table: Rows of cells; the first row supplies the column headers.

    Returns:
        The Markdown table text (each line newline-terminated), or "" when
        the table or its header row is empty.
    """
    if not table or not table[0]:
        return ""

    def _clean(cell) -> str:
        if cell is None:
            return ""
        # A raw line break would end the Markdown row early and a raw pipe would
        # open an extra column, so fold line breaks to spaces and escape pipes.
        flattened = " ".join(str(cell).strip().splitlines())
        return flattened.replace("|", "\\|")

    header, *rows = [[_clean(cell) for cell in row] for row in table]
    width = len(header)

    lines = [
        f"| {' | '.join(header)} |",
        f"| {' | '.join(['---'] * width)} |",
    ]
    for row in rows:
        # Truncate long rows, pad short ones, so every row has `width` cells
        fitted = row[:width] + [""] * (width - len(row))
        lines.append(f"| {' | '.join(fitted)} |")

    return "\n".join(lines) + "\n"


def _match_closing_brace(text: str, start: int) -> int:
    """Return the index of the '}' closing the '{' at text[start], or -1 if unbalanced.

    Braces inside double-quoted strings (including after backslash escapes)
    are ignored.
    """
    depth = 0
    in_string = False
    escape_next = False

    for i in range(start, len(text)):
        ch = text[i]

        if escape_next:
            escape_next = False
            continue

        if in_string:
            if ch == '\\':
                escape_next = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i

    return -1


def parse_json_from_response(resp) -> "Tuple[Optional[Dict[str, Any]], Optional[str]]":
    """
    Robustly extracts and parses JSON from an LLM response object.
    
    Handles:
    - Objects exposing .text, or .parts[0].text
    - Raw strings
    - Markdown code fences (```json ... ```)
    - Preamble/postamble text around the JSON object, including preamble that
      itself contains a brace-delimited span which is not valid JSON

    Args:
        resp: Response object or raw string.

    Returns:
        (parsed, None) on success, or (None, error_message) on failure.
    """
    import json
    
    json_text = ""
    
    # 1. Extract raw text from response object
    try:
        if hasattr(resp, 'text'): 
            json_text = resp.text.strip()
        elif hasattr(resp, 'parts') and resp.parts: 
            json_text = resp.parts[0].text.strip()
        elif isinstance(resp, str):
            json_text = resp.strip()
        else:
            return None, f"LLM response format unexpected: {type(resp)}"
            
    except ValueError as e:
        return None, f"Response blocked or empty (Safety Filter): {e}"
    except Exception as e:
        return None, f"Error extracting text from response: {e}"

    if not json_text:
        return None, "Empty response from LLM"

    # 2. Strip Markdown code fences
    if json_text.startswith("```json"):
        json_text = json_text[len("```json"):].strip()
    elif json_text.startswith("```"):
        json_text = json_text[len("```"):].strip()
    
    if json_text.endswith("```"):
        json_text = json_text[:-len("```")].strip()

    # 3. Try direct parse first (fast path for clean responses)
    try:
        return json.loads(json_text), None
    except json.JSONDecodeError:
        pass  # Fall through to extraction logic
    
    # 4. Extract a JSON object from surrounding text by brace matching
    candidate_start = json_text.find('{')
    if candidate_start == -1:
        return None, (
            f"No JSON object found in response. "
            f"First 300 chars: {json_text[:300]}"
        )
    
    first_error = None
    while candidate_start != -1:
        candidate_end = _match_closing_brace(json_text, candidate_start)
        
        if candidate_end == -1:
            # Likely truncated output. Do not dig into inner braces: that could
            # return a nested fragment as if it were the whole answer.
            if first_error is None:
                first_error = (
                    f"Unbalanced braces in response. "
                    f"First 300 chars: {json_text[:300]}"
                )
            break
        
        extracted = json_text[candidate_start:candidate_end + 1]
        try:
            return json.loads(extracted), None
        except json.JSONDecodeError as e:
            # Report the first failure; later candidates are only a fallback
            if first_error is None:
                first_error = (
                    f"Failed to decode JSON: {e}. "
                    f"Extracted text (first 500 chars): {extracted[:500]}"
                )
        
        # This balanced span was not JSON (e.g. "{placeholder}" in a preamble);
        # continue with the next span that starts after it.
        candidate_start = json_text.find('{', candidate_end + 1)
    
    return None, first_error

def append_experiment_result(file_path: str, parameters: Dict[str, float], results: Dict[str, float]):
    """
    Appends a completed experiment (Params + Results) to the cumulative dataset.
    This 'closes the loop' for the BO Agent.

    The file is created when it does not exist yet. The format is chosen from
    the file suffix, compared case-insensitively: '.xlsx' is read/written as
    Excel, everything else is written as CSV.

    Args:
        file_path: Dataset file to create or extend.
        parameters: Input parameter values of the experiment.
        results: Measured results; on a key clash these override `parameters`.

    Raises:
        ValueError: If the file exists and its suffix is neither .xlsx nor .csv.
    """
    path = Path(file_path)
    # Case-insensitive so that e.g. "RESULTS.CSV" is recognised like "results.csv"
    suffix = path.suffix.lower()
    
    # Merge input parameters and lab results into one row
    new_row = {**parameters, **results}
    
    if not path.exists():
        # Create new if doesn't exist
        df = pd.DataFrame([new_row])
    else:
        if suffix == '.xlsx':
            df = pd.read_excel(path)
        elif suffix == '.csv':
            df = pd.read_csv(path)
        else:
            raise ValueError("Unsupported file format. Use .xlsx or .csv")
        
        # Append
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    
    # Save back
    if suffix == '.xlsx':
        df.to_excel(path, index=False)
    else:
        df.to_csv(path, index=False)
    print(f"✅ Appended result to {path.name}. New size: {len(df)}")


def write_experiments_to_disk(result_json: Dict[str, Any], target_dir: str) -> List[str]:
    """
    Parses the result JSON and writes each experiment's 'implementation_code'
    to a .py file in the target directory (created if missing).

    The file name is derived from 'experiment_name' (letters, digits, '_' and
    '.' kept, spaces turned into underscores). If the code is wrapped in a
    Markdown fence only the fenced content is written.

    Args:
        result_json: Dict holding a 'proposed_experiments' list.
        target_dir: Directory that receives the .py files.

    Returns:
        A list of filenames that were successfully saved.
    """
    path = Path(target_dir)
    path.mkdir(parents=True, exist_ok=True)
    
    experiments = result_json.get("proposed_experiments", [])
    saved_files = []
    
    if not experiments:
        logging.warning(f"No experiments found to save in {target_dir}")
        return []
    
    for i, exp in enumerate(experiments):
        code_content = exp.get("implementation_code")
        exp_name = exp.get("experiment_name", f"Experiment_{i+1}")
        # The key may be present with an explicit null (or a non-string value);
        # iterating over that below would raise TypeError.
        if exp_name is None:
            exp_name = f"Experiment_{i+1}"
        exp_name = str(exp_name)
        
        # 1. Clean filename
        # Replace spaces with underscores and remove non-alphanumeric chars (except _ and .)
        safe_name = "".join(c for c in exp_name if c.isalnum() or c in (' ', '_', '.')).rstrip()
        safe_name = safe_name.replace(' ', '_')
        
        # Fallback if name becomes empty after cleaning
        if not safe_name: 
            safe_name = f"experiment_code_{i+1}"
            
        filename = f"{safe_name}.py"
        file_path = path / filename

        # 2. Extract and Write
        if code_content and "No relevant code found" not in code_content:
            try:
                # Strip markdown code blocks (```python ... ```)
                code_lines = code_content.splitlines()
                
                start_index = next((j for j, line in enumerate(code_lines) if line.strip().startswith('```')), -1)
                fenced_lines = code_lines[start_index + 1:] if start_index != -1 else []
                
                if fenced_lines:
                    # Stop at the closing fence; if it is missing (truncated output)
                    # keep everything after the opening fence so the fence line
                    # itself never ends up in the .py file.
                    end_offset = next(
                        (j for j, line in enumerate(fenced_lines) if line.strip().endswith('```')),
                        len(fenced_lines)
                    )
                    extracted_code = "\n".join(fenced_lines[:end_offset]).strip()
                else:
                    # Fallback: no fence (or nothing after it) - treat the whole string as code
                    extracted_code = code_content.strip()

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(extracted_code)
                
                saved_files.append(filename)
                
            except Exception as e:
                logging.error(f"Failed to write {filename}: {e}")
        else:
            logging.info(f"Experiment {i+1} ('{exp_name}') has no executable code.")

    return saved_files


def resolve_primary_data_path(data_input: Union[str, Dict[str, str], None]) -> Optional[Dict[str, str]]:
    """
    Turn a primary-data reference into a {"file_path", "metadata_path"} dict.

    Resolution order:
    1. A bare string path is wrapped into a dict.
    2. An explicitly supplied, non-empty 'metadata_path' is returned untouched.
    3. A sibling '<stem>.json' is used if present, else a previously saved
       '<stem>.user_desc.json'.
    4. Otherwise the user is asked for a description, which is saved to
       '<stem>.user_desc.json' and used as metadata.

    Returns:
        The resolved dict ('metadata_path' is None when nothing was found or
        saved), or None when the input is empty or the data file is missing.
    """
    if not data_input:
        return None

    # Accept either a plain path or an already-structured dict
    spec = {"file_path": data_input} if isinstance(data_input, str) else data_input

    data_file = Path(spec["file_path"])
    if not data_file.exists():
        print(f"❌ Primary data file not found: {data_file}")
        return None

    # Caller already chose the metadata: nothing to discover
    if spec.get("metadata_path"):
        return spec

    resolved_file = str(data_file)

    # Sidecar JSON next to the data file (e.g., data.json)
    sidecar_json = data_file.with_suffix('.json')
    if sidecar_json.exists():
        print(f"  - 🔍 Auto-discovered metadata: {sidecar_json.name}")
        return {"file_path": resolved_file, "metadata_path": str(sidecar_json)}

    # Description saved by an earlier interactive run (e.g., data.user_desc.json)
    description_file = data_file.with_suffix('.user_desc.json')
    if description_file.exists():
        print(f"  - 🔍 Found saved user description: {description_file.name}")
        return {"file_path": resolved_file, "metadata_path": str(description_file)}

    # Last resort: ask the user
    from .user_interface import get_dataset_description

    description = get_dataset_description(data_file.name)
    if not description:
        # User chose to skip
        return {"file_path": resolved_file, "metadata_path": None}

    # Minimal metadata document; the description is stored under 'objective'
    metadata_document = {
        "title": data_file.stem,
        "objective": description, 
        "generated_by": "user_interactive_prompt"
    }
    try:
        with open(description_file, 'w', encoding='utf-8') as handle:
            json.dump(metadata_document, handle, indent=2)
    except Exception as exc:
        print(f"  - ⚠️ Could not save description file: {exc}")
        return {"file_path": resolved_file, "metadata_path": None}

    print(f"  - 💾 Saved description to: {description_file.name}")
    return {"file_path": resolved_file, "metadata_path": str(description_file)}


def parse_data_file(file_path: str, 
                   metadata_path: Optional[str] = None) -> str:
    """
    Unified data file parsing for both initial planning and iteration.

    When ``metadata_path`` is omitted, the metadata JSON is located through
    ``resolve_primary_data_path`` (auto-discovery, then its interactive
    fallback). When ``metadata_path`` is given, it is handed to the resolver
    together with the file path so the resolver's "metadata already provided"
    short-circuit applies; this avoids prompting the user for a description
    that would be discarded anyway.

    Args:
        file_path: Path to data file (.csv, .xlsx, .xls)
        metadata_path: Optional explicit metadata path (overrides auto-discovery)

    Returns:
        String containing formatted data summary, or a bracketed
        ``[Error ...]`` / ``[No data ...]`` message. Parsing errors are
        reported in the returned string rather than raised.
    """
    if metadata_path is None:
        resolver_input = file_path
    else:
        # Dict form: the resolver returns it untouched once the data file is
        # confirmed to exist, skipping discovery and the interactive prompt.
        resolver_input = {"file_path": file_path, "metadata_path": metadata_path}

    data_dict = resolve_primary_data_path(resolver_input)

    if data_dict is None:
        return f"[Error: File not found - {file_path}]"

    # The explicit value always wins, including a falsy one such as "" which
    # the resolver does not treat as "provided".
    if metadata_path is not None:
        data_dict['metadata_path'] = metadata_path

    try:
        chunks = parse_adaptive_excel(
            data_dict['file_path'],
            data_dict.get('metadata_path')
        )

        if chunks:
            # Prefer the dataset-level summary chunk; fall back to the first chunk.
            summary = next(
                (c for c in chunks
                 if c['metadata'].get('content_type') in
                    ('dataset_summary', 'dataset_package')),
                chunks[0]
            )
            return summary['text']

        return f"[No data extracted from {file_path}]"

    except Exception as e:
        return f"[Error parsing {file_path}: {e}]"


def load_image_file(image_path: str) -> Optional[Any]:
    """
    Load an image from disk, returning ``None`` instead of raising.

    The image is fully read and copied while the file is open, so the
    returned object does not depend on the file handle that the ``with``
    block closes.

    Args:
        image_path: Path to image file

    Returns:
        A copy of the loaded PIL image, or None when PIL is unavailable or
        the file cannot be opened/decoded (a warning is logged in both cases).
    """
    if PIL_Image is None:
        logging.warning("PIL not installed. Cannot load images.")
        return None

    detached_copy = None
    try:
        with PIL_Image.open(image_path) as opened:
            # Force the pixel data to be read before the file is closed.
            opened.load()
            detached_copy = opened.copy()
    except Exception as exc:
        logging.warning(f"Failed to load image {image_path}: {exc}")
        return None
    return detached_copy


def parse_multimodal_results(results: Any) -> Tuple[str, List]:
    """
    Extracts text and images from various result formats.

    Handles multiple input formats:
    - String: "Yield was 85%"
    - File path: "./data.csv" or "./plot.png"
    - Dict: {"path": "./file.csv", "description": "..."}
    - List: Mix of above types

    A string is treated as a file only when it names an existing path.
    Strings that cannot be a path at all (empty, or so long that the OS
    rejects them) are treated as plain text instead of raising.

    Args:
        results: Experimental results in any supported format

    Returns:
        Tuple of (consolidated_text, loaded_images). The text entries are
        joined with a blank line; images are whatever ``load_image_file``
        returned for each image path that loaded successfully.

    Example:
        >>> text, images = parse_multimodal_results([
        ...     "./experiment.csv",
        ...     {"path": "./plot.png", "description": "Results"},
        ...     "Precipitation observed"
        ... ])
    """
    parsed_text_results = []
    loaded_images = []

    def is_existing_path(candidate: Any) -> bool:
        """Return True only if *candidate* is a non-blank string naming an existing path."""
        # Path("") resolves to the current directory, which always exists.
        if not isinstance(candidate, str) or not candidate.strip():
            return False
        try:
            return Path(candidate).exists()
        except (OSError, ValueError):
            # e.g. ENAMETOOLONG for a long free-text observation.
            return False

    def process_item(item: Any, description: str = "") -> str:
        """Convert one result item to text, loading it from disk when it is a file."""
        text_output = ""

        # If it's a file path
        if is_existing_path(item):
            path = Path(item)
            suffix = path.suffix.lower()

            # A. Data Files
            if suffix in ['.xlsx', '.xls', '.csv']:
                print(f"  - 📄 Parsing data file: {path.name}")
                text_output = parse_data_file(str(path))
                text_output = f"DATA FILE ({path.name}):\n{text_output}"

            # B. Images
            elif suffix in ['.png', '.jpg', '.jpeg', '.tiff', '.bmp']:
                print(f"  - 🖼️  Loading result image: {path.name}")
                img = load_image_file(str(path))
                if img:
                    loaded_images.append(img)
                    text_output = f"[Attached Image: {path.name}]"
                else:
                    text_output = f"[Error loading image: {path.name}]"

            # C. Logs/Text
            elif suffix in ['.txt', '.log', '.md', '.json']:
                try:
                    content = path.read_text(encoding='utf-8')
                    text_output = f"LOG FILE ({path.name}):\n{content}"
                except Exception as e:
                    text_output = f"[Error reading log {path.name}: {e}]"

            else:
                text_output = f"FILE ({path.name})"

        # If not a file, treat as raw text/data
        else:
            if isinstance(item, (dict, list)):
                text_output = json.dumps(item, indent=2)
            else:
                text_output = str(item)

        # Append description if provided
        if description:
            text_output += f"\n(Context: {description})"

        return text_output

    # A single result is handled as a one-element list.
    items_to_process = results if isinstance(results, list) else [results]

    for entry in items_to_process:
        if isinstance(entry, dict):
            # Structured file entry: accept several aliases for path and description.
            path_val = entry.get('path') or entry.get('file') or entry.get('image')
            desc_val = (entry.get('description') or entry.get('desc') or
                        entry.get('caption') or entry.get('notes'))

            if path_val and isinstance(path_val, str):
                parsed_text_results.append(process_item(path_val, desc_val or ""))
            else:
                # No usable path: keep the whole dict as JSON text.
                parsed_text_results.append(json.dumps(entry, indent=2))
        else:
            parsed_text_results.append(process_item(entry))

    consolidated_feedback = "\n\n".join(parsed_text_results)
    return consolidated_feedback, loaded_images


================================================
FILE: pdf_parser.py
================================================
import fitz  # PyMuPDF
import pdfplumber
import threading
from typing import List, Dict, Tuple
from dataclasses import dataclass
from pathlib import Path

from .parser_utils import table_to_markdown


class TimeoutError(Exception):
    """Raised by the ``timeout`` context manager's handler when time runs out.

    Within this module the name shadows the built-in ``TimeoutError``; this
    class derives directly from ``Exception`` rather than ``OSError``.
    """

class timeout:
    """Context manager that bounds the run time of its ``with`` body.

    On the main thread of a platform providing ``SIGALRM``/``setitimer`` the
    body is interrupted: ``TimeoutError(error_message)`` is raised inside the
    ``with`` block once ``seconds`` have elapsed. The previous ``SIGALRM``
    handler is restored on exit; any interval timer the host application had
    armed on ``ITIMER_REAL`` is replaced for the duration of the block.

    Elsewhere (worker threads, platforms without ``SIGALRM``, or a
    non-positive ``seconds``) a thread cannot be interrupted from outside, so
    a background ``threading.Timer`` only records the expiry in ``expired``.
    Raising from the timer callback would surface the exception in the timer
    thread, never in the guarded code.

    Args:
        seconds: Time budget in seconds (int or float).
        error_message: Message carried by the raised ``TimeoutError``.

    Attributes:
        timer: The fallback ``threading.Timer`` (``None`` until one is used).
        expired: ``True`` once the fallback timer has elapsed.
    """

    def __init__(self, seconds=15, error_message="Timeout"):
        self.seconds = seconds
        self.error_message = error_message
        self.timer = None
        self.expired = False
        self._alarm_armed = False
        self._previous_handler = None

    def _timeout_handler(self, *_signal_args):
        # Invoked as a signal handler (signum, frame); runs in the main thread.
        raise TimeoutError(self.error_message)

    def _mark_expired(self):
        # Fallback timer callback: record the expiry without raising.
        self.expired = True

    def __enter__(self):
        import signal  # local import: only this class needs it

        self.expired = False
        can_interrupt = (
            hasattr(signal, "SIGALRM")
            and hasattr(signal, "setitimer")
            and threading.current_thread() is threading.main_thread()
            and self.seconds > 0
        )
        if can_interrupt:
            self._previous_handler = signal.signal(signal.SIGALRM, self._timeout_handler)
            signal.setitimer(signal.ITIMER_REAL, self.seconds)
            self._alarm_armed = True
        else:
            self.timer = threading.Timer(self.seconds, self._mark_expired)
            self.timer.daemon = True
            self.timer.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._alarm_armed:
            import signal

            try:
                signal.setitimer(signal.ITIMER_REAL, 0)  # disarm
            finally:
                # signal.signal() returns None for handlers not installed
                # from Python; those cannot be re-installed, so use default.
                previous = self._previous_handler
                signal.signal(
                    signal.SIGALRM,
                    previous if previous is not None else signal.SIG_DFL,
                )
                self._alarm_armed = False
        if self.timer:
            self.timer.cancel()
        # Never suppress exceptions raised by the body.
        return False

@dataclass
class ContentBlock:
    """A piece of extracted content tagged with its page and content type."""

    text: str
    page: int
    content_type: str


def chunk_text(text: str, page_num: int, chunk_size: int, overlap: int) -> List[Dict[str, any]]:
    """Split *text* into overlapping, whitespace-stripped chunks.

    Consecutive windows of ``chunk_size`` characters are taken, each starting
    ``overlap`` characters before the end of the previous one. Windows that
    are empty after stripping are skipped and do not consume a chunk index.

    Args:
        text: Text of a single page.
        page_num: Page number stored in each chunk's metadata.
        chunk_size: Window length in characters; must be positive.
        overlap: Characters shared between consecutive windows; must be
            smaller than ``chunk_size``.

    Returns:
        List of ``{'text', 'metadata'}`` dicts, where metadata holds
        ``page``, ``content_type`` ('text') and ``chunk_id`` ("p<page>-t-<n>").

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``; the window could not advance and
            the loop would not terminate on text longer than one window.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    chunks = []
    start = 0
    text_length = len(text)
    chunk_idx = 0
    while start < text_length:
        end = start + chunk_size
        piece = text[start:end].strip()
        if piece:
            chunks.append({
                'text': piece,
                'metadata': {
                    'page': page_num,
                    'content_type': 'text',
                    'chunk_id': f"p{page_num}-t-{chunk_idx}"
                }
            })
            chunk_idx += 1
        # Step back by `overlap` unless this window already reached the end.
        start = end - overlap if end < text_length else end
    return chunks

def extract_pdf_two_pass(pdf_path: str, chunk_size: int = 500, overlap: int = 50, table_timeout: int = 15) -> List[Dict[str, any]]:
    """
    A robust two-pass hybrid extraction pipeline for RAG. This is the stable version.
    Pass 1 (PyMuPDF): Fast extraction of all text and identification of pages containing tables.
    Pass 2 (pdfplumber): High-accuracy extraction of tables from only the identified pages.

    Args:
        pdf_path: Path to the PDF file.
        chunk_size: Character window used when chunking page text.
        overlap: Characters shared between consecutive text chunks.
        table_timeout: Seconds passed to the ``timeout`` guard around each
            page's table extraction.

    Returns:
        Text and table chunks sorted by page (text before tables on the same
        page). Each chunk's metadata gets ``source`` and a sequential
        ``chunk_id`` of the form "<pdf stem>-<index>". Returns an empty list
        if Pass 1 fails; a Pass 2 failure only loses the tables.
    """
    print(f"Starting robust two-pass processing for: {pdf_path}")

    text_chunks = []
    table_chunks = []
    table_page_nums = set()

    # === PASS 1: Fast Text Extraction and Table Location with PyMuPDF ===
    print("  - Pass 1: Extracting text and locating potential tables...")
    try:
        doc = fitz.open(pdf_path)
        try:
            for page_num_zero_indexed in range(len(doc)):
                page = doc[page_num_zero_indexed]
                page_num_one_indexed = page_num_zero_indexed + 1

                # 1.1 Extract and chunk text for the current page.
                # Blocks are ordered top-to-bottom, then left-to-right.
                text_blocks = sorted(page.get_text("blocks"), key=lambda b: (b[1], b[0]))
                full_page_text = "\n\n".join([block[4].strip() for block in text_blocks if block[4].strip()])

                if full_page_text:
                    text_chunks.extend(chunk_text(full_page_text, page_num_one_indexed, chunk_size, overlap))

                # 1.2 Identify pages that might contain tables for the next pass.
                # find_tables() returns a finder object; test its `tables`
                # list, since the object itself may be truthy even when empty.
                # Fall back to the object's own truthiness if the attribute
                # is missing.
                finder = page.find_tables()
                if getattr(finder, "tables", finder):
                    table_page_nums.add(page_num_zero_indexed)
        finally:
            # Release the document even if a page fails mid-loop.
            doc.close()
        print(f"  - Pass 1 Complete: Extracted {len(text_chunks)} text chunks.")
        print(f"  - Found {len(table_page_nums)} pages that may contain tables.")

    except Exception as e:
        print(f"❌ Error during Pass 1 (PyMuPDF processing): {e}")
        return []

    # === PASS 2: Targeted, High-Accuracy Table Extraction with pdfplumber ===
    if table_page_nums:
        print("  - Pass 2: Performing high-accuracy table extraction on specific pages...")
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page_num_zero_indexed in sorted(table_page_nums):
                    page_num_one_indexed = page_num_zero_indexed + 1
                    try:
                        with timeout(seconds=table_timeout):
                            page = pdf.pages[page_num_zero_indexed]
                            tables = page.extract_tables()
                            if tables:
                                for table in tables:
                                    # Skip empty tables and header-only (single-row) ones.
                                    if table and len(table) > 1:
                                        markdown_table = table_to_markdown(table)
                                        table_chunks.append({
                                            'text': markdown_table,
                                            'metadata': {'page': page_num_one_indexed, 'content_type': 'table'}
                                        })
                    except TimeoutError:
                        print(f"    - ⚠️  Table extraction on page {page_num_one_indexed} timed out. Skipping.")
                    except Exception as e:
                        print(f"    - ⚠️  Error extracting tables from page {page_num_one_indexed}: {e}")
            print("  - Pass 2 Complete.")
        except Exception as e:
            print(f"❌ Error during Pass 2 (pdfplumber processing): {e}")

    # === Final Merge and Post-processing ===
    print("  - Merging and finalizing chunks...")
    all_content = text_chunks + table_chunks
    all_content.sort(key=lambda x: (x['metadata']['page'], 0 if x['metadata']['content_type'] == 'text' else 1))

    # Overwrites the per-page chunk ids assigned by chunk_text with
    # document-wide sequential ids.
    for i, chunk in enumerate(all_content):
        chunk['metadata']['source'] = pdf_path
        chunk['metadata']['chunk_id'] = f"{Path(pdf_path).stem}-{i}"

    print(f"✓ Created {len(all_content)} total chunks ({len(table_chunks)} tables)")
    return all_content


================================================
FILE: planning_agent.py
================================================
import os
from pathlib import Path
import json
import logging
import shutil
import uuid
from typing import List, Dict, Any, Optional, Union
from pathlib import Path
from datetime import datetime

from .knowledge_base import KnowledgeBase
from .parser_utils import (
    generate_repo_map, 
    write_experiments_to_disk,
    resolve_primary_data_path,
    parse_multimodal_results
)
from .repo_loader import clone_git_repository

from .instruct import (
    HYPOTHESIS_GENERATION_INSTRUCTIONS,
    TEA_INSTRUCTIONS
)

from ...auth import get_internal_proxy_key
from ...wrappers.openai_wrapper import OpenAIAsGenerativeModel
from ...wrappers.litellm_wrapper import LiteLLMGenerativeModel

from ._deprecation import normalize_params
from .base_agent import BaseAgent
from .knowledge_base import KnowledgeBase

from ..lit_agents.literature_agent import LiteratureSearchAgent
from ..lit_agents.optimize_query import optimize_search_query

from .rag_engine import (
    perform_science_rag, 
    perform_code_rag, 
    refine_plan_with_feedback,
    refine_code_with_feedback,
    verify_plan_relevance
)

from .ingestor import ingest_files, extract_images

from .user_interface import display_plan_summary, get_user_feedback

from .html_generator import HTMLReportGenerator

from .base_agent import BaseAgent



class PlanningAgent(BaseAgent):
    """
    Stateful AI Agent for Autonomous Experimental Planning and Iteration.
    
    The PlanningAgent orchestrates end-to-end research workflows by combining:
    - Dual Knowledge Base system (scientific literature + implementation code)
    - RAG-based hypothesis generation and technoeconomic analysis
    - LLM-driven code generation from experimental procedures
    - Human-in-the-loop feedback at strategic decision points
    - Iterative refinement based on experimental results
    
    Maintains a persistent 'state' dictionary to track:
    - The Research Objective
    - The Evolving Experimental Plan (Science -> Code)
    - Results from executed experiments
    - Feedback history (both Scientific Plan and Code Implementation)

    Args:
        api_key: API key for the LLM provider.
        model_name: Model name. For public deployments, use LiteLLM format
            (e.g., "gemini/gemini-2.0-flash", "gpt-4o", "claude-sonnet-4-20250514").
        base_url: Base URL for internal proxy endpoint.
            When provided, uses OpenAI-compatible client.
            When None, uses LiteLLM for multi-provider support.
        embedding_model: Embedding model name.
        embedding_api_key: API key for the embedding LLM provider.
        futurehouse_api_key: Optional API key for literature search.
        kb_base_path: Path for knowledge base storage.
        code_chunk_size: Chunk size for code files.
        output_dir: Output directory for artifacts.
        
        google_api_key: DEPRECATED. Use 'api_key' instead.
        local_model: DEPRECATED. Use 'base_url' instead.
    """
    def __init__(self, api_key: Optional[str] = None,
                 model_name: str = "gemini-3-pro-preview",
                 base_url: Optional[str] = None,
                 embedding_model: str = "gemini-embedding-001",
                 embedding_api_key: Optional[str] = None,
                 futurehouse_api_key: Optional[str] = None,
                 kb_base_path: str = "./kb_storage/default_kb",
                 code_chunk_size: int = 20000,
                 output_dir: str = ".",
                 google_api_key: Optional[str] = None,
                 local_model: Optional[str] = None,):
        """
        Set up the LLM client, the optional literature agent and both KBs.

        With ``base_url`` set, an OpenAI-compatible client is used and the
        same key serves generation and embeddings; otherwise a LiteLLM client
        is used and ``api_key`` may be None. Pre-built knowledge bases are
        loaded from disk at the end of construction.

        KB artifacts are stored next to ``kb_base_path`` as
        ``<name>_docs.<ext>`` and ``<name>_code.<ext>``. The extension is
        appended to the full prefix, so a dotted KB name keeps separate
        docs and code files.

        Raises:
            ValueError: If ``base_url`` is given but no API key can be resolved.
        """
        super().__init__(output_dir)
        self.agent_type = "planning"

        # Handle deprecated parameters
        api_key, base_url = normalize_params(
            api_key=api_key,
            google_api_key=google_api_key,
            base_url=base_url,
            local_model=local_model,
            source="PlanningAgent"
        )

        # Store config
        self._base_url = base_url
        self.code_chunk_size = code_chunk_size

        # Initialize LLM client based on deployment mode
        use_litellm = False

        if base_url:
            # INTERNAL PROXY
            if api_key is None:
                api_key = get_internal_proxy_key()

            if not api_key:
                raise ValueError(
                    "API key required for internal proxy.\n"
                    "Set SCILINK_API_KEY environment variable or pass api_key parameter."
                )

            if embedding_api_key is not None:
                logging.warning(
                    "⚠️ embedding_api_key is ignored for internal proxy. "
                    "Using api_key for all requests."
                )

            logging.info(f"🏛️ PlanningAgent using internal proxy: {base_url}")
            self.model = OpenAIAsGenerativeModel(
                model=model_name,
                api_key=api_key,
                base_url=base_url
            )
            use_litellm = False
            embedding_api_key = api_key

        else:
            # PUBLIC LITELLM - can use different keys per provider
            logging.info(f"🌐 PlanningAgent using LiteLLM: {model_name}")
            self.model = LiteLLMGenerativeModel(
                model=model_name,
                api_key=api_key  # Can be None - LiteLLM reads env vars
            )
            use_litellm = True
            # embedding_api_key stays as passed (can be None for auto-detect)

        self._api_key = api_key
        self.generation_config = None

        # Literature search is optional: a missing key or a failing
        # constructor leaves lit_agent as None.
        self.lit_agent = None
        if futurehouse_api_key or os.getenv("FUTUREHOUSE_API_KEY"):
            try:
                self.lit_agent = LiteratureSearchAgent(futurehouse_api_key, max_wait_time=1000)
                logging.info("✅ Literature Search Agent initialized.")
            except Exception as e:
                logging.warning(f"⚠️ Failed to initialize Literature Agent: {e}")
        else:
            logging.info("ℹ️ No FutureHouse API key provided. Literature search will be skipped.")

        # --- Dual KnowledgeBase Initialization ---
        base_path = Path(kb_base_path)
        base_path.parent.mkdir(parents=True, exist_ok=True)

        # 1. Scientific/Docs KB
        self.kb_docs = KnowledgeBase(
            api_key=embedding_api_key,
            embedding_model=embedding_model,
            base_url=base_url,
            use_litellm=use_litellm
        )
        self.kb_docs_prefix = base_path.parent / f"{base_path.name}_docs"
        # Extensions are appended, not substituted: Path.with_suffix() would
        # strip everything after the last dot in the KB name.
        self.kb_docs_index = f"{self.kb_docs_prefix}.faiss"
        self.kb_docs_chunks = f"{self.kb_docs_prefix}.json"
        self.kb_docs_sources_path = f"{self.kb_docs_prefix}.sources.json"

        # 2. Implementation/Code KB
        self.kb_code = KnowledgeBase(
            api_key=embedding_api_key,
            embedding_model=embedding_model,
            base_url=base_url,
            use_litellm=use_litellm
        )
        self.kb_code_prefix = base_path.parent / f"{base_path.name}_code"
        self.kb_code_index = f"{self.kb_code_prefix}.faiss"
        self.kb_code_chunks = f"{self.kb_code_prefix}.json"
        self.kb_code_map_path = f"{self.kb_code_prefix}.maps.json"
        self.kb_code_sources_path = f"{self.kb_code_prefix}.sources.json"

        print("--- Initializing Agent (Dual-KB System) ---")
        self._load_knowledge_bases()

    def _get_initial_state_fields(self) -> Dict[str, Any]:
        """Agent-specific state fields"""
        return {
            "objective": None,
            "iteration_index": 0,
            "inputs": {
                "knowledge_paths": [],
                "code_paths": [],
                "additional_context": None,
                "primary_data_set": None,
                "image_paths": [],
                "image_descriptions": []
            },
            "current_plan": None,
            "plan_history": [],
            "experimental_results": [],
            "human_feedback_history": [],
            "last_error": None
        }

    def restore_state(self, state_file_path: str) -> None:
        """
        Restore agent state from a saved .state.json file.
        Raises FileNotFoundError if file doesn't exist.
        """        
        path = Path(state_file_path)
        
        if not path.exists():
            raise FileNotFoundError(f"State file not found: {state_file_path}")
        
        if path.suffix != '.json':
            raise ValueError(f"State file must be a .json file, got: {path.suffix}")
        
        print(f"  - 📂 Loading state from: {path.name}")
        
        if not self.load_state(state_file_path):  # Uses inherited method
            raise ValueError(f"Failed to parse state file: {state_file_path}")
        
        # User feedback
        print(f"  - ✅ Restored session: {self.state['session_id']}")
        print(f"     • Objective: {self.state['objective'][:80]}...")
        print(f"     • Current iteration: {self.state['iteration_index']}")
        print(f"     • History entries: {len(self.state.get('plan_history', []))}")
        print(f"     • Previous results: {len(self.state.get('experimental_results', []))}")
        print(f"     • Actions logged: {len(self.state.get('action_history', []))}")

        
    def _load_knowledge_bases(self):
        """Attempts to load both KBs from disk."""
        print(f"  - Docs KB: Loading from {self.kb_docs_prefix}...")
        docs_loaded = self.kb_docs.load(
            self.kb_docs_index, self.kb_docs_chunks,
            sources_path=self.kb_docs_sources_path
        )
        
        print(f"  - Code KB: Loading from {self.kb_code_prefix}...")
        code_loaded = self.kb_code.load(
            self.kb_code_index, self.kb_code_chunks, self.kb_code_map_path,
            sources_path=self.kb_code_sources_path
        )

        self._kb_is_built = docs_loaded or code_loaded
        
        if docs_loaded: print("    - ✅ Docs KB loaded.")
        if code_loaded: print("    - ✅ Code KB loaded.")
        if not self._kb_is_built: print("    - ⚠️  No pre-built KBs found.")

    def _initialize_state(self, objective: str, **kwargs) -> Dict[str, Any]:
        """Creates the foundational state dictionary for a new research task."""
        self._init_state(
            objective=objective,
            inputs={
                "knowledge_paths": kwargs.get("knowledge_paths", []),
                "code_paths": kwargs.get("code_paths", []),
                "additional_context": kwargs.get("additional_context"),
                "primary_data_set": kwargs.get("primary_data_set"),
                "image_paths": kwargs.get("image_paths", []),
                "image_descriptions": kwargs.get("image_descriptions", [])
            }
        )
        return self.state
    
    def _save_results_to_json(self, results: Dict[str, Any], file_path: str):
        try:
            p = Path(file_path)
            p.parent.mkdir(parents=True, exist_ok=True)
            with p.open('w', encoding='utf-8') as f: json.dump(results, f, indent=2)
            print(f"    - ✅ Results successfully saved to: {file_path}")
        except Exception as e: logging.error(f"    - ❌ Failed to save results: {e}")

    def _save_state_to_json(self, file_path: str):
        """Saves state to a specific path (legacy interface)."""
        try:
            p = Path(file_path)
            p.parent.mkdir(parents=True, exist_ok=True)
            with p.open('w', encoding='utf-8') as f: 
                json.dump(self.state, f, indent=2)
        except Exception as e: 
            logging.error(f"Failed to save state: {e}")

    def _build_and_save_kb(self, knowledge_paths: Optional[List[str]] = None, code_paths: Optional[List[str]] = None) -> bool:
        print("\n--- Rebuilding Knowledge Bases ---")
        
        # 1. Science KB
        doc_chunks = []
        if knowledge_paths:
            print(f"Processing {len(knowledge_paths)} Scientific Paths...")
            doc_chunks.extend(ingest_files(knowledge_paths, is_code_mode=False))

        if doc_chunks:
            print(f"  - Building Scientific KB with {len(doc_chunks)} chunks...")
            self.kb_docs.build(doc_chunks)
            self.kb_docs.save(self.kb_docs_index, self.kb_docs_chunks, sources_path=self.kb_docs_sources_path)
        else:
            print("  - ℹ️  No Scientific docs provided. Docs KB unchanged.")

        # 2. Code KB
        code_chunks = []
        if code_paths:
            print(f"Processing {len(code_paths)} Code Paths...")
            for p in code_paths:
                path_obj = Path(p)
                if path_obj.is_dir():
                    repo_name = path_obj.name
                    print(f"  - 📦 Processing Repo: {repo_name}")
                    self.kb_code.repo_maps[repo_name] = generate_repo_map(str(path_obj))
                    code_chunks.extend(ingest_files([p], is_code_mode=True, code_chunk_size=self.code_chunk_size, repo_name=repo_name))
                else:
                    code_chunks.extend(ingest_files([p], is_code_mode=True, code_chunk_size=self.code_chunk_size))
            
        if code_chunks:
            print(f"  - Building Code KB with {len(code_chunks)} chunks...")
            self.kb_code.build(code_chunks)
            self.kb_code.save(self.kb_code_index, self.kb_code_chunks, self.kb_code_map_path, self.kb_code_sources_path)
        else:
            print("  - ℹ️  No Code docs provided. Code KB unchanged.")

        self._kb_is_built = True
        return True

    def _ensure_kb_is_ready(self, knowledge_paths: Optional[List[str]] = None, code_paths: Optional[List[str]] = None) -> bool:
        new_science = self.kb_docs.source_difference(knowledge_paths)
        new_code = self.kb_code.source_difference(code_paths)
        
        if new_science or new_code:
            return self._build_and_save_kb(new_science, new_code)
        elif not self._kb_is_built:
            logging.error("Knowledge base is not built.")
            return False
        return True
    
    def generate_plan(self,
                    objective: str,
                    knowledge_paths: Optional[List[str]] = None,
                    primary_data_set: Optional[Union[str, Dict[str, str]]] = None,
                    additional_context: Optional[Dict[str, str]] = None,
                    image_paths: Optional[List[str]] = None,
                    image_descriptions: Optional[List[str]] = None,
                    enable_human_feedback: bool = True,
                    reset_state: bool = False) -> Dict[str, Any]:
        """
        Generate experimental plan (science only, no implementation code/protocol).

        This method performs:
        1. Knowledge base initialization (docs only)
        2. Literature search (optional, best-effort)
        3. RAG-based hypothesis generation
        4. Self-correction loop
        5. Human feedback on strategy

        Does NOT generate implementation code. Use generate_implementation_code() for that.

        The literature search is an optional enrichment: if query
        optimisation or the search itself raises, a warning is logged and
        planning continues without literature context.

        Args:
            objective: Research objective driving the plan.
            knowledge_paths: Scientific documents to ingest into the docs KB.
            primary_data_set: Data file path, or dict, passed through
                ``resolve_primary_data_path``.
            additional_context: Mapping of section header to text, rendered
                as "## header" sections.
            image_paths: Manually supplied images; images extracted from
                ``knowledge_paths`` are appended after them.
            image_descriptions: Descriptions forwarded with the images.
            enable_human_feedback: If True, shows the plan and asks for feedback.
            reset_state: If True, starts a new session even if state exists.

        Returns:
            The plan dict (with proposed_experiments, ``iteration`` and
            ``stage``). If the knowledge base cannot be prepared, the agent
            state dict (status "failed") is returned instead.
        """

        def first_justification(plan: Dict[str, Any]) -> Optional[str]:
            """Justification of the first proposed experiment, or None if there is none."""
            experiments = plan.get("proposed_experiments")
            if not experiments:
                return None
            return experiments[0].get("justification")

        # Resolve data and images
        primary_data_set = resolve_primary_data_path(primary_data_set)
        manual_images = image_paths or []
        auto_images = [img for img in extract_images(knowledge_paths) if img not in manual_images]
        all_image_paths = manual_images + auto_images

        # Initialize or update state
        if reset_state or not self.state:
            self.state = self._initialize_state(
                objective=objective,
                knowledge_paths=knowledge_paths,
                code_paths=None,  # ← Not used in plan generation
                additional_context=additional_context,
                primary_data_set=primary_data_set,
                image_paths=all_image_paths,
                image_descriptions=image_descriptions
            )
        else:
            print(f"  - 🔄 Appending to existing research session...")
            if objective:
                self.state["objective"] = objective

        # Increment iteration
        existing_iter = self.state.get("iteration_index", 0)
        self.state["iteration_index"] = existing_iter + 1
        current_iter = self.state["iteration_index"]

        # Build KB (docs only)
        if not self._ensure_kb_is_ready(knowledge_paths, code_paths=None):
            self.state["status"] = "failed"
            self.state["last_error"] = "KB Init Failed"
            self._log_action(
                action="generate_plan",
                input_ctx={"objective": objective},
                result={"status": "failed", "error": "KB Init Failed"},
                rationale=None
            )
            return self.state

        # Build context string
        ctx_string = ""
        if additional_context:
            for header, content in additional_context.items():
                ctx_string += f"## {header}\n{content}\n\n"
            ctx_string = ctx_string.strip() if ctx_string else None

        # Literature search (best-effort: a failure must not abort planning)
        lit_context = ""
        if self.lit_agent:
            print(f"  - 🌍 Querying literature...")
            try:
                lit_res = self.lit_agent.search_for_hypothesis_context(
                    optimize_search_query(objective=objective, model=self.model)
                )
                if lit_res['status'] == 'success':
                    lit_context = lit_res['content']
            except Exception as e:
                logging.warning(f"⚠️ Literature search failed; continuing without it: {e}")

        # RAG for science plan
        print(f"\n--- Generating Experimental Strategy ---")
        res = perform_science_rag(
            objective=objective,
            instructions=HYPOTHESIS_GENERATION_INSTRUCTIONS,
            task_name="Experimental Plan",
            kb_docs=self.kb_docs,
            model=self.model,
            generation_config=self.generation_config,
            primary_data_set=primary_data_set,
            image_paths=all_image_paths,
            image_descriptions=image_descriptions,
            additional_context=ctx_string,
            external_context=lit_context
        )

        if lit_context:
            res["literature_search"] = lit_context

        self._log_action(
            action="perform_science_rag",
            input_ctx={
                "objective": objective,
                "knowledge_paths": knowledge_paths,
                "has_primary_data": primary_data_set is not None,
                "has_literature": bool(lit_context)
            },
            result=res,
            rationale=first_justification(res)
        )

        # Snapshot 1: Science Draft
        res["iteration"] = current_iter
        res["stage"] = "Science Draft"
        self.state["plan_history"].append(res.copy())
        self.state["current_plan"] = res

        # Self-correction
        if not res.get("error"):
            is_relevant, critique = verify_plan_relevance(objective, res, self.model, self.generation_config)

            if not is_relevant:
                print(f"\n🔄 Self-correction triggered: {critique}")
                res = refine_plan_with_feedback(
                    original_result=res,
                    feedback=f"CRITICAL: {critique}",
                    objective=objective,
                    model=self.model,
                    generation_config=self.generation_config
                )

                res["iteration"] = current_iter
                res["stage"] = "Auto-Corrected"
                self.state["plan_history"].append(res.copy())
                self.state["current_plan"] = res

                self._log_action(
                    action="self_correction",
                    input_ctx={"critique": critique},
                    result=res,
                    rationale=f"Auto-corrected due to: {critique}"
                )

        # Human feedback on strategy
        human_feedback = None
        if enable_human_feedback and res.get("proposed_experiments") and not res.get("error"):
            display_plan_summary(res)
            human_feedback = get_user_feedback()

            if human_feedback:
                print(f"\n📝 Refining plan...")
                self.state["human_feedback_history"].append({"phase": "science", "feedback": human_feedback})
                res = refine_plan_with_feedback(
                    original_result=res,
                    feedback=human_feedback,
                    objective=objective,
                    model=self.model,
                    generation_config=self.generation_config
                )

                res["iteration"] = current_iter
                res["stage"] = "Human Refined (Science)"
                self.state["plan_history"].append(res.copy())
                self.state["current_plan"] = res

                display_plan_summary(res)
                print("✅ Plan updated.")
            else:
                print("✅ Plan accepted.")

        self._log_action(
                action="generate_plan",
                input_ctx={
                    "objective": objective,
                    "iteration": current_iter
                },
                result=res,
                rationale=first_justification(res),
                feedback=human_feedback
        )

        # NOTE(review): status becomes "planned" even when `res` carries an
        # "error" key — confirm whether callers rely on that.
        self.state["status"] = "planned"

        return res
    
    def generate_implementation_code(self,
                                    plan: Dict[str, Any],
                                    code_paths: List[str],
                                    enable_human_feedback: bool = True) -> Dict[str, Any]:
        """
        Add implementation code to an existing experimental plan.

        This method:
        1. Resolves code paths (Git URLs are cloned to a local path)
        2. Builds the code knowledge base
        3. Performs code RAG to map experiments to APIs
        4. Provides human code review (repeated until the reviewer approves)

        Every plan produced along the way is stamped with ``iteration`` and
        ``stage``, appended as a shallow copy to ``self.state["plan_history"]``
        and stored as ``self.state["current_plan"]``.

        Args:
            plan: Existing plan dict (must have proposed_experiments)
            code_paths: Paths to code/API repositories. Entries starting with
                ``http://``, ``https://`` or ``git@`` are cloned first;
                ``None`` is treated as an empty list.
            enable_human_feedback: If True, pauses for code review

        Returns:
            Updated plan dict with implementation_code added to experiments.
            ``{"error": "Code KB build failed"}`` when the knowledge base
            cannot be prepared, or the input ``plan`` itself when the Code KB
            holds no entries.
        """

        # Resolve code paths (handle Git URLs)
        print("\n--- Resolving Code Paths ---")
        effective_code_paths = []
        for path in code_paths or []:
            # Strip once so the value that is tested is also the value cloned.
            candidate = str(path).strip()
            if candidate.startswith(('http://', 'https://', 'git@')):
                print(f"  - 🔗 Cloning: {candidate}")
                local_path = clone_git_repository(candidate)
                if local_path:
                    effective_code_paths.append(local_path)
                else:
                    # Surface the failure instead of dropping the repo silently.
                    print(f"  - ⚠️  Clone failed, skipping: {candidate}")
            else:
                effective_code_paths.append(path)

        # Build code KB
        if not self._ensure_kb_is_ready(knowledge_paths=None, code_paths=effective_code_paths):
            error_result = {"error": "Code KB build failed"}
            self._log_action(
                action="generate_implementation_code",
                input_ctx={"code_paths": code_paths},
                result=error_result,
                rationale=None
            )

            return error_result

        # Check if code KB has content
        if not (self.kb_code.index and self.kb_code.index.ntotal > 0):
            print("  - ⚠️  Code KB is empty, skipping code generation")
            self._log_action(
                action="generate_implementation_code",
                input_ctx={"code_paths": code_paths},
                result={"status": "skipped", "error": "Empty Code KB"},
                rationale="No code documents found in knowledge base"
            )
            return plan

        # Generate code
        print("\n--- Generating Implementation Code ---")
        # Prefer the iteration stamped on the plan; fall back to the session counter.
        current_iter = plan.get("iteration", self.state.get("iteration_index", 1))

        res = perform_code_rag(
            result=plan,
            kb_code=self.kb_code,
            model=self.model,
            generation_config=self.generation_config
        )

        # Snapshot: Code Generated
        res["iteration"] = current_iter
        res["stage"] = "Code Generated"
        self.state["plan_history"].append(res.copy())
        self.state["current_plan"] = res

        self._log_action(
            action="perform_code_rag",
            input_ctx={
                "code_paths": effective_code_paths,
                "num_experiments": len(res.get("proposed_experiments", []))
            },
            result=res,
            rationale="Mapped experimental steps to API code"
        )

        human_feedback = None

        # Human code review. Skipped when code RAG reported an error, which is
        # the same guard refine_implementation_code applies.
        if enable_human_feedback and not res.get("error"):
            temp_dir = self.output_dir / "temp_code_review"
            print("\n--- Code Review ---")
            print(f"  - 💾 Saving to: {temp_dir}")

            # Start from a clean directory so stale scripts are not reviewed.
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

            files = write_experiments_to_disk(res, str(temp_dir))

            if not files:
                print("  - ⚠️  No code generated")
            else:
                while True:
                    print("\n" + "="*60)
                    print("👀 CODE REVIEW REQUIRED")
                    print("="*60)
                    print(f"1. Review files in: {temp_dir.resolve()}")
                    print("2. Press ENTER to approve, or type feedback to refine")
                    print("-"*60)

                    code_feedback = get_user_feedback()

                    # Empty feedback means approval and ends the review loop.
                    if not code_feedback:
                        print("✅ Code accepted")
                        break

                    human_feedback = code_feedback
                    print("\n🛠️  Refining code...")
                    self.state["human_feedback_history"].append({"phase": "code", "feedback": code_feedback})

                    res = refine_code_with_feedback(
                        result=res,
                        feedback=code_feedback,
                        model=self.model,
                        generation_config=self.generation_config
                    )

                    res["iteration"] = current_iter
                    res["stage"] = "Code Refined"
                    self.state["plan_history"].append(res.copy())
                    self.state["current_plan"] = res

                    self._log_action(
                        action="refine_code",
                        input_ctx={"feedback": code_feedback},
                        result=res,
                        rationale=f"Human requested: {code_feedback}",
                        feedback=code_feedback
                    )

                    print("  - 💾 Updating files...")
                    write_experiments_to_disk(res, str(temp_dir))

        self._log_action(
            action="generate_implementation_code",
            input_ctx={
                "code_paths": effective_code_paths,
                "iteration": current_iter
            },
            result=res,
            rationale="Code generation complete",
            feedback=human_feedback
        )
        return res

    def propose_experiments(self, objective: str, 
                            knowledge_paths: Optional[List[str]] = None, 
                            code_paths: Optional[List[str]] = None,
                            additional_context: Optional[Dict[str, str]] = None,
                            primary_data_set: Optional[Union[str, Dict[str, str]]] = None,
                            image_paths: Optional[List[str]] = None,
                            image_descriptions: Optional[List[str]] = None,
                            output_json_path: Optional[str] = None,
                            enable_human_feedback: bool = True,
                            reset_state: bool = False) -> Dict[str, Any]: # Default False to enable cumulative workflows
        """
        Generate an experimental plan based on scientific literature and implementation knowledge.

        This is the primary entry point for starting a new research workflow. It chains:
        1. ``generate_plan`` - the science-only experimental plan
        2. ``generate_implementation_code`` - only when ``code_paths`` is given
        3. Persistence of the plan, the agent state and an HTML report
           (only when ``output_json_path`` is given)
        4. Writing the experiment scripts to ``./output_scripts``

        Args:
            objective (str): High-level research goal. This guides all hypothesis generation
                and plan refinement. Should be specific and measurable.
                Examples:
                    - "Optimize the yield of the Suzuki coupling reaction"
                    - "Screen 96 conditions to selectively precipitate magnesium"
                    - "Develop a high-throughput assay for enzyme activity"
            
            knowledge_paths (Optional[List[str]]): Paths to scientific documents/data.
                Supported formats: PDFs, .txt, .md, .xlsx, .csv, directories.
                You can pass Excel/CSV files directly here. If a .json file 
                with the same name exists next to the data file, it is automatically 
                loaded as metadata.
                These populate the Docs Knowledge Base for hypothesis generation.
                Example: ["./papers/", "./lab_notebooks/protocol.pdf", "./public_data.xlsx", "./public_data.json" ]
            
            code_paths (Optional[List[str]]): Paths to code repositories or API documentation.
                Supported formats: Local directories, Git URLs, Python files
                These populate the Code Knowledge Base for implementation.
                When omitted or empty, the code phase is skipped entirely.
                Examples:
                    - ["./opentrons_api/"]  # Local repo
                    - ["https://github.com/org/automation-lib.git"]  # Git URL
            
            additional_context (Optional[Dict[str, str]]): Additional text context
                to inject into the prompt. Keys become section headers.
                Example: {
                    "Safety Constraints": "Maximum temperature is 80°C",
                    "Equipment Available": "Opentrons OT-2, plate reader"
                }
            
            primary_data_set (Optional[Union[str, Dict[str, str]]]): Main dataset to analyze.
                Use for the dataset that drives the research objective.
                Forwarded unchanged to ``generate_plan``.
                Example: {"file_path": "./screening_results.xlsx"}
            
            image_paths (Optional[List[str]]): Paths to images (plots, diagrams, photos).
                Supported formats: .png, .jpg, .jpeg, .tiff, .bmp
                These are passed to the vision model for multimodal analysis.
                Examples: ["./criticality_matrix.png", "./reaction_scheme.jpg"]
            
            image_descriptions (Optional[List[str]]): Text descriptions for each image.
                Should be in same order as image_paths. Helps LLM interpret images.
                Examples: ["Criticality matrix showing material supply risks"]
            
            output_json_path (Optional[str]): Path to save the generated plan.
                Also saves full state to {output_json_path}.state.json
                and generates HTML report at {output_json_path}.html
                Example: "./outputs/experiment_plan.json"
            
            enable_human_feedback (bool): If True, pauses for user input at:
                - Strategy review (after hypothesis generation)
                - Code review (after script generation)
                Set to False for fully autonomous operation.
                Defaults to True.
            
            reset_state (bool): If True, clears any existing state and starts fresh.
                If False, appends to existing research session (cumulative workflow).
                Defaults to False.
        
        Returns:
            Dict[str, Any]: The agent state (``self.state`` itself, not a copy).
                Keys used by the workflow methods in this class include:
                - session_id: Unique identifier for this session
                - objective: The research objective
                - iteration_index: Current iteration number
                - current_plan: The active experimental plan
                - plan_history: Snapshots of every plan stage
                - experimental_results: Results recorded by later refinements
                - human_feedback_history: Feedback given during reviews
                - status: Workflow status string

            When plan generation fails, the error plan is saved to
            ``output_json_path`` (if provided) and the state is returned
            without running the code phase or writing scripts.
        """
        # Phase 1: Generate experimental plan (science only)
        plan = self.generate_plan(
            objective=objective,
            knowledge_paths=knowledge_paths,
            primary_data_set=primary_data_set,
            additional_context=additional_context,
            image_paths=image_paths,
            image_descriptions=image_descriptions,
            enable_human_feedback=enable_human_feedback,
            reset_state=reset_state
        )
        
        if plan.get("error"):
            if output_json_path:
                self._save_results_to_json(plan, output_json_path)
            return self.state
        
        # Phase 2: Add implementation code (if code_paths provided)
        if code_paths:
            code_plan = self.generate_implementation_code(
                plan=plan,
                code_paths=code_paths,
                enable_human_feedback=enable_human_feedback
            )
            if code_plan.get("error") and not code_plan.get("proposed_experiments"):
                # A bare error dict (e.g. the Code KB could not be built) must
                # not replace the science plan that was already produced.
                print(f"\n⚠️  Code generation failed ({code_plan.get('error')}); keeping the science-only plan.")
            else:
                plan = code_plan
        
        # Save final results
        if output_json_path:
            self._save_results_to_json(plan, output_json_path)
            # f-string instead of "+" so path-like values work as well as str.
            self._save_state_to_json(f"{output_json_path}.state.json")
            self._generate_html_report(output_json_path)
        
        # Save scripts
        final_out = "./output_scripts"
        print(f"\n--- Saving Scripts to: {final_out} ---")
        write_experiments_to_disk(plan, final_out)
        
        return self.state
    
    def refine_plan(self,
                    results: Any,
                    enable_human_feedback: bool = True,
                    state_file_path: Optional[str] = None,
                    use_literature_rag: bool = False) -> Dict[str, Any]:
        """
        Refines the experimental plan (science strategy only) based on new results.
        
        Args:
            results: Experimental outcomes (text, dict, file path, or list of files/images)
            enable_human_feedback: If True, pauses for strategy review
            state_file_path: Optional path to restore state from checkpoint
            use_literature_rag: If True, searches knowledge base for context relevant 
                           to the results. Defaults to False for faster iteration.
            
        Returns:
            Dict with refined plan (proposed_experiments)
        """
        
        # --- 0. STATE RESTORATION ---
        if state_file_path is not None:
            print(f"\n--- 🔄 Restoring State from File ---")
            self.restore_state(state_file_path)

        if not self.state or not self.state.get("current_plan"):
            raise ValueError(
                "No active state found.\n"
                "You must initialize the agent first using one of:\n"
                "  1. agent.propose_experiments(...) - Start new session\n"
                "  2. agent.restore_state('path.state.json') - Restore saved session\n"
                "  3. Pass state_file_path='path.state.json' to this method"
            )
        
        print(f"\n--- 🔄 Refining Plan based on New Results ---")
        executed_plan_idx = self.state["iteration_index"]
        
        # Extract from state
        objective = self.state["objective"]
        current_plan = self.state["current_plan"]
        
        # --- 1. PARSE RESULTS (Use utility function) ---
        consolidated_feedback, loaded_images = parse_multimodal_results(results)
        
        # Update State History
        self.state["experimental_results"].append({
            "iteration": executed_plan_idx,
            "timestamp": datetime.now().isoformat(),
            "data_summary": str(results)
        })
        self.state["iteration_index"] += 1 
        next_plan_idx = self.state["iteration_index"]
        
        # --- 2. BUILD FEEDBACK PROMPT ---
        feedback_prompt = f"""We executed the previous plan. Here are the experimental results:
{consolidated_feedback}

**TASK:** Analyze these results (including any attached plots) to Refine or Update the plan.
Select the most appropriate strategy:
1. **CONFIRMED:** If hypothesis is validated, propose next step.
2. **OPTIMIZATION NEEDED:** If result is valid but sub-optimal, tune parameters.
3. **INCONCLUSIVE:** If data is noisy, propose refined experiment.
4. **OPERATIONAL FAILURE:** If failure was code/equipment, propose fix.
5. **SCIENTIFIC FAILURE:** If hypothesis is disproven, propose new approach.
"""
        
        # --- 3. RESULT-AWARE RAG ---
        new_literature_context = None
        
        if use_literature_rag:
            if self.kb_docs.index and self.kb_docs.index.ntotal > 0:
                search_query = f"Implications and causes of: {consolidated_feedback[:400]}"
                print(f"  - 🔍 Searching literature for context on results...")
                hits = self.kb_docs.retrieve(search_query, top_k=3)
                if hits:
                    new_literature_context = "\n---\n".join([c['text'] for c in hits])
                    print(f"    -> Found {len(hits)} relevant document chunks.")
                else:
                    print(f"    -> No relevant documents found.")
            else:
                print(f"  - ℹ️  Literature RAG requested but no docs KB available.")
        else:
            print(f"  - ℹ️  Skipping literature RAG (use_literature_rag=False)")
        
        # --- 4. GENERATE REFINED PLAN ---
        if new_literature_context:
            print(f"  - Reasoning over results with literature context...")
        else:
            print(f"  - Reasoning over results...")

        
        new_plan = refine_plan_with_feedback(
            original_result=current_plan,
            feedback=feedback_prompt,
            objective=objective,
            model=self.model,
            generation_config=self.generation_config,
            new_context=new_literature_context,
            result_images=loaded_images
        )

        if new_plan.get("error"):
            print(f"\n❌ Refinement Failed: {new_plan.get('message')}") 
            self._log_action(
                action="refine_plan",
                input_ctx={
                    "results_summary": consolidated_feedback[:200],
                    "use_literature_rag": use_literature_rag
                },
                result=new_plan,
                rationale=None
            )           
            return new_plan
        
        # Snapshot: Reasoning Draft
        new_plan["iteration"] = next_plan_idx
        new_plan["stage"] = "Reasoning Draft"
        self.state["plan_history"].append(new_plan.copy())
        self.state["current_plan"] = new_plan

        self._log_action(
            action="refine_plan_reasoning",
            input_ctx={
                "results_summary": consolidated_feedback[:200],
                "has_literature_context": new_literature_context is not None,
                "num_images": len(loaded_images)
            },
            result=new_plan,
            rationale=new_plan.get("proposed_experiments", [{}])[0].get("justification") if new_plan.get("proposed_experiments") else None
        )

        # --- 5. HUMAN STRATEGY FEEDBACK ---
        human_feedback = None
        if enable_human_feedback and not new_plan.get("error"):
            print("\n" + "="*60)
            print("🧠 AGENT'S PROPOSED REVISION BASED ON RESULTS")
            print("="*60)
            display_plan_summary(new_plan)
            
            human_feedback = get_user_feedback()
            
            if human_feedback: 
                print(f"\n📝 Feedback received. Adjusting strategy...")
                self.state["human_feedback_history"].append({
                    "phase": "science_iteration", 
                    "feedback": human_feedback
                })
                new_plan = refine_plan_with_feedback(
                    original_result=new_plan,
                    feedback=human_feedback,
                    objective=objective,
                    model=self.model,
                    generation_config=self.generation_config
                )
                # Snapshot: Human Refined
                new_plan["iteration"] = next_plan_idx
                new_plan["stage"] = "Human Refined (Science)"
                self.state["plan_history"].append(new_plan.copy())
                self.state["current_plan"] = new_plan
                print("✅ Strategic revision updated.")

        self._log_action(
            action="refine_plan",
            input_ctx={
                "iteration": next_plan_idx,
                "results_provided": True
            },
            result=new_plan,
            rationale=new_plan.get("proposed_experiments", [{}])[0].get("justification") if new_plan.get("proposed_experiments") else None,
            feedback=human_feedback
        )
        
        self.state["status"] = "refined"
        return new_plan
    
    def refine_implementation_code(self,
                                   plan: Dict[str, Any],
                                   enable_human_feedback: bool = True) -> Dict[str, Any]:
        """
        Updates implementation code for a refined plan.
        
        This is Step 2 of the iteration process - maps the refined experimental
        strategy to executable code using the Code KB. Implementations found
        on ``self.state["current_plan"]`` are passed to the code RAG step as
        prior context.

        Each plan produced here is stamped with ``iteration`` and ``stage``,
        appended as a shallow copy to ``self.state["plan_history"]`` and stored
        as ``self.state["current_plan"]``. Every human-requested code
        refinement is also written to the action log as ``refine_code``, the
        same way ``generate_implementation_code`` records it.
        
        Args:
            plan: Refined plan from refine_plan() (must have proposed_experiments)
            enable_human_feedback: If True, pauses for code review
            
        Returns:
            Updated plan dict with implementation_code added/updated. The
            input ``plan`` is returned untouched when the Code KB is empty or
            when ``plan`` already carries an ``error``.
        """
        
        if not self.kb_code.index or self.kb_code.index.ntotal == 0:
            print("  - ℹ️  No Code KB available, skipping implementation update")
            self._log_action(
                action="refine_implementation_code",
                input_ctx={},
                result={"status": "skipped", "error": "No Code KB"},
                rationale="Code knowledge base is empty"
            )
            return plan
        
        if plan.get("error"):
            return plan
        
        # Prefer the iteration stamped on the plan; fall back to the session counter.
        next_plan_idx = plan.get("iteration", self.state.get("iteration_index", 1))
        
        # Extract previous implementations from current state
        current_plan = self.state.get("current_plan", {})
        previous_iteration = self.state.get("iteration_index", 0) - 1
        previous_implementations = []
        
        if current_plan and "proposed_experiments" in current_plan:
            previous_implementations = [
                {
                    'experiment_name': exp.get('experiment_name', 'Unnamed'),
                    'code': exp['implementation_code'],
                    'iteration': previous_iteration,
                    'source_files': exp.get('code_source_files', []),
                    'previous_steps': exp.get('experimental_steps', [])
                }
                for exp in current_plan["proposed_experiments"]
                if "implementation_code" in exp
            ]
        
        print("\n--- Code Implementation Analysis ---")
        if previous_implementations:
            print(f"  - Context: {len(previous_implementations)} existing implementation(s)")
        else:
            print("  - Context: Writing from scratch (no previous code)")
        
        # Generate/Update code
        new_plan = perform_code_rag(
            result=plan,
            kb_code=self.kb_code,
            model=self.model,
            generation_config=self.generation_config,
            previous_implementations=previous_implementations
        )
        
        # Snapshot: Code Generated
        new_plan["iteration"] = next_plan_idx
        new_plan["stage"] = "Code Generated"
        self.state["plan_history"].append(new_plan.copy())
        self.state["current_plan"] = new_plan

        self._log_action(
            action="refine_code_rag",
            input_ctx={
                "num_previous_implementations": len(previous_implementations),
                "iteration": next_plan_idx
            },
            result=new_plan,
            rationale="Updated code based on refined experimental steps"
        )

        # --- HUMAN CODE REVIEW ---
        human_feedback = None
        if enable_human_feedback and not new_plan.get("error"):
            temp_dir = self.output_dir / "temp_code_review_iter"
            print(f"\n--- Human Code Review (Iteration {next_plan_idx}) ---")
            
            # Start from a clean directory so stale scripts are not reviewed.
            if temp_dir.exists(): 
                shutil.rmtree(temp_dir)
            files = write_experiments_to_disk(new_plan, str(temp_dir))
            
            if not files:
                # Tell the reviewer why no review prompt appears.
                print("  - ⚠️  No code generated")
            else:
                while True:
                    print("\n" + "="*60)
                    print("👀 ACTION REQUIRED: Code Review")
                    print("="*60)
                    print(f"1. Open folder: {temp_dir.resolve()}")
                    print(f"2. Inspect the {len(files)} new Python file(s).")
                    print("3. Return here to Approve or Request Changes.")
                    
                    code_feedback = get_user_feedback()
                    
                    # Empty feedback means approval and ends the review loop.
                    if not code_feedback:
                        print("✅ Code accepted.")
                        break
                    
                    human_feedback = code_feedback
                    self.state["human_feedback_history"].append({
                        "phase": "code_iteration", 
                        "feedback": code_feedback
                    })
                    print(f"\n🛠️  Refining code based on: '{code_feedback}'...")
                    
                    new_plan = refine_code_with_feedback(
                        result=new_plan,
                        feedback=code_feedback,
                        model=self.model,
                        generation_config=self.generation_config
                    )
                    
                    # Snapshot: Code Refined
                    new_plan["iteration"] = next_plan_idx
                    new_plan["stage"] = "Code Refined"
                    self.state["plan_history"].append(new_plan.copy())
                    self.state["current_plan"] = new_plan

                    # Keep the audit trail complete: each refinement round is
                    # logged, as in generate_implementation_code.
                    self._log_action(
                        action="refine_code",
                        input_ctx={"feedback": code_feedback},
                        result=new_plan,
                        rationale=f"Human requested: {code_feedback}",
                        feedback=code_feedback
                    )
                    
                    print(f"  - 💾 Overwriting files in {temp_dir} with refined code...")
                    files = write_experiments_to_disk(new_plan, str(temp_dir))
        
        self._log_action(
            action="refine_implementation_code",
            input_ctx={"iteration": next_plan_idx},
            result=new_plan,
            rationale="Code refinement complete",
            feedback=human_feedback
        )
        return new_plan

    def update_plan_with_results(self,
                                 results: Any,
                                 output_json_path: Optional[str] = None,
                                 enable_human_feedback: bool = True,
                                 state_file_path: Optional[str] = None,
                                 use_literature_rag: bool = False) -> Dict[str, Any]:
        """
        Iterates on the current experimental plan based on new results.
        
        This is the main entry point for the iteration loop. It orchestrates:
        1. Scientific plan refinement (refine_plan)
        2. Implementation code updates (refine_implementation_code)
        3. File saving and report generation
        
        For more granular control, call refine_plan() and refine_implementation_code()
        separately.
        
        **Supported Result Formats:**
        
        The `results` parameter is highly flexible and accepts:
        
        **1. Text String (Qualitative Observations)**
            >>> agent.update_plan_with_results(
            ...     results="Yield was 12%, unexpected precipitation"
            ... )
        
        **2. Single File Path**
            >>> agent.update_plan_with_results(
            ...     results="./experiments/run_005.csv"
            ... )
            >>> # Auto-discovers ./experiments/run_005.json metadata
        
        **3. Image Path (Visual Analysis)**
            >>> agent.update_plan_with_results(
            ...     results="./plots/failure_analysis.png"
            ... )
        
        **4. Data Dictionary**
            >>> agent.update_plan_with_results(
            ...     results={
            ...         "yield": 45.2,
            ...         "purity": 87.3,
            ...         "observations": "Product color changed to yellow"
            ...     }
            ... )
        
        **5. File + Description (Recommended for Images)**
            >>> agent.update_plan_with_results(
            ...     results={
            ...         "path": "./microscopy/crystals.tiff",
            ...         "description": "Crystal morphology shows needle-like structure"
            ...     }
            ... )
        
        **6. List of Mixed Formats (Most Flexible)**
            >>> agent.update_plan_with_results(
            ...     results=[
            ...         "Experiment date: 2024-01-15",
            ...         "./data/icpms_run12.csv",              # Quantitative data
            ...         "./data/icpms_run12.json",             # Optional metadata
            ...         {
            ...             "path": "./photos/product.jpg",
            ...             "description": "White crystalline solid"
            ...         },
            ...         {
            ...             "temp_max": 78.5,
            ...             "pressure_stable": True
            ...         },
            ...         "./logs/errors.txt",                   # Equipment logs
            ...         "Stirrer stopped at t=15min, restarted manually"
            ...     ]
            ... )
        
        **Data File Handling:**
        - **CSV/Excel files** (.csv, .xlsx, .xls):
          * Automatically parsed and summarized
          * Metadata JSON auto-discovered (e.g., data.csv → data.json)
          * Column definitions and units included if metadata present
        
        - **Image files** (.png, .jpg, .jpeg, .tiff, .bmp):
          * Loaded and passed to vision model for analysis
          * Supports plots, microscopy, photos, diagrams
        
        - **Log files** (.txt, .log, .md, .json):
          * Read as text and included in context
          * Useful for equipment errors, timestamps, notes
        
        **Workflow Overview:**
        
        Phase 1 - Scientific Refinement:
            1. Parse results (multimodal)
            2. Search knowledge base for relevant context
            3. LLM analyzes and proposes strategy revision
            4. Human review (if enabled)
            5. Incorporate feedback and regenerate
        
        Phase 2 - Implementation Update:
            1. Extract previous code implementations
            2. LLM decides: preserve, update, or rewrite
            3. Generate updated scripts
            4. Human code review (if enabled)
            5. Save to ./output_scripts/
        
        Phase 3 - Persistence:
            1. Save plan JSON
            2. Save state JSON (for resumption)
            3. Generate HTML report
        
        Args:
            results: Experimental outcomes. Accepts:
                - String: Text description
                - String: File path (data, image, or log)
                - Dict: Structured data or {path: ..., description: ...}
                - List: Mix of any above formats
                See format examples above for details.
            
            output_json_path: Path to save the updated plan. If provided:
                - Saves plan to: {output_json_path}
                - Saves state to: {output_json_path}.state.json
                - Saves report to: {output_json_path}.html
                Example: "./outputs/iteration_2.json"
            
            enable_human_feedback: If True, pauses twice for user review:
                1. After scientific plan generation
                2. After code generation
                Set to False for fully autonomous operation.
                Defaults to True.
            
            state_file_path: Optional path to restore state from a checkpoint.
                Useful for resuming after shutdown. Equivalent to calling
                agent.restore_state() before this method.
                Example: "./outputs/session.state.json"
            
            use_literature_rag: If True, searches knowledge base for context 
                           relevant to the experimental results. 
                           Defaults to False for faster iteration.
        
        Returns:
            Dict containing the complete agent state:
            {
                "session_id": "...",
                "objective": "...",
                "iteration_index": 2,
                "current_plan": {...},
                "plan_history": [...],
                "experimental_results": [...],
                "status": "iterated"
            }
        
        Raises:
            ValueError: If no active state found and no state_file_path provided
        
        Example 1 - Simple Text Results:
            >>> agent.update_plan_with_results(
            ...     results="Yield dropped to 15%, likely due to low temperature"
            ... )
        
        Example 2 - Data File Results:
            >>> agent.update_plan_with_results(
            ...     results="./lab_data/hplc_run_005.csv",
            ...     output_json_path="./outputs/iteration_2.json"
            ... )
        
        Example 3 - Complete Multi-Modal Results:
            >>> agent.update_plan_with_results(
            ...     results=[
            ...         "Run completed successfully on 2024-01-15 at 14:30",
            ...         "./data/gc_ms_results.csv",
            ...         {
            ...             "path": "./plots/conversion_vs_time.png",
            ...             "description": "Conversion plateaus at 60min"
            ...         },
            ...         {
            ...             "yield": 78.5,
            ...             "selectivity": 92.3,
            ...             "notes": "Product purity excellent"
            ...         },
            ...         "./logs/temperature_profile.txt"
            ...     ],
            ...     output_json_path="./outputs/iteration_3.json",
            ...     enable_human_feedback=True
            ... )
        
        Example 4 - Resume from Checkpoint:
            >>> # After restarting Python
            >>> agent = PlanningAgent()
            >>> agent.update_plan_with_results(
            ...     results="./new_data.csv",
            ...     state_file_path="./outputs/session.state.json"
            ... )
        
        Example 5 - Step-by-Step Control:
            >>> # For maximum control, use individual methods:
            >>> plan = agent.refine_plan(results="...")
            >>> # Review plan, make modifications...
            >>> plan = agent.refine_implementation_code(plan)
            >>> # Review code, make modifications...
            >>> agent._save_results_to_json(plan, "./plan.json")
        
        Notes:
            - The method is stateful - maintains session history across calls
            - Safe to shut down between calls (use state_file_path to resume)
            - Automatically includes previous code when generating updates
            - All outputs saved to ./output_scripts/ directory
        """
        
        # Phase 1: Refine scientific strategy
        plan = self.refine_plan(
            results=results,
            enable_human_feedback=enable_human_feedback,
            state_file_path=state_file_path,
            use_literature_rag=use_literature_rag
        )
        
        if plan.get("error"):
            if output_json_path:
                self._save_results_to_json(plan, output_json_path)
            return self.state
        
        # Phase 2: Update implementation code
        plan = self.refine_implementation_code( 
            plan=plan,
            enable_human_feedback=enable_human_feedback
        )
        
        # Final state update
        self.state["current_plan"] = plan
        self.state["status"] = "iterated"
        
        # Save outputs
        final_out = "./output_scripts"
        print(f"\n--- Saving Final Scripts to: {final_out} ---")
        write_experiments_to_disk(plan, final_out)
        
        if output_json_path:
            self._save_results_to_json(plan, output_json_path)
            self._save_state_to_json(output_json_path + ".state.json")
            self._generate_html_report(output_json_path)
            
        return self.state
    
    def _generate_html_report(self, json_path: str):
        """Helper to generate HTML report alongside JSON."""
        if not json_path: return
        html_path = str(Path(json_path).with_suffix('.html'))
        try:
            generator = HTMLReportGenerator(self.state)
            generator.generate(html_path)
        except Exception as e:
            print(f"⚠️ Failed to generate HTML report: {e}")

    def perform_technoeconomic_analysis(self, objective: str,
                                        knowledge_paths: Optional[List[str]] = None,
                                        primary_data_set: Optional[Union[str, Dict[str, str]]] = None,
                                        image_paths: Optional[List[str]] = None,
                                        image_descriptions: Optional[List[str]] = None,
                                        output_json_path: Optional[str] = None) -> Dict[str, Any]:
        """
        Perform a technoeconomic analysis (TEA) using Dual-KB retrieval.

        **Workflow:**

        1. Resolve the primary dataset and collect images
        2. State initialization (only when the agent has no state yet)
        3. Knowledge base construction (if needed)
        4. External literature search (optional, only when a literature
           agent is configured)
        5. RAG-based economic analysis
        6. Commit to history, log the action, and save (JSON + state + HTML)

        **Integration with Planning:**

        TEA results are stored in the agent's state and can inform subsequent
        experimental planning:

            >>> # Perform TEA first
            >>> tea_results = agent.perform_technoeconomic_analysis(
            ...     objective="Recover lithium from brine",
            ...     knowledge_paths=["./market_data/", "./reports/"],
            ... )
            >>>
            >>> # Use TEA insights in experimental planning
            >>> plan = agent.propose_experiments(
            ...     objective="Develop lithium extraction process",
            ...     knowledge_paths=["./extraction_methods/"],
            ...     additional_context=tea_results,
            ...     primary_data_set={
            ...         "file_path": "./brine_composition.xlsx",
            ...         "metadata_path": "./metadata.json"
            ...     }
            ... )

        Args:
            objective: Research objective to evaluate economically.
                Should describe the material, process, or technology to assess.
                Examples:
                    - "Recover rare earth elements from coal ash"
                    - "Evaluate magnesium extraction from produced water"
                    - "Assess economic viability of direct air capture"

            knowledge_paths: Paths to documents for TEA context.
                Should include market data, pricing reports, criticality
                assessments, existing TEA studies, and process descriptions.
                Supports both PDF/TXT and Excel/CSV. Images found under these
                paths are picked up automatically.
                Examples: ["./market_reports/", "./critical_materials_report.pdf",
                "./public_data.xlsx", "./public_data.json"]

            primary_data_set: Main dataset for analysis, given either as a
                path string or as a dict. Can contain composition,
                concentration, or yield data.
                Example: {"file_path": "./feedstock_composition.xlsx"}

            image_paths: Images to support TEA analysis (explicit listing is
                slated for deprecation in favour of auto-discovery).
                Examples: criticality matrices, supply chain diagrams,
                cost breakdowns.

            image_descriptions: Descriptions for the images in ``image_paths``,
                in the same order.
                Example: ["Criticality matrix showing supply risk vs. importance"]

            output_json_path: Path to save TEA results. When provided:
                - saves the results to {output_json_path}
                - saves the full state to {output_json_path}.state.json
                - generates an HTML report at the same path with its suffix
                  replaced by ``.html``

        Returns:
            Dict[str, Any]: The technoeconomic analysis result. On success it
            is tagged with ``type``, ``stage`` and ``iteration`` and a copy is
            appended to ``self.state["plan_history"]``. When literature context
            was retrieved it is included under ``literature_search``. If the
            knowledge base cannot be prepared, ``{"error": "KB Init Failed"}``
            is returned and nothing is saved.

        Side effects:
            ``self.state["iteration_index"]`` is always reset to 0, because
            TEA is treated as the pre-planning step.

        Example:
            >>> agent = PlanningAgent()
            >>> tea_results = agent.perform_technoeconomic_analysis(
            ...     objective="Recover rare earth elements from coal ash",
            ...     knowledge_paths=["./market_reports/"],
            ...     primary_data_set={"file_path": "./feedstock_composition.xlsx"},
            ...     image_paths=["./criticality_matrix.jpg"],
            ...     image_descriptions=["Criticality matrix showing supply risk vs. importance"],
            ...     output_json_path="./outputs/tea.json"
            ... )
        """

        # 0a. Resolve Primary Data
        primary_data_set = resolve_primary_data_path(primary_data_set)
        # 0b. Resolve image paths
        # Images explicitly specified by user under image_paths (will be deprecated in the future)
        manual_images = image_paths or []
        # Find new images under the provided knowledge paths but exclude any that are already in manual_images
        auto_images = [img for img in extract_images(knowledge_paths) if img not in manual_images]
        # Append auto-images to the end so manual descriptions stay aligned with manual images
        all_image_paths = manual_images + auto_images

        # 1. State Initialization (if starting fresh with TEA)
        if not self.state:
            self.state = self._initialize_state(
                objective=objective,
                knowledge_paths=knowledge_paths,
                code_paths=None,
                primary_data_set=primary_data_set,
                image_paths=all_image_paths,
                image_descriptions=image_descriptions
            )

        # TEA is always step 0 (pre-planning)
        self.state["iteration_index"] = 0

        # 2. Build KB if needed
        if not self._ensure_kb_is_ready(knowledge_paths, code_paths=None):
            error_result = {"error": "KB Init Failed"}
            self._log_action(
                action="perform_technoeconomic_analysis",
                input_ctx={"objective": objective},
                result=error_result,
                rationale=None
            )
            return error_result

        # 3. Literature Search (optional enrichment)
        lit_context = ""
        if self.lit_agent:
            print("  - 🌍 Querying literature for TEA context...")
            lit_res = self.lit_agent.search_for_economic_data(
                optimize_search_query(objective=objective, model=self.model)
            )
            # Tolerate a malformed/empty response: a missing key must not
            # abort the whole analysis, since this step is only additive.
            if isinstance(lit_res, dict) and lit_res.get('status') == 'success':
                lit_context = lit_res.get('content') or ""

        # 4. Perform RAG
        res = perform_science_rag(
            objective=objective,
            instructions=TEA_INSTRUCTIONS,
            task_name="Technoeconomic Analysis",
            kb_docs=self.kb_docs,
            model=self.model,
            generation_config=self.generation_config,
            primary_data_set=primary_data_set,
            image_paths=all_image_paths,
            image_descriptions=image_descriptions,
            external_context=lit_context
        )

        if lit_context:
            res["literature_search"] = lit_context

        # 5. Commit to State
        succeeded = not res.get("error")
        if succeeded:
            # Tags for the HTML Generator
            res["type"] = "technoeconomic_analysis"
            res["stage"] = "TEA Initial"
            res["iteration"] = 0  # TEA is step 0 (pre-planning)
            # Append copy to history
            self.state["plan_history"].append(res.copy())

        # The assessment comes from model output, so only read the summary
        # when it actually has the expected dict shape.
        rationale = None
        if succeeded:
            assessment = res.get("technoeconomic_assessment")
            if isinstance(assessment, dict):
                rationale = assessment.get("summary")

        self._log_action(
            action="perform_technoeconomic_analysis",
            input_ctx={
                "objective": objective,
                "knowledge_paths": knowledge_paths,
                "has_primary_data": primary_data_set is not None,
                "has_literature": bool(lit_context)
            },
            result=res,
            rationale=rationale
        )

        # 6. Save & Generate Report
        if output_json_path:
            self._save_results_to_json(res, output_json_path)
            self._save_state_to_json(output_json_path + ".state.json")

            # Trigger HTML Generation (will show TEA card)
            self._generate_html_report(output_json_path)

        return res


================================================
FILE: planning_orchestrator.py
================================================
import json
import logging
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
from enum import Enum

from ...auth import get_internal_proxy_key
from ...wrappers.openai_wrapper import OpenAIAsGenerativeModel
from ...wrappers.litellm_wrapper import LiteLLMGenerativeModel
from .planning_agent import PlanningAgent
from .scalarizer_agent import ScalarizerAgent
from .bo_agent import BOAgent
from .orchestrator_tools import OrchestratorTools
from ._deprecation import normalize_params


class AutonomyLevel(Enum):
    """
    How much independence the orchestrator is given.

    Each member's value is its lowercase string identifier.

    Members:
        CO_PILOT: AI assists human (default). Human reviews all plans/code.
        SUPERVISED: Human assists AI. AI proceeds unless human intervenes.
        AUTONOMOUS: Full autonomy. No human feedback requested.
    """

    # Human leads, AI assists (current default)
    CO_PILOT = "co_pilot"

    # AI leads, human can intervene
    SUPERVISED = "supervised"

    # Full autonomy, no human feedback
    AUTONOMOUS = "autonomous"


# Mode-specific directives. get_system_prompt() prepends exactly one of them
# to _SYSTEM_PROMPT_BODY, keyed by AutonomyLevel, so the directive is the first
# thing the model reads.
#
# Directive for AutonomyLevel.CO_PILOT: the human leads; the model is told to
# run one tool per response and wait for direction before continuing.
_CO_PILOT_DIRECTIVE = """
**CRITICAL OPERATING MODE: CO-PILOT (Human Leads, AI Assists)**
- You are assisting the human researcher. They are in control.
- ALWAYS wait for human approval before proceeding to next steps.
- After generating plans or code, summarize and wait for feedback.
- Do NOT chain multiple tool calls without human confirmation.
- Ask clarifying questions when objectives are ambiguous.

**SINGLE-TOOL EXECUTION RULE:**
1. **EXECUTE ONE TOOL**: Call only ONE tool per response.
2. **OBSERVE OUTPUT**: meaningful "next steps" depend on what the tool *actually* returned.

**CODE GENERATION RULE:**
Only call `generate_implementation_code` when BOTH conditions are true:
  a) User explicitly asks for "script", "protocol", "code", or mentions equipment (Opentrons, robot, automation)
  b) Code KB is loaded OR user specifies a code directory

**RESPONSE STYLE:**
- After each tool call, summarize the result and wait for user direction.
- Do NOT end responses with generic menus like "Would you like me to..."
- Instead say "Ready for results." or "Let me know how to proceed."
"""

# Directive for AutonomyLevel.SUPERVISED: the model leads and proceeds without
# asking permission, pausing only on errors or genuinely ambiguous decisions.
# Selected by get_system_prompt() and placed ahead of _SYSTEM_PROMPT_BODY.
_SUPERVISED_DIRECTIVE = """
**CRITICAL OPERATING MODE: SUPERVISED (AI Leads, Human Supervises)**
- You lead the research workflow. Human supervises and can intervene.
- Proceed with reasonable next steps without asking for permission.
- Human will still review generated plans and code through the standard review interface.
- Do NOT ask clarifying questions unless truly ambiguous - make reasonable assumptions.
- If a tool returns error or unexpected results, pause and report to human.
- Periodically summarize progress (every 3-5 steps) but don't wait for response.
- Use your judgment but remain open to human corrections.

**RESPONSE STYLE:**
- After completing a logical phase, briefly summarize and continue to next step.
- Do NOT ask permission between steps - just proceed.
- Only pause to report errors or request human input on ambiguous decisions.
"""

# Directive for AutonomyLevel.AUTONOMOUS: the model is told to chain tool calls
# through the whole pipeline in a single turn and summarize only at the end.
# Selected by get_system_prompt() and placed ahead of _SYSTEM_PROMPT_BODY.
_AUTONOMOUS_DIRECTIVE = """
**CRITICAL OPERATING MODE: FULLY AUTONOMOUS**
- Execute the complete research workflow independently.
- Chain tool calls as needed to achieve the objective.
- Only pause for human input if you encounter unrecoverable errors.
- Make decisions based on tool outputs and scientific reasoning.
- Save checkpoints regularly for human review later.
- Proceed through: plan → execute → analyze → optimize → iterate.
- Report final results and key decision points at the end.
- NOTE: Human still performs physical experiments in the lab

**AUTONOMOUS WORKFLOW - EXECUTE WITHOUT ASKING:**
When starting a new campaign, execute the FULL pipeline automatically:
1. `list_workspace_files` - Survey available data
2. `run_economic_analysis` - Assess viability (if knowledge_dir or data available)
3. `generate_initial_plan` - Create experimental strategy
4. `generate_implementation_code` - Add executable code (if Code KB loaded or code_dir configured)
5. `save_checkpoint` - Preserve state

**RESPONSE STYLE:**
- Do NOT stop to summarize between tool calls.
- Do NOT ask "Would you like me to..." - just do it.
- Chain ALL tools needed to complete the workflow in a single turn.
- Only provide a summary AFTER the entire pipeline is complete.
"""

_SYSTEM_PROMPT_BODY = """
You are the **Research Agent**. Your goal is to coordinate a scientific campaign.

**RESPONSE GUIDELINES (STRICT):**
- **NO REDUNDANCY**: Do NOT repeat the tool's output. Summarize insights only.


**TOOLCHAIN & WORKFLOWS:**

**SETUP:**
0. `show_directory_guide`: Show recommended project structure. Use when user asks about setup/organization.

**STRATEGY & PLANNING TOOLS:**
1. `generate_initial_plan`: Use this when starting a NEW campaign or defining a new objective.
   - Extract knowledge_paths when user mentions papers/PDFs/documents
   - Extract primary_data_set when user mentions experimental data or results folders or files
   - additional_context: Lab constraints, equipment, reagents, budget
   - Previous TEA results automatically included
   - Example:
     * "Generate plan for Li recovery using info in ./papers/ and preliminary results in ./data/"
       → generate_initial_plan(specific_objective="Li recovery", 
                               knowledge_paths="./papers", 
                               primary_data_set="./data")
     
2. `run_economic_analysis`: Use this if the user asks about costs, viability, market fit, or TEA.
    - When primary_data_set is provided, ALL analysis and planning must be constrained to materials/conditions actually present in that data. Literature provides process knowledge, not feedstock assumptions.

    - Example:
        * "Use reports in ./papers/ and composition data in ./data/ to determine most profitable material"
        → run_economic_analysis(
            knowledge_paths="./papers",
            primary_data_set="./data"

3. `generate_implementation_code`: Add executable code to existing plan.
   - Maps experimental steps to APIs/automation code
   - Use AFTER generate_initial_plan() once strategy is approved

4. `refine_plan_with_results`: Refine scientific strategy based on experimental results.
   - Use for: failures, pivots, qualitative observations, visual analysis
   - Accepts: text descriptions, file paths, or comma-separated files
   - Updates: Scientific plan only (no code changes)
   - Example:
     * "Refine based on ./run_005.csv and ./plot.png"
       → refine_plan_with_results(result_data="./run_005.csv,./plot.png")
   
5. `refine_implementation_code`: Update executable code for refined plan.
   - Use AFTER refine_plan_with_results() once strategy is approved
   - Maps refined experimental steps to code
   - Example:
     * After plan refinement is approved
       → refine_implementation_code()

6. `discard_plan`: Discard wrong plan (keeps in history for transparency).


**DATA TOOLS:**
7. `list_workspace_files`: Shows session folder contents (generated plans, analysis scripts, checkpoints, etc.)
8. `analyze_file`: Use this for RAW DATA files (CSV, XLSX, TXT) to calculate metrics via code.
    - First use: Generates analysis script automatically
    - Subsequent uses: Reuses script for consistency
    - force_regenerate=True: Use when analysis needs change
9. `reset_analysis_logic`: Use this if the analysis script is wrong.

**OPTIMIZATION TOOLS:**
10. `run_optimization`: Mathematical parameter suggestions via Bayesian Optimization.
    
    **Modes:**
    - Sequential: `run_optimization()`
    - Parallel: `run_optimization(parallel_capable=True, batch_size=N)`
      * Infer N from context or ask user. Retry if "batch_size_required" returned.
    - Constraint-aware: `run_optimization(parallel_capable=True, batch_size=N, physical_constraints="...")`
      * Use when the setup has physical limitations (plate layouts, shared channels, discrete stocks).
      * Extract constraints from the plan or user description.
    - Budget-aware: `run_optimization(experimental_budget=K)`
      * K = optimization iterations remaining (including this one). 1 = final shot.
      * Pass when user mentions remaining experiments, budget, or "last round".
      * Combinable with all other modes.

    **Constraint examples:**
    - User says "96-well plate where rows share temperature"
      → physical_constraints="96-well plate: 8 rows share temperature, 12 columns share pH"
    - User says "we only have 5 catalyst stocks"
      → physical_constraints="Discrete catalyst concentrations: 0.1, 0.5, 1.0, 2.0, 5.0 mM"
    - User says "reactor zones share cooling"
      → physical_constraints="Reactor: zones A,B share cooling, zones C,D share heating"
    
    **Budget examples:**
    - User says "this is our last round"
      → experimental_budget=1
    - User says "we have 3 more runs"
      → experimental_budget=3
    - User says nothing about budget
      → omit experimental_budget (default behavior)
      
11. `save_checkpoint`: Save campaign state. Use after every 3-5 experiments.

**FILE PATH RULES:**
Assume user runs agent from project directory. For example, when user says "file.csv in data", use "./data/file.csv"

**When user mentions a SPECIFIC filename:**
1. Extract the filename (with or without extension)
2. Pass it to the tool
3. Tool will automatically:
   - Try exact match
   - Try common extensions (.csv, .xlsx, .xls) if no extension provided
   - Search in ./experimental_results, ./data, ./results, ./
   - Suggest corrections for typos using fuzzy matching

**CRITICAL WORKFLOW RULES:**
**Use `run_optimization` (The Math Loop) IF:**
- You are optimizing a well-defined property for the current experimental setup.
- The experiments are running successfully (no failures), and you just need to tune parameters.
- **At least 3 data files have been successfully analyzed** (check by calling list_workspace_files).

**Use `iterate_with_results` (The Cognitive Loop) IF:**
- You need to propose a NEW strategy or experimental setup (e.g., "Change catalyst").
- The experiment FAILED (e.g., "Precipitate formed", "Equipment error").
- The result is qualitative (e.g., **Images**, visual observations, logs).
- There are NOT enough data points for numerical optimization yet.

**If user indicates the generated plan is wrong:**
- Common patterns: "That's not what I asked for", "Wrong material", "Focus on X instead"
- Actions:
  - Ask user to confirm: "I generated a plan for [X], but you mentioned [Y]. Should I correct this?" 
  - If user confirms it's wrong:
        a. Call discard_plan(reason="Specific explanation of what was wrong") 
        b. Call generate_initial_plan(...) again with corrected parameters 
- The discarded plan stays in history for transparency but won't appear in reports

**LONG CAMPAIGN MANAGEMENT:**
- Call `save_checkpoint` after every 3-5 experiments
- If conversation becomes very long (>50 messages), suggest user restart with checkpoint

**BEHAVIOR:**
- Extract ALL paths mentioned by user (papers, data, code, reports)
- Extract specific_objective from user's goal/intent
- Combine lab constraints into additional_context (equipment, reagents, pH, budget, etc.)
- Parse tool JSON responses before calling dependent tools
- If status="error", stop and report to user
- Save checkpoint periodically during long campaigns
"""


def get_system_prompt(autonomy_level: AutonomyLevel) -> str:
    """
    Assemble the orchestrator system prompt for one autonomy level.

    The result is the mode-specific directive followed by the shared prompt
    body.

    Args:
        autonomy_level: The AutonomyLevel member selecting the directive.

    Returns:
        The directive text concatenated with ``_SYSTEM_PROMPT_BODY``.

    Raises:
        KeyError: If ``autonomy_level`` is not one of the three members.
    """
    directive_by_level = {
        AutonomyLevel.CO_PILOT: _CO_PILOT_DIRECTIVE,
        AutonomyLevel.SUPERVISED: _SUPERVISED_DIRECTIVE,
        AutonomyLevel.AUTONOMOUS: _AUTONOMOUS_DIRECTIVE,
    }
    mode_directive = directive_by_level[autonomy_level]
    return "".join((mode_directive, _SYSTEM_PROMPT_BODY))



class PlanningOrchestratorAgent:
    """
    Orchestrator agent for coordinating multi-iteration research campaigns.
    
    Manages the full experimental loop with configurable autonomy:
    1. Hypothesis generation (PlanningAgent)
    2. Experiment execution (external)
    3. Result analysis (ScalarizerAgent)
    4. Parameter optimization (BOAgent)
    5. Iteration decisions
    
    Args:
        objective: Research objective description.
        base_dir: Base directory for campaign outputs.
        api_key: API key for the LLM provider.
        model_name: Model name.
        base_url: Base URL for internal proxy endpoint.
        embedding_model: Embedding model name.
        embedding_api_key: API key for the embedding LLM provider.
        futurehouse_api_key: Optional FutureHouse API key for literature search.
        restore_checkpoint: Whether to restore from previous checkpoint.
        autonomy_level: Level of autonomy (CO_PILOT, SUPERVISED, or AUTONOMOUS).
        
        google_api_key: DEPRECATED. Use 'api_key' instead.
        local_model: DEPRECATED. Use 'base_url' instead.
    """
    def __init__(
        self,
        objective: str = "Undefined Research Goal",
        base_dir: str = "./campaign_outputs",
        api_key: Optional[str] = None,
        model_name: str = "gemini-3-pro-preview",
        base_url: Optional[str] = None,
        embedding_model: str = "gemini-embedding-001",
        embedding_api_key: Optional[str] = None,
        futurehouse_api_key: Optional[str] = None,
        restore_checkpoint: bool = False,
        autonomy_level: AutonomyLevel = AutonomyLevel.CO_PILOT,
        data_dir: Optional[str] = None,
        knowledge_dir: Optional[str] = None,
        code_dir: Optional[str] = None,
        # Deprecated
        google_api_key: Optional[str] = None,
        local_model: Optional[str] = None,
    ):
        """
        Set up the campaign workspace, sub-agents, LLM client and chat memory.

        The class docstring describes the core arguments. Additional
        arguments handled here:

        Args:
            autonomy_level: An AutonomyLevel member, or its string value
                ("co_pilot", "supervised", "autonomous").
            data_dir: Directory containing experimental results. Required
                (and must exist) for SUPERVISED and AUTONOMOUS modes.
            knowledge_dir: Optional directory, stored as ``self.knowledge_dir``.
            code_dir: Optional directory, stored as ``self.code_dir``.

        Raises:
            ValueError: If ``base_url`` is set but no API key can be resolved;
                if ``autonomy_level`` is not a valid level; or if ``data_dir``
                is missing or does not exist in SUPERVISED/AUTONOMOUS mode.
        """
        # Handle deprecated parameters
        api_key, base_url = normalize_params(
            api_key=api_key,
            google_api_key=google_api_key,
            base_url=base_url,
            local_model=local_model,
            source="PlanningOrchestratorAgent"
        )

        if base_url:
            # Internal proxy mode: one key is used for chat and embeddings.
            if api_key is None:
                api_key = get_internal_proxy_key()

            if not api_key:
                raise ValueError(
                    "API key required for internal proxy.\n"
                    "Set SCILINK_API_KEY environment variable or pass api_key parameter."
                )

            if embedding_api_key is not None:
                logging.warning(
                    "⚠️ embedding_api_key is ignored for internal proxy. "
                    "Using api_key for all requests."
                )

            embedding_api_key = api_key
        else:
            # LiteLLM mode: ensure embedding_api_key is set
            if embedding_api_key is None:
                embedding_api_key = api_key

        # Store autonomy level. Enum lookup by value returns a member
        # unchanged and converts a plain string such as "supervised", so a
        # bad value fails here with a clear ValueError instead of later with
        # an AttributeError on ``.value``.
        autonomy_level = AutonomyLevel(autonomy_level)
        self.autonomy_level = autonomy_level
        self._enable_human_feedback = self._should_enable_human_feedback()
        logging.info(f"🎛️  Autonomy Level: {autonomy_level.value.upper()}")

        # Validate and store workspace directories
        if autonomy_level in (AutonomyLevel.SUPERVISED, AutonomyLevel.AUTONOMOUS):
            if data_dir is None:
                raise ValueError(
                    f"data_dir is required for {autonomy_level.value} mode.\n"
                    f"Specify the directory containing experimental results."
                )
            if not Path(data_dir).exists():
                raise ValueError(f"data_dir does not exist: {data_dir}")

        self.data_dir = Path(data_dir) if data_dir else None
        self.knowledge_dir = Path(knowledge_dir) if knowledge_dir else None
        self.code_dir = Path(code_dir) if code_dir else None

        if self.data_dir:
            logging.info(f"   Data directory: {self.data_dir}")

        self.objective = objective
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)

        # Registry of already-analyzed files, persisted across sessions.
        self.analyzed_files_path = self.base_dir / "analyzed_files.json"
        self.analyzed_files = {}

        if self.analyzed_files_path.exists():
            # Best-effort load: a corrupt registry must not block start-up.
            try:
                with open(self.analyzed_files_path, 'r') as f:
                    self.analyzed_files = json.load(f)
            except Exception as e:
                logging.warning(f"Could not load analyzed_files.json: {e}")
                self.analyzed_files = {}

        self.bo_data_path = self.base_dir / "optimization_data.csv"
        self.history_path = self.base_dir / "chat_history.json"
        self.checkpoint_path = self.base_dir / "checkpoint.json"

        self.active_scalarizer_script = None
        self.expected_input_columns = None
        self.expected_target_columns = []
        self.latest_tea_results = None

        self.message_count = 0
        self.last_checkpoint_message_count = 0

        # Restore runs after the defaults above are set and before the
        # sub-agents are created; this ordering is kept as-is.
        if restore_checkpoint and self.checkpoint_path.exists():
            self._restore_checkpoint()

        # --- Init Sub-Agents ---
        print("🤖 Agent: Hiring sub-agents...")
        self.planner = PlanningAgent(
            api_key=api_key,
            model_name=model_name,
            base_url=base_url,
            embedding_model=embedding_model,
            embedding_api_key=embedding_api_key,
            futurehouse_api_key=futurehouse_api_key,
            output_dir=str(self.base_dir)
        )
        self.scalarizer = ScalarizerAgent(
            api_key=api_key,
            model_name=model_name,
            base_url=base_url,
            output_dir=str(self.base_dir / "scalarizer_outputs")
        )
        self.bo = BOAgent(
            api_key=api_key,
            model_name=model_name,
            base_url=base_url,
            output_dir=str(self.base_dir / "bo_artifacts")
        )

        # --- Initialize Tools Registry ---
        self.tools = OrchestratorTools(self)

        # --- Get appropriate system prompt based on autonomy level ---
        system_prompt = get_system_prompt(self.autonomy_level)
        system_prompt += self._build_workspace_context()

        # --- LLM Initialization ---
        if base_url:
            logging.info(f"🏛️ Orchestrator using internal proxy: {base_url}")
            self.model = OpenAIAsGenerativeModel(
                model=model_name,
                api_key=api_key,
                base_url=base_url
            )
            self.use_openai = True
            self.tools_for_model = self.tools.openai_schemas
        else:
            logging.info(f"🌐 Orchestrator using LiteLLM: {model_name}")
            self.model = LiteLLMGenerativeModel(
                model=model_name,
                api_key=api_key,
                system_instruction=system_prompt,
                tools=self._convert_tools_to_litellm_format()
            )
            self.use_openai = False
            self.tools_for_model = self._convert_tools_to_litellm_format()

        # Store system prompt for OpenAI mode
        self._system_prompt = system_prompt

        # --- MEMORY INITIALIZATION ---
        # Both backends keep an explicit message list that starts with the
        # system prompt (tool calls are handled manually in LiteLLM mode too),
        # followed by the most recent persisted history.
        history = self._load_history()
        self.messages = [{"role": "system", "content": system_prompt}]
        if history:
            recent_history = self._trim_history(history, max_messages=100)
            self.messages.extend(recent_history)

    def _convert_tools_to_litellm_format(self) -> List[Dict]:
        """
        Return the tool schemas to hand to LiteLLM.

        LiteLLM accepts the OpenAI tool format, so the registry's
        OpenAI schemas are returned as-is (same object, no copy).
        """
        schemas = self.tools.openai_schemas
        return schemas

    def _build_workspace_context(self) -> str:
        """Assemble the workspace section appended to the system prompt.

        Returns an empty string in co-pilot mode. Otherwise returns a header,
        one line for each of the data / knowledge / code directories that is
        set, and a closing instruction to use those paths without asking.
        """
        if self.autonomy_level == AutonomyLevel.CO_PILOT:
            # Not needed in co-pilot mode: the human will guide.
            return ""

        labelled_dirs = (
            ("Data directory", self.data_dir),
            ("Knowledge directory", self.knowledge_dir),
            ("Code directory", self.code_dir),
        )

        lines = ["\n\n**WORKSPACE CONFIGURATION:**"]
        lines.extend(f"- {label}: {path}" for label, path in labelled_dirs if path)
        lines.append("\nUse these paths directly without asking for confirmation.")

        return "\n".join(lines)
    
    def _should_enable_human_feedback(self) -> bool:
        """Tell whether human review is on for the current autonomy level.

        Review stays enabled for every level except AUTONOMOUS
        (i.e. CO_PILOT and SUPERVISED both keep it).
        """
        fully_autonomous = self.autonomy_level == AutonomyLevel.AUTONOMOUS
        return not fully_autonomous

    def set_autonomy_level(self, level: AutonomyLevel) -> None:
        """
        Change the autonomy level at runtime.

        Stores the new level, recomputes the human-feedback flag, and rebuilds
        the system prompt the same way the constructor does: the level-specific
        prompt followed by the workspace context. The first entry of
        ``self.messages`` is updated in place when it is the system message.

        Args:
            level: New autonomy level to set.
        """
        old_level = self.autonomy_level
        self.autonomy_level = level
        self._enable_human_feedback = self._should_enable_human_feedback()

        # The workspace section depends on the level (it is empty in co-pilot
        # mode), so it must be regenerated together with the base prompt;
        # otherwise switching levels silently drops the configured paths.
        new_system_prompt = get_system_prompt(level) + self._build_workspace_context()
        self._system_prompt = new_system_prompt

        # Both chat handlers send self.messages, so patching the leading
        # system message is enough for subsequent turns.
        # NOTE(review): in LiteLLM mode the model wrapper was also constructed
        # with system_instruction; that copy is not refreshed here — confirm
        # whether the wrapper's copy is ever used.
        if self.messages and self.messages[0]["role"] == "system":
            self.messages[0]["content"] = new_system_prompt

        logging.info(f"🔄 Autonomy level changed: {old_level.value} → {level.value}")
        logging.info(f"   Human feedback enabled: {self._enable_human_feedback}")

    def get_human_feedback_setting(self) -> bool:
        """Expose the cached human-feedback flag for sub-agents."""
        feedback_enabled = self._enable_human_feedback
        return feedback_enabled

    def _restore_checkpoint(self):
        """Restore campaign state from the checkpoint file.

        Reads ``self.checkpoint_path`` as JSON and copies the saved scalarizer
        script, schema columns, TEA results, autonomy level and workspace
        directories back onto the agent, then prints a short summary.

        Any failure (missing/corrupt file, unexpected structure) is logged as
        a warning and not raised; attributes assigned before the failure keep
        their restored values.
        """
        print(f"  📂 Restoring checkpoint from: {self.checkpoint_path}")

        try:
            with open(self.checkpoint_path, 'r') as f:
                state = json.load(f)

            self.active_scalarizer_script = state.get("active_scalarizer_script")
            self.expected_input_columns = state.get("expected_input_columns")

            # A missing key and an explicit null both mean "no targets"; keep
            # the list type the constructor initialises this attribute with.
            self.expected_target_columns = state.get("expected_target_columns") or []

            self.latest_tea_results = state.get("latest_tea_results")

            # Restore autonomy level if saved
            if "autonomy_level" in state:
                try:
                    self.autonomy_level = AutonomyLevel(state["autonomy_level"])
                    self._enable_human_feedback = self._should_enable_human_feedback()
                except ValueError:
                    # Keep the current level, but leave a trace instead of
                    # ignoring the bad value silently.
                    logging.warning(
                        f"Ignoring invalid autonomy level in checkpoint: {state['autonomy_level']!r}"
                    )

            # Only non-empty saved paths override the configured directories.
            for dir_attr in ("data_dir", "knowledge_dir", "code_dir"):
                saved_dir = state.get(dir_attr)
                if saved_dir:
                    setattr(self, dir_attr, Path(saved_dir))

            print("    ✅ Restored state:")
            print(f"       - Analysis script: {Path(self.active_scalarizer_script).name if self.active_scalarizer_script else 'None'}")
            print(f"       - Schema: {self.expected_input_columns} → {self.expected_target_columns}")
            print(f"       - Data points: {state.get('data_points_collected', 0)}")
            print(f"       - Autonomy level: {self.autonomy_level.value}")

        except Exception as e:
            logging.warning(f"Failed to restore checkpoint: {e}")

    def _trim_history(self, history: List[Dict], max_messages: int = 100) -> List[Dict]:
        """Keep only the opening and the most recent messages of a long history.

        Histories of at most ``max_messages`` entries are returned unchanged
        (same object). Longer ones are reduced to a short head, a system
        marker stating how many messages were dropped, and the most recent
        tail.

        The cut points are moved so that no ``tool`` message is separated
        from the assistant message that requested it: a head that would stop
        in the middle of a tool exchange is shortened, and a tail that would
        start with orphaned tool results skips them. Chat-completion APIs
        reject such dangling tool messages.

        Args:
            history: Chat messages (dicts with a ``role`` key), oldest first.
            max_messages: Target number of messages to keep, marker excluded.

        Returns:
            The original list, or a new trimmed list with the marker inserted.
        """
        if len(history) <= max_messages:
            return history

        print(f"  ⚠️  Trimming history: {len(history)} → {max_messages} messages")

        def _role(msg):
            return msg.get("role") if isinstance(msg, dict) else None

        budget = max(max_messages, 0)
        # Ten opening messages are kept whenever the budget allows it; for
        # tiny budgets split evenly so the newest messages are never lost.
        context_window = 10 if budget > 10 else budget // 2
        recent_window = budget - context_window

        head_end = context_window
        # Index from the front: history[-0:] would return the whole list.
        tail_start = len(history) - recent_window

        # Head must not end between a tool request and its results.
        while head_end > 0 and _role(history[head_end]) == "tool":
            head_end -= 1
        # Tail must not begin with results whose request was dropped.
        while tail_start < len(history) and _role(history[tail_start]) == "tool":
            tail_start += 1

        omitted = tail_start - head_end
        trimmed = list(history[:head_end])
        if omitted > 0:
            trimmed.append({
                "role": "system",
                "content": f"[{omitted} messages omitted for context management]"
            })
        trimmed.extend(history[tail_start:])

        return trimmed

    def chat(self, user_input: str) -> str:
        """Process one user turn and return the agent's reply.

        Bumps the message counter, writes an auto-checkpoint once ten turns
        have passed since the previous one, and routes the input to the
        OpenAI or LiteLLM handler depending on ``self.use_openai``. The reply
        is printed and the history saved; past 80 turns a note suggesting a
        checkpoint and restart is appended to the returned text.

        Any exception is logged, an emergency checkpoint is attempted, and an
        error string is returned instead of raising.
        """
        self.message_count += 1

        # AUTO-CHECKPOINT: every 10 messages
        turns_since_checkpoint = self.message_count - self.last_checkpoint_message_count
        if turns_since_checkpoint >= 10:
            print("  💾 Auto-checkpoint triggered (every 10 messages)...")
            self._auto_checkpoint()
            self.last_checkpoint_message_count = self.message_count

        try:
            # Both backends use the same manual tool-calling loop.
            handler = self._handle_openai_chat if self.use_openai else self._handle_litellm_chat
            response_text = handler(user_input)

            print(f"🤖 Agent: {response_text}")
            self._save_history()

            if self.message_count <= 80:
                return response_text

            # The note is added after printing/saving, so only the caller sees it.
            return response_text + "\n\n⚠️ Note: Conversation is getting long. Consider calling save_checkpoint and restarting."

        except Exception as e:
            logging.error(f"Chat Error: {e}", exc_info=True)

            print("  💾 Error detected - saving emergency checkpoint...")
            self._auto_checkpoint()

            return f"❌ Error: {e}\n\n(Emergency checkpoint saved to {self.checkpoint_path})"

    def _auto_checkpoint(self):
        """Write the campaign state to ``self.checkpoint_path`` without LLM interaction.

        The payload is serialised completely in memory and written to a
        sibling ``.tmp`` file that then replaces the checkpoint. A failure
        while building or serialising the state (for example a planner state
        that is not JSON-serialisable) therefore cannot truncate an existing,
        valid checkpoint.

        Failures are logged as warnings and never raised, because this is
        also called from the error path of ``chat``.
        """
        try:
            # Row count of the optimisation data file, or 0 when none exists yet.
            data_points = len(pd.read_csv(self.bo_data_path)) if self.bo_data_path.exists() else 0

            checkpoint_data = {
                "timestamp": datetime.now().isoformat(),
                "objective": self.objective,
                "active_scalarizer_script": self.active_scalarizer_script,
                "expected_input_columns": self.expected_input_columns,
                "expected_target_columns": self.expected_target_columns,
                "data_points_collected": data_points,
                "planner_state": self.planner.state,
                "message_count": self.message_count,
                "latest_tea_results": self.latest_tea_results,
                "autonomy_level": self.autonomy_level.value,
                "data_dir": str(self.data_dir) if self.data_dir else None,
                "knowledge_dir": str(self.knowledge_dir) if self.knowledge_dir else None,
                "code_dir": str(self.code_dir) if self.code_dir else None,
            }

            # Serialise before touching the disk: opening the target with 'w'
            # first would wipe the last good checkpoint if this step fails.
            payload = json.dumps(checkpoint_data, indent=2)

            tmp_path = self.checkpoint_path.with_name(self.checkpoint_path.name + ".tmp")
            with open(tmp_path, 'w') as f:
                f.write(payload)
            tmp_path.replace(self.checkpoint_path)

            print("    ✅ Auto-checkpoint saved")

        except Exception as e:
            logging.warning(f"Auto-checkpoint failed: {e}")

    def _handle_openai_chat(self, user_input: str) -> str:
        """Handle one user turn against an OpenAI-compatible endpoint.

        Appends the user message to ``self.messages`` (trimming the history
        when it exceeds 120 entries), then loops for at most 20 round-trips:
        request a completion and, if the model asked for tools, run each one
        and append its result as a ``tool`` message before asking again.

        Every requested tool call always gets a ``tool`` reply, even when its
        arguments are not valid JSON or the tool raises. The error text is
        reported back to the model instead of propagating, because an
        assistant ``tool_calls`` message without matching replies would make
        every later request on this history fail.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The assistant's final text (empty string if it sent none), or a
            fixed warning when the iteration limit is reached.
        """
        from openai import OpenAI

        client = OpenAI(
            api_key=self.model.api_key,
            base_url=self.model.base_url
        )

        self.messages.append({"role": "user", "content": user_input})

        if len(self.messages) > 120:
            print("  ⚠️  Context window getting full - trimming history...")
            system_msg = self.messages[0]
            recent_msgs = self._trim_history(self.messages[1:], max_messages=100)
            self.messages = [system_msg] + recent_msgs

        max_iterations = 20

        for _ in range(max_iterations):
            print("  ⏳ Waiting for LLM response ...")

            response = client.chat.completions.create(
                model=self.model.model,
                messages=self.messages,
                tools=self.tools_for_model,
                tool_choice="auto"
            )

            message = response.choices[0].message

            if not message.tool_calls:
                # Content may be None; normalise so callers always get a str
                # (same convention as the LiteLLM handler).
                reply = message.content or ""
                self.messages.append({
                    "role": "assistant",
                    "content": reply
                })
                return reply

            self.messages.append({
                "role": "assistant",
                "content": message.content,
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    } for tc in message.tool_calls
                ]
            })

            for tool_call in message.tool_calls:
                func_name = tool_call.function.name
                print(f"  🔧 Calling tool: {func_name}")

                try:
                    raw_args = tool_call.function.arguments
                    # An empty argument string means "no arguments".
                    args = json.loads(raw_args) if raw_args and raw_args.strip() else {}
                    if not isinstance(args, dict):
                        raise ValueError("tool arguments must be a JSON object")
                    result = self.tools.execute_tool(func_name, **args)
                except Exception as e:
                    logging.warning(f"Tool '{func_name}' failed: {e}")
                    result = f"Error executing tool '{func_name}': {e}"

                self.messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    # Tool message content must be text.
                    "content": result if isinstance(result, str) else str(result)
                })

        return "⚠️ Maximum tool iterations reached. Please simplify your request."

    def _handle_litellm_chat(self, user_input: str) -> str:
        """Handle one user turn through LiteLLM with a manual tool-calling loop.

        Appends the user message to ``self.messages`` (trimming the history
        when it exceeds 120 entries), then loops for at most 20 round-trips:
        request a completion and, if the model asked for tools, run each one
        and append its result as a ``tool`` message before asking again.

        Every requested tool call always gets a ``tool`` reply. Malformed
        JSON arguments and tool exceptions are reported back to the model as
        the tool result rather than running the tool with empty arguments or
        leaving the ``tool_calls`` message unanswered.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The assistant's final text (empty string if it sent none), or a
            fixed warning when the iteration limit is reached.
        """
        import litellm

        self.messages.append({"role": "user", "content": user_input})

        if len(self.messages) > 120:
            print("  ⚠️  Context window getting full - trimming history...")
            system_msg = self.messages[0]
            recent_msgs = self._trim_history(self.messages[1:], max_messages=100)
            self.messages = [system_msg] + recent_msgs

        # The model wrapper is constructed without a base_url in this mode,
        # so tolerate the attribute being absent.
        api_base = getattr(self.model, "base_url", None)

        max_iterations = 20

        for _ in range(max_iterations):
            print("  ⏳ Waiting for LLM response ...")

            response = litellm.completion(
                model=self.model.model,
                messages=self.messages,
                tools=self.tools_for_model,
                tool_choice="auto",
                api_key=self.model.api_key,
                api_base=api_base
            )

            message = response.choices[0].message
            tool_calls = getattr(message, "tool_calls", None)
            content = getattr(message, "content", None)

            if not tool_calls:
                # No tool calls - return the text response
                reply = content or ""
                self.messages.append({
                    "role": "assistant",
                    "content": reply
                })
                return reply

            # Has tool calls - record the assistant message that requested them
            self.messages.append({
                "role": "assistant",
                "content": content,
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    } for tc in tool_calls
                ]
            })

            # Execute each tool call
            for tool_call in tool_calls:
                func_name = tool_call.function.name
                print(f"  🔧 Calling tool: {func_name}")

                try:
                    raw_args = tool_call.function.arguments
                    # An empty argument string means "no arguments".
                    args = json.loads(raw_args) if raw_args and raw_args.strip() else {}
                    if not isinstance(args, dict):
                        raise ValueError("tool arguments must be a JSON object")
                    result = self.tools.execute_tool(func_name, **args)
                except Exception as e:
                    logging.warning(f"Tool '{func_name}' failed: {e}")
                    result = f"Error executing tool '{func_name}': {e}"

                self.messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    # Tool message content must be text.
                    "content": result if isinstance(result, str) else str(result)
                })

        return "⚠️ Maximum tool iterations reached. Please simplify your request."

    def _extract_response_text(self, response) -> str:
        """Pull plain text out of whichever response shape was returned.

        Tried in order: a ``text`` attribute; a non-empty ``parts`` sequence
        (the ``text`` of every part that has one, joined with spaces); a
        plain string; and finally ``str(response)``.
        """
        if hasattr(response, 'text'):
            return response.text

        parts = getattr(response, 'parts', None)
        if parts:
            return ' '.join(part.text for part in parts if hasattr(part, 'text'))

        return response if isinstance(response, str) else str(response)

    def _load_history(self) -> List[Dict]:
        """Read the persisted conversation from ``self.history_path``.

        Returns the parsed JSON content, or an empty list when the file does
        not exist or cannot be read/parsed (the failure is logged as a
        warning).
        """
        if not self.history_path.exists():
            return []

        print("  🧠 Memory: Loading previous conversation...")
        try:
            # Both OpenAI and LiteLLM modes share one message format, so the
            # saved content is returned without conversion.
            with open(self.history_path, 'r') as history_file:
                return json.load(history_file)
        except Exception as e:
            logging.warning(f"Failed to load history: {e}")

        return []

    def _save_history(self):
        """Persist the conversation, minus system messages, to ``self.history_path``.

        The history is serialised in memory first and written to a sibling
        ``.tmp`` file that then replaces the target, so a message that cannot
        be serialised does not wipe the previously saved history. Failures
        are logged as warnings and never raised.
        """
        try:
            # System messages are rebuilt on start-up, so they are not stored.
            history_data = [m for m in self.messages if m["role"] != "system"]

            # Serialise before opening the target: opening it with 'w' first
            # would truncate the old history even if serialisation then fails.
            payload = json.dumps(history_data, indent=2)

            tmp_path = self.history_path.with_name(self.history_path.name + ".tmp")
            with open(tmp_path, 'w') as f:
                f.write(payload)
            tmp_path.replace(self.history_path)

        except Exception as e:
            logging.warning(f"Failed to save history: {e}")

    @classmethod
    def restore_from_checkpoint(cls, base_dir: str, **kwargs):
        """Alternate constructor: build an agent with checkpoint restoration on.

        ``base_dir`` and any extra keyword arguments are forwarded to the
        class constructor together with ``restore_checkpoint=True``.
        """
        agent = cls(base_dir=base_dir, restore_checkpoint=True, **kwargs)
        return agent


================================================
FILE: rag_engine.py
================================================
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple

import PIL.Image as PIL_Image

from .excel_parser import parse_adaptive_excel
from .parser_utils import parse_json_from_response
from .instruct import (
    HYPOTHESIS_GENERATION_INSTRUCTIONS,
    TEA_INSTRUCTIONS,
    HYPOTHESIS_GENERATION_INSTRUCTIONS_FALLBACK,
    TEA_INSTRUCTIONS_FALLBACK
)


def verify_plan_relevance(objective: str, 
                          result: Dict[str, Any], 
                          model: Any, 
                          generation_config: Any) -> Tuple[bool, str]: 
    """
    Self-reflection step. Returns (True, "") if relevant, or (False, "Reason") if not.

    Logic:
    1. Checks if the plan was generated via Fallback (General Knowledge), by
       looking for the fallback wording in any experiment's justification.
    2. If Fallback: Verifies only scientific soundness (Relaxed).
    3. If Strict: Verifies document grounding and specific constraint adherence (Strict).

    Args:
        objective: The user's objective, quoted verbatim in the evaluator prompt.
        result: Plan dictionary; its "proposed_experiments" list is evaluated.
        model: Object exposing ``generate_content(parts, generation_config=...)``.
        generation_config: Passed through to ``model.generate_content``.

    Returns:
        ``(False, reason)`` when there are no experiments or the evaluator
        reports the plan as not relevant; ``(True, "")`` otherwise. This
        includes the case where the verification call itself fails, because
        the check fails open.
    """
    experiments = result.get("proposed_experiments", [])
    if not experiments:
        return False, "No experiments generated."

    # 1. Detect Fallback Mode
    # A plan counts as fallback if ANY experiment carries the fallback warning.
    fallback_markers = ("general scientific knowledge", "documents lacked specific context")

    def _mentions_fallback(exp):
        # The justification may be missing or an explicit null; treat both as empty text.
        text = str(exp.get('justification') or '').lower()
        return any(marker in text for marker in fallback_markers)

    is_fallback = any(_mentions_fallback(exp) for exp in experiments)

    # 2. Build Plan Summary for the Verifier
    plan_summary_lines = []
    for number, exp in enumerate(experiments, start=1):
        name = exp.get('experiment_name', 'N/A')
        hyp = exp.get('hypothesis', 'N/A')
        justification = exp.get('justification', 'No justification provided.')

        plan_summary_lines.append(f"Experiment {number}: {name}")
        plan_summary_lines.append(f"  Hypothesis: {hyp}")
        plan_summary_lines.append(f"  Justification: {justification}")
        plan_summary_lines.append("---")

    plan_summary = "\n".join(plan_summary_lines)

    # 3. Construct Context-Aware Prompt
    if is_fallback:
        print("    - ℹ️  Verifying Fallback Plan (Relaxed Constraints)...")
        eval_prompt = f"""
        You are a scientific research evaluator.
        
        **CONTEXT:** The system failed to find specific documents for the User Objective in the Knowledge Base.
        Therefore, it generated a plan based on **General Scientific Knowledge**.
        
        1. User Objective: "{objective}"
        2. Proposed Plan (General Knowledge): 
        {plan_summary}

        **TASK:**
        Determine if the Proposed Plan makes scientific sense for the Objective, acknowledging that it CANNOT cite specific documents.
        
        **CRITERIA FOR PASS:**
        - The plan addresses the objective using standard, correct scientific principles.
        - The logic is sound and actionable.
        - **DO NOT FAIL** the plan simply because it uses general knowledge or lacks specific context (this is expected in fallback mode).
        
        **Output:**
        Respond with a single JSON object: {{ "is_relevant": boolean, "reason": "string explanation" }}
        """
    else:
        print("    - ℹ️  Verifying Strict Plan (Document Constraints)...")
        eval_prompt = f"""
        You are a scientific research evaluator.
        
        1. User Objective: "{objective}"
        2. Proposed Plan: 
        {plan_summary}

        **TASK:**
        Review the "Hypothesis" and "Justification" for each experiment.
        Determine if the Proposed Plan is directly relevant to the User Objective AND supported by the cited context.
        
        **CRITERIA FOR FAIL:**
        - The plan ignores specific constraints in the objective (e.g., "Use X method" but the plan uses "Y").
        - The justification contradicts the hypothesis.
        - The plan is logically incoherent.
        
        **Output:**
        Respond with a single JSON object: {{ "is_relevant": boolean, "reason": "string explanation" }}
        """

    # 4. Execute Verification
    try:
        response = model.generate_content([eval_prompt], generation_config=generation_config)
        eval_result, _ = parse_json_from_response(response)

        # An unparseable verdict is treated as a pass; only an explicit
        # falsy "is_relevant" rejects the plan.
        if eval_result and not eval_result.get("is_relevant"):
            reason = eval_result.get('reason', 'Unknown irrelevance.')
            print(f"    - ⚠️  Plan Verification Failed: {reason}")
            return False, reason

        print("    - ✅ Plan Verification Passed.")
        return True, ""

    except Exception as e:
        logging.error(f"Verification step failed: {e}")
        # Fail open: If the verifier crashes, we assume the plan is okay to avoid blocking the user.
        return True, ""


def perform_science_rag(objective: str, 
                        instructions: str, 
                        task_name: str,
                        kb_docs: Any,  # Pass the KB object here
                        model: Any,    # Pass the LLM object here
                        generation_config: Any,
                        primary_data_set: Optional[Dict[str, str]] = None,
                        image_paths: Optional[List[str]] = None,
                        image_descriptions: Optional[List[str]] = None,
                        additional_context: Optional[str] = None,
                        external_context: Optional[str] = None) -> Dict[str, Any]:
    """
    Executes the Scientific/TEA RAG loop using the Docs KnowledgeBase.
    Includes logic for handling Primary Data (Excel) and Fallback generation.

    Steps:
    1. Optionally parse the primary data set and take its summary text.
    2. Retrieve up to 10 chunks for the objective from ``kb_docs`` (when its
       index is non-empty), de-duplicated by chunk text.
    3. Build a multimodal prompt: instructions, objective, primary data,
       images (+ descriptions), additional context, retrieved context.
    4. Generate. If the parsed result carries an "Insufficient..." error and
       a fallback instruction set exists for ``instructions``, regenerate
       with the fallback instructions.

    Args:
        objective: User objective; also used as the retrieval query.
        instructions: Instruction text placed first in the prompt.
        task_name: Label used in progress output.
        kb_docs: Knowledge base exposing ``index`` and ``retrieve(query, top_k)``.
        model: Object exposing ``generate_content(parts, generation_config=...)``.
        generation_config: Passed through to ``model.generate_content``.
        primary_data_set: Mapping with 'file_path' and 'metadata_path' keys.
        image_paths: Image files to attach; unreadable ones are skipped.
        image_descriptions: Descriptions added only when images were loaded.
        additional_context: Free text appended to the prompt.
        external_context: External literature text placed in the retrieved context.

    Returns:
        The parsed result dictionary, or ``{"error": ...}`` on parse or
        runtime failure.
    """

    # --- 1. Process Primary Data (e.g., Excel) ---
    primary_data_str = None
    if primary_data_set:
        try:
            chunks = parse_adaptive_excel(primary_data_set['file_path'], primary_data_set['metadata_path'])
            if chunks:
                # Prefer a summary/package chunk; otherwise take the first chunk.
                summary = next((c for c in chunks if c['metadata'].get('content_type') in ('dataset_summary', 'dataset_package')), chunks[0])
                primary_data_str = summary['text']
        except Exception as e:
            print(f"  - ⚠️ Warning: Failed to parse primary data set: {e}")

    # --- 2. Retrieve Scientific Context (Docs KB Only) ---
    print(f"\n--- Retrieving Scientific Context for {task_name} ---")

    doc_chunks = []
    if kb_docs.index and kb_docs.index.ntotal > 0:
        doc_chunks = kb_docs.retrieve(objective, top_k=10)

    # De-duplicate by text (last occurrence wins, first-seen order is kept).
    unique_chunks = list({c['text']: c for c in doc_chunks}.values())

    context_sections = []

    # External Literature
    if external_context:
        context_sections.append(f"## 🌍 External Scientific Literature\n{external_context}\n\n")

    # Local Documents
    if unique_chunks:
        rag_str = "\n\n---\n\n".join(
            f"Source: {Path(c['metadata'].get('source', 'N/A')).name}\nType: {c['metadata'].get('content_type')}\n\n{c['text']}"
            for c in unique_chunks
        )
        context_sections.append(f"## 📂 Retrieved Local Documents\n{rag_str}")

    # Primary data has its own prompt section below, so it does not count as
    # retrieved context. Whenever nothing was retrieved, say so explicitly
    # instead of leaving the section empty.
    retrieved_context_str = "".join(context_sections) or "No specific documents found in Knowledge Base."

    # --- 3. Construct Multimodal Prompt ---
    img_desc_str = json.dumps(image_descriptions, indent=2) if image_descriptions else ""

    loaded_images = []
    if image_paths and PIL_Image:
        for p in image_paths:
            try:
                loaded_images.append(PIL_Image.open(p))
            except Exception as e:
                print(f"  - ⚠️ Could not load image {p}: {e}")

    prompt_parts = [instructions, f"## User Objective:\n{objective}"]

    if primary_data_str:
        prompt_parts.append(f"\n## 📊 Primary Experimental Data:\n{primary_data_str}")

    if loaded_images:
        prompt_parts.append("\n## Provided Images: (See attached)")
        prompt_parts.extend(loaded_images)
        if img_desc_str:
            prompt_parts.append(f"\n## Image Descriptions:\n{img_desc_str}")

    if additional_context:
        prompt_parts.append(f"\n## Additional Context:\n{additional_context}")

    prompt_parts.append(f"\n## Retrieved Context:\n{retrieved_context_str}")

    # --- 4. Generation & Fallback Logic ---
    print(f"--- Generating {task_name} ---")
    try:
        # Attempt 1: Strict RAG Generation
        response = model.generate_content(prompt_parts, generation_config=generation_config)
        result, error_msg = parse_json_from_response(response)

        if error_msg:
            return {"error": f"JSON Parsing Error: {error_msg}"}

        # Check for Insufficient Context
        needs_fallback = False
        if result.get("error") and "Insufficient" in str(result.get("error")):
            needs_fallback = True
            print(f"    - ⚠️ Strict generation failed: {result.get('error')}")

        # --- 5. Execution of Fallback ---
        if needs_fallback:
            print("    - 🔄 Entering Fallback Mode (General Knowledge)...")

            fallback_inst = None
            if instructions == HYPOTHESIS_GENERATION_INSTRUCTIONS:
                fallback_inst = HYPOTHESIS_GENERATION_INSTRUCTIONS_FALLBACK
            elif instructions == TEA_INSTRUCTIONS:
                fallback_inst = TEA_INSTRUCTIONS_FALLBACK

            if not fallback_inst:
                return result  # No fallback available for this instruction set

            # Same prompt, only the leading instruction block is swapped.
            prompt_parts[0] = fallback_inst

            fallback_response = model.generate_content(prompt_parts, generation_config=generation_config)
            result, error_msg_fb = parse_json_from_response(fallback_response)

            if error_msg_fb:
                return {"error": f"Fallback JSON Parsing Error: {error_msg_fb}"}

            print("    - ✅ Fallback generation successful.")

        return result

    except Exception as e:
        logging.error(f"Error in perform_science_rag: {e}")
        return {"error": str(e)}

    finally:
        # Image.open keeps the underlying file open; release the handles once
        # generation (including any fallback attempt) is over.
        for img in loaded_images:
            try:
                img.close()
            except Exception as close_err:
                logging.debug(f"Could not close image: {close_err}")


def normalize_code(code: str) -> str:
    """Collapse every run of whitespace in *code* to one space.

    Leading/trailing whitespace is dropped; an empty or None input yields "".
    """
    tokens = code.split() if code else []
    return " ".join(tokens)


def perform_code_rag(
    result: Dict[str, Any],
    kb_code: Any,
    model: Any,
    generation_config: Any,
    previous_implementations: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """
    Retrieves API syntax from the Code KB and generates implementation scripts.
    If previous code implementations are provided, lets the LLM decide whether to:
    - Preserve existing code (no changes needed)
    - Update existing code (incremental edits)
    - Rewrite from scratch (major procedural changes)

    Args:
        result: Plan dictionary; each entry of "proposed_experiments" is
            updated in place with "implementation_code" and
            "code_source_files" when generation succeeds.
        kb_code: Code knowledge base exposing ``retrieve(query, top_k)`` and
            ``get_relevant_maps(hits)``.
        model: Object exposing ``generate_content(parts, generation_config=...)``.
        generation_config: Passed through to ``model.generate_content``.
        previous_implementations: Earlier implementations, matched to
            experiments by their 'experiment_name'.

    Returns:
        The same ``result`` object. Per-experiment failures are printed and
        skipped; they do not abort the remaining experiments.
    """

    experiments = result.get("proposed_experiments", [])
    if not experiments:
        return result

    def _steps_as_text(exp):
        # Steps come from LLM output: tolerate a missing/null list, a bare
        # string, and non-string entries instead of failing in str.join.
        steps = exp.get('experimental_steps') or []
        if isinstance(steps, str):
            steps = [steps]
        return " ".join(str(step) for step in steps)

    # 1. Retrieve API documentation from Code KB
    all_steps_text = " ".join(_steps_as_text(e) for e in experiments)

    print("  - 🔍 Retrieving API syntax for implementation...")
    hits = kb_code.retrieve(f"python implementation for {all_steps_text}", top_k=5)

    repo_map_context = kb_code.get_relevant_maps(hits) if hits else ""
    code_ctx = "\n\n".join([
        f"FILE: {c['metadata']['source']}\n{c['text']}"
        for c in hits
    ]) if hits else "No API examples found in Code KB."

    # Sorted so the recorded file list is stable between runs.
    code_files = sorted({Path(c['metadata']['source']).name for c in hits}) if hits else []

    # 2. Build mapping of previous implementations by experiment name
    previous_code_map = {}
    if previous_implementations:
        for impl in previous_implementations:
            exp_name = impl.get('experiment_name', '')
            if exp_name:
                previous_code_map[exp_name] = impl

    # 3. Generate/Update code for each experiment
    for exp in experiments:
        steps = exp.get("experimental_steps", [])
        exp_name = exp.get("experiment_name", "Experiment")
        hypothesis = exp.get("hypothesis", "N/A")

        # Find matching previous implementation
        prev_impl = previous_code_map.get(exp_name)

        # Build the master prompt
        prompt = f"""
You are an expert Research Software Engineer working on an iterative scientific project.

**EXPERIMENT OVERVIEW:**
Name: {exp_name}
Hypothesis: {hypothesis}

**NEW EXPERIMENTAL STEPS:**
{json.dumps(steps, indent=2)}

"""

        # Add previous implementation context if it exists
        if prev_impl:
            prev_code = prev_impl.get('code', '')
            prev_iteration = prev_impl.get('iteration', 'unknown')

            prompt += f"""
**PREVIOUS IMPLEMENTATION (Iteration {prev_iteration}):**
```python
{prev_code}
```

**YOUR DECISION:**
You must choose one of three strategies:

1. **PRESERVE** - If the new steps are identical or the change is only a parameter/value:
   - Return the exact same code unchanged
   - Example: "Increase temperature from 50°C to 60°C" → just parameter change

2. **UPDATE** - If the procedure changed but the overall structure is similar:
   - Keep the working framework (imports, error handling, setup)
   - Modify only the changed sections
   - Add comments marking what changed
   - Example: "Add a centrifugation step after mixing" → insert new function call

3. **REWRITE** - If this is a fundamentally different approach:
   - Start fresh using the API Reference below
   - Example: "Switch from batch processing to real-time streaming"

"""
        else:
            prompt += f"""
**PREVIOUS IMPLEMENTATION:**
None - this is the first implementation for this experiment.

**YOUR TASK:**
Write a complete Python script from scratch using the API Reference below.

"""

        # Add API context
        prompt += f"""
**REPOSITORY STRUCTURES (for correct import paths):**
{repo_map_context}

**API SYNTAX REFERENCE (Official Documentation/Examples):**
{code_ctx}

**INSTRUCTIONS:**
- Use the "API Syntax Reference" to find the correct functions.
- Map the scientific intent of the Steps to the code.
- You must prioritize using classes and functions from the API Reference over generic external libraries.
- If updating existing code, preserve working patterns
- Return ONLY valid JSON.

**OUTPUT FORMAT:**
Respond with a JSON object:
{{"implementation_code": "COMPLETE_PYTHON_CODE_HERE"}}
"""

        try:
            print(f"    - 🤖 Analyzing '{exp_name}'...")
            resp = model.generate_content([prompt], generation_config=generation_config)
            code_res, parse_error = parse_json_from_response(resp)

            if parse_error:
                print(f"    - ⚠️ JSON parsing error for '{exp_name}': {parse_error}")
                continue

            if code_res and "implementation_code" in code_res:
                new_code = code_res["implementation_code"]
                exp["implementation_code"] = new_code
                exp["code_source_files"] = code_files

                if prev_impl:
                    old_code = prev_impl.get('code', '')

                    # Compare normalized versions to ignore harmless whitespace/indentation differences
                    if normalize_code(new_code) == normalize_code(old_code):
                        print(f"    - ⏹️  Preserved (No logic changes): {exp_name}")
                    else:
                        print(f"    - 🔄 Updated: {exp_name}")

                else:
                    print(f"    - ✨ Generated: {exp_name}")

            else:
                print(f"    - ⚠️ LLM did not return code for '{exp_name}'")

        except Exception as e:
            print(f"    - ❌ Failed to process '{exp_name}': {e}")

    return result


def refine_plan_with_feedback(original_result: Dict[str, Any], 
                              feedback: str, 
                              objective: str,
                              model: Any,
                              generation_config: Any,
                              new_context: Optional[str] = None,
                              result_images: Optional[List[Any]] = None
                              ) -> Dict[str, Any]:
    """
    Refines the experimental plan based on user input or experimental results.
    Now supports injecting fresh RAG context relevant to the feedback/results.

    Args:
        original_result: Current plan; serialized as JSON into the prompt.
        feedback: Free-text user feedback or a description of observed results.
        objective: The original research objective.
        model: Object exposing ``generate_content(parts, generation_config=...)``.
        generation_config: Passed through to ``model.generate_content`` unchanged.
        new_context: Optional literature text added to the prompt.
        result_images: Optional images appended to the prompt parts after the text.

    Returns:
        - The refined plan when the LLM reply parses to a dict that contains
          "proposed_experiments".
        - A dict with "error" ("JSON_PARSE_ERROR" or "INVALID_STRUCTURE"),
          "message" and "raw_output" when the reply is unusable.
        - ``original_result`` unchanged when the model call itself raises.
    """
    
    # Construct the context block if available
    context_block = ""
    if new_context:
        context_block = (
            f"\n**📚 RELEVANT LITERATURE FOR OBSERVED RESULTS:**\n"
            f"{new_context}\n"
            f"(Use this literature to interpret the results and adjust the plan accordingly.)\n"
        )

    refinement_prompt = f"""
    You are an expert Research Strategist acting as an editor.
    
    **Original Objective:** {objective}
    
    **Current Plan (JSON):**
    {json.dumps(original_result, indent=2)}
    
    **Experimental Results / Feedback:** "{feedback}"
    {context_block}
    
    **Task:**
    Update the "Current Plan" to strictly address the Feedback and Results.
    - If the results indicate failure, use the Literature Context to propose a fix.
    - If the results indicate success, move to the next logical step.
    
    **Constraints:**
    - You MUST return the exact same JSON structure (keys: "proposed_experiments", etc.).
    - Update "experimental_steps", "hypothesis", or "required_equipment" as requested.
    - Do NOT add explanations outside the JSON.
    
    **Output:**
    A single valid JSON object containing the updated plan.
    """

    prompt_parts = [refinement_prompt]
    
    if result_images:
        print(f"    + 📎 Attaching {len(result_images)} images to refinement prompt.")
        prompt_parts.extend(result_images)

    try:
        # Generate Content (Sending List of Text + Images)
        response = model.generate_content(prompt_parts, generation_config=generation_config)
        refined_result, error_msg = parse_json_from_response(response)
        
        if error_msg:
            print(f"    - ⚠️ JSON Parsing Failed: {error_msg}")
            # Return an error object so the agent knows to stop.
            return {
                "error": "JSON_PARSE_ERROR",
                "message": f"LLM output invalid: {error_msg}",
                "raw_output": str(response.text)[:500] if hasattr(response, 'text') else "No text"
            }
        
        # Structure Validation.
        # The isinstance guard matters: a None (or other non-container) result used
        # to raise TypeError on the `in` test, which the generic handler below
        # swallowed, silently returning the *unrefined* plan as if all was well.
        if not isinstance(refined_result, dict) or "proposed_experiments" not in refined_result:
            return {
                "error": "INVALID_STRUCTURE",
                "message": "JSON parsed but missing 'proposed_experiments' key.",
                "raw_output": str(refined_result)[:200]
            }
            
        return refined_result
        
    except Exception as e:
        # Best-effort: a failed model call must not lose the caller's current plan.
        print(f"    - ⚠️ Error during refinement: {e}")
        return original_result
    

def refine_code_with_feedback(result: Dict[str, Any], 
                              feedback: str, 
                              model: Any, 
                              generation_config: Any) -> Dict[str, Any]:
    """
    Refines the implementation code based on user feedback.

    The current code of every experiment is sent to the LLM together with the
    feedback. The reply must carry an "updated_codes" list with exactly one
    entry per experiment, in order; only then is each experiment's
    "implementation_code" overwritten in place.

    Args:
        result: Plan dict; its "proposed_experiments" entries are mutated in place.
        feedback: User feedback or error report to apply.
        model: Object exposing ``generate_content(parts, generation_config=...)``.
        generation_config: Passed through to ``model.generate_content`` unchanged.

    Returns:
        The same ``result`` object. It is left unmodified when there are no
        experiments, when the reply is unusable, or when the model call raises.
    """
    experiments = result.get("proposed_experiments", [])
    if not experiments:
        return result

    # Context construction: We dump the current code so the LLM knows what to fix
    code_sections = []
    for index, exp in enumerate(experiments, start=1):
        name = exp.get('experiment_name', f'Experiment {index}')
        code = exp.get("implementation_code", "# No code generated")
        code_sections.append(f"--- CODE FOR: {name} ---\n{code}\n\n")
    current_code_state = "".join(code_sections)

    prompt = f"""
    You are a Senior Research Software Engineer.
    
    **TASK:** Refine the Python implementation code based on User Feedback.
    
    **CURRENT CODE STATE:**
    {current_code_state}
    
    **USER FEEDBACK / ERROR REPORT:**
    "{feedback}"
    
    **INSTRUCTIONS:**
    1. Apply the user's fixes to the relevant code blocks.
    2. If the user refers to a specific experiment, only update that one.
    3. You must return a JSON object with a list of "updated_codes". 
       Each item in the list must match the order of the experiments above.
    4. Provide the FULL updated code for each script, not just the diffs.
    
    **OUTPUT FORMAT:**
    {{
        "updated_codes": [
            "FULL_PYTHON_SCRIPT_1...",
            "FULL_PYTHON_SCRIPT_2..."
        ]
    }}
    """
    
    print("    - ↻ Refine Code RAG: Generating updates based on feedback...")
    try:
        response = model.generate_content([prompt], generation_config=generation_config)
        updates, error = parse_json_from_response(response)
        
        if isinstance(updates, dict) and "updated_codes" in updates:
            new_codes = updates["updated_codes"]
            # Map back to the result structure.
            # The list check prevents a bare string whose length happens to equal
            # the experiment count from being written out one character per script.
            if isinstance(new_codes, list) and len(new_codes) == len(experiments):
                for exp, code in zip(experiments, new_codes):
                    exp["implementation_code"] = code
                print("    - ✅ Code successfully refined.")
            else:
                print("    - ⚠️ Warning: LLM returned wrong number of code blocks. Skipping update.")
        elif error:
            print(f"    - ⚠️ JSON Error during refinement: {error}")
        else:
            # Previously this case was silent, leaving the user unaware nothing changed.
            print("    - ⚠️ Warning: LLM response did not contain 'updated_codes'. Skipping update.")
        
        return result
        
    except Exception as e:
        print(f"    - ❌ Error during code refinement: {e}")
        return result


================================================
FILE: repo_loader.py
================================================
import os
import subprocess
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

def clone_git_repository(repo_url: str, 
                         target_base_dir: str = "./downloaded_repos", 
                         auto_update: bool = True) -> Optional[str]:
    """
    Clones a git repository to a local directory.
    If the directory exists and auto_update is True, it runs 'git pull'.

    Args:
        repo_url: Repository URL; the last path component (minus ".git") names the folder.
        target_base_dir: Directory under which the repository folder is placed.
        auto_update: When the folder already exists, attempt 'git pull' first.

    Returns:
        The absolute path to the cloned directory, or None when no folder name
        can be derived from the URL, git is unavailable, or cloning fails.
        A failed 'git pull' is not fatal: the existing checkout's path is returned.
    """
    # 1. Extract repo name to use as folder name
    # e.g., https://github.com/user/my-project.git -> my-project
    # Trailing slashes are stripped first; otherwise basename() yields "" and the
    # base directory itself would be treated as the repository.
    parsed_url = urlparse(repo_url)
    repo_name = os.path.basename(parsed_url.path.rstrip('/'))
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-4]
    
    # Clean up name to ensure valid folder path
    repo_name = "".join(c for c in repo_name if c.isalnum() or c in ('-', '_'))

    if not repo_name:
        print(f"  - ❌ Error: Could not determine a repository name from '{repo_url}'.")
        return None
    
    target_path = Path(target_base_dir) / repo_name
    
    # 2. Check if git is installed
    try:
        subprocess.run(["git", "--version"], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing/non-executable binary; CalledProcessError a broken install.
        print("  - ❌ Error: 'git' is not installed or not in PATH.")
        return None

    # 3. Handle Existing Directory
    if target_path.exists():
        if auto_update:
            print(f"  - 🔄 Repo '{repo_name}' exists. Attempting update (git pull)...")
            try:
                # 'git -C path' runs the command inside that directory
                subprocess.run(["git", "-C", str(target_path), "pull"], 
                               check=True, 
                               stdout=subprocess.DEVNULL) # Hide generic output unless error
                print(f"  - ✅ Update successful: {repo_name}")
            except subprocess.CalledProcessError as e:
                print(f"  - ⚠️  Update failed (local changes or network issue): {e}")
                print("       Using existing version without update.")
        else:
            print(f"  - ℹ️  Repo '{repo_name}' exists. Skipping update.")
            
        return str(target_path.resolve())
    
    # 4. Clone New Repo
    print(f"  - 📥 Cloning '{repo_url}' into {target_path}...")
    try:
        target_path.parent.mkdir(parents=True, exist_ok=True)
        # "--" ends option parsing so a URL beginning with "-" cannot be read as a git option.
        subprocess.run(["git", "clone", "--", repo_url, str(target_path)], check=True)
        print("  - ✅ Clone successful.")
        return str(target_path.resolve())
    except subprocess.CalledProcessError as e:
        print(f"  - ❌ Error cloning repo: {e}")
        return None


================================================
FILE: scalarizer_agent.py
================================================
import subprocess
import json
import logging
import re
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, Optional, List
import PIL.Image as PIL_Image

from ...auth import get_internal_proxy_key
from ...wrappers.openai_wrapper import OpenAIAsGenerativeModel
from ...wrappers.litellm_wrapper import LiteLLMGenerativeModel
from ...executors import require_sandbox_approval
from .parser_utils import parse_json_from_response
from .instruct import SCALARIZER_PROMPT, SCALARIZER_REFLECTION_PROMPT

from ._deprecation import normalize_params

from .base_agent import BaseAgent


class ScalarizerAgent(BaseAgent):
    """
    Agent for converting raw experimental data into scalar descriptors
    suitable for Bayesian Optimization.

    The agent asks an LLM to write an analysis script for a data file, runs that
    script in a subprocess, has the LLM audit the resulting plot/metrics, and
    optionally pauses for human review before returning the metrics.

    Example:
        >>> agent = ScalarizerAgent()
        >>> context = {
        ...     "hypothesis": "Product peak expected at 5.5 min",
        ...     "expected_outcome": "High yield > 80%"
        ... }
        >>> result = agent.scalarize(
        ...     data_path="data/hplc_run_01.csv",
        ...     objective_query="Integrate peak at 5.5 min. Calculate Purity %.",
        ...     experiment_context=context
        ... )
        >>> print(result["metrics"])
        {'purity': 98.5, 'peak_area': 12504.2}

    Args:
        api_key: API key for the LLM provider.
        model_name: Model name. For public deployments, use LiteLLM format
            (e.g., "gemini/gemini-2.0-flash", "gpt-4o", "claude-sonnet-4-20250514").
        base_url: Base URL for internal proxy endpoint.
            When provided, uses OpenAI-compatible client.
            When None, uses LiteLLM for multi-provider support.
        output_dir: Output directory for artifacts.
        
        google_api_key: DEPRECATED. Use 'api_key' instead.
        local_model: DEPRECATED. Use 'base_url' instead.

    Raises:
        RuntimeError: If sandbox approval for code execution is declined.
        ValueError: If ``base_url`` is set but no API key can be resolved.
    """
    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: str = "gemini-3-pro-preview",
        base_url: Optional[str] = None,
        output_dir: str = ".",
        # Deprecated
        google_api_key: Optional[str] = None,
        local_model: Optional[str] = None,
    ):
        # Generated scripts are executed locally, so approval is requested up front.
        if not require_sandbox_approval(
            context="ScalarizerAgent (scalarization of experimental data)"
        ):
            raise RuntimeError(
                "ScalarizerAgent requires code execution but user declined. "
                "Run in Docker, VM, or Colab for safe execution."
            )
        super().__init__(output_dir)
        self.agent_type = "scalarizer"

        # Handle deprecated parameters
        api_key, base_url = normalize_params(
            api_key=api_key,
            google_api_key=google_api_key,
            base_url=base_url,
            local_model=local_model,
            source="ScalarizerAgent"
        )
        
        if base_url:
            # INTERNAL PROXY
            if api_key is None:
                api_key = get_internal_proxy_key()
            
            if not api_key:
                raise ValueError(
                    "API key required for internal proxy.\n"
                    "Set SCILINK_API_KEY environment variable or pass api_key parameter."
                )
            
            logging.info(f"🏛️ ScalarizerAgent using internal proxy: {base_url}")
            self.model = OpenAIAsGenerativeModel(
                model=model_name,
                api_key=api_key,
                base_url=base_url
            )
        else:
            # PUBLIC LITELLM
            logging.info(f"🌐 ScalarizerAgent using LiteLLM: {model_name}")
            self.model = LiteLLMGenerativeModel(
                model=model_name,
                api_key=api_key
            )

        self.generation_config = None

    def _get_initial_state_fields(self) -> Dict[str, Any]:
        """Agent-specific state fields"""
        return {
            "current_data_path": None,
            "current_objective": None,
            "active_script": None
        }

    def _read_file_head(self, file_path: str, n_lines=25) -> str:
        """Reads raw file header to help LLM handle delimiters/metadata.

        Returns up to ``n_lines`` lines of the file (fewer when the file is
        shorter), or a string starting with "Error" when the file is missing
        or unreadable. Undecodable bytes are replaced rather than raising.
        """
        # Local import keeps the module-level import block untouched.
        from itertools import islice

        path = Path(file_path)
        if not path.exists():
            return "Error: File not found."
        try:
            with open(path, 'r', encoding='utf-8', errors='replace') as f:
                # islice stops cleanly at EOF. Calling next(f) n_lines times raised
                # StopIteration on short files, turning them into an error string.
                return "".join(islice(f, n_lines))
        except Exception as e:
            return f"Error reading file head: {str(e)}"
        
    def _read_metadata(self, metadata_path: str) -> str:
        """Safely reads a sidecar JSON file.

        Returns the pretty-printed JSON, "None" when no path is given, or a
        string starting with "Error" when the file is missing or invalid.
        """
        if not metadata_path: 
            return "None"
        
        path = Path(metadata_path)
        if not path.exists():
            return f"Error: Metadata file not found at {path}"
            
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return json.dumps(data, indent=2)
        except Exception as e:
            return f"Error reading metadata: {str(e)}"

    def _execute_script(self,
                        script_path: Path,
                        args: Optional[List[str]] = None,
                        timeout: float = 45) -> Dict[str, Any]:
        """Runs the generated python script in a subprocess.

        Args:
            script_path: Script to execute.
            args: Extra command-line arguments appended after the script path.
            timeout: Seconds before the run is aborted.

        Returns:
            On success: {"status": "success", "metrics", "plot_path", "stdout"},
            taken from the JSON object found in the script's stdout.
            On failure: {"status": "failure", "error", ...} with "stdout"/"stderr"
            included whenever the process actually ran.
        """
        # Local import keeps the module-level import block untouched.
        import sys

        # Use the interpreter running this agent so the script sees the same
        # environment/packages; a bare "python" may be absent or a different install.
        cmd = [sys.executable or "python", str(script_path)]
        if args:
            cmd.extend(args)

        try:
            process = subprocess.run(
                cmd,
                capture_output=True, text=True, timeout=timeout
            )
        except subprocess.TimeoutExpired:
            return {"status": "failure", "error": "Script execution timed out"}
        except Exception as e:
            return {"status": "failure", "error": str(e)}

        # Parse STDOUT for JSON (greedy: from the first "{" to the last "}")
        json_match = re.search(r'\{.*\}', process.stdout.strip(), re.DOTALL)
        if not json_match:
            return {
                "status": "failure",
                "error": "No JSON output found in stdout",
                "stdout": process.stdout,
                "stderr": process.stderr
            }

        try:
            data = json.loads(json_match.group(0))
        except ValueError as e:
            # Keep stdout/stderr so the retry prompt can show what the script printed.
            return {
                "status": "failure",
                "error": f"Invalid JSON in stdout: {e}",
                "stdout": process.stdout,
                "stderr": process.stderr
            }

        return {
            "status": "success",
            "metrics": data.get("metrics", {}),
            "plot_path": data.get("plot_path", ""),
            "stdout": process.stdout
        }

    def _verify_analysis(self, 
                        objective: str, 
                        context_str: str,
                        script_content: str, 
                        metrics: Dict, 
                        plot_path: str,
                        schema_requirements: Optional[Dict] = None) -> Dict[str, Any]:
        """Multimodal Self-Reflection: Checks plot vs. objective.

        Sends the objective, metrics, a code excerpt and the plot image to the
        model. Always returns a dict: the model's verdict, a "fail" verdict when
        the plot cannot be opened, or a "pass" fallback when reflection itself
        is unavailable (model error or unparsable reply).
        """
        try:
            image = PIL_Image.open(plot_path)
        except Exception as e:
            return {"status": "fail", "feedback": f"Could not load visual proof: {e}"}

        # Build schema verification section
        schema_check = ""
        if schema_requirements:
            schema_check = f"""
    **REQUIRED SCHEMA TO VERIFY:**
    - Input columns that MUST be present: {schema_requirements.get('input_columns', [])}
    - Target columns that MUST be present: {schema_requirements.get('target_columns', [])}
    Verify ALL these columns appear in the metrics.
    """

        prompt = f"""
    **AUDIT REQUEST:**
    **1. EXTRACTION OBJECTIVE:** "{objective}"
    {schema_check}
    **2. EXTRACTED METRICS:** {json.dumps(metrics, indent=2)}
    **3. CODE SNIPPET:** 
    ```python
    {script_content[:3000]}
    ```
    **4. VISUAL PROOF:** (See Attached Image)
    **CONTEXT (reference only):** {context_str[:500]}...

    Verify the extraction is technically correct.
    """
        
        unavailable = {"status": "pass", "reasoning": "Auto-reflection unavailable."}
        try:
            response = self.model.generate_content(
                [SCALARIZER_REFLECTION_PROMPT, prompt, image],
                generation_config=self.generation_config
            )
            verdict = parse_json_from_response(response)[0]
        except Exception as e:
            logging.warning(f"Reflection failed: {e}")
            return unavailable

        # An unparsable reply yields a non-dict verdict; the caller invokes .get()
        # on the result, so treat it like any other reflection outage.
        if not isinstance(verdict, dict):
            logging.warning("Reflection failed: no JSON verdict in model response")
            return unavailable
        return verdict

    def scalarize(self, 
                 data_path: str, 
                 objective_query: str = "",
                 reuse_script_path: Optional[str] = None,
                 experiment_context: Optional[Dict[str, Any]] = None,
                 metadata_path: Optional[str] = None, 
                 enable_human_review: bool = True) -> Dict[str, Any]:
        """
        Main entry point. Converts raw data -> Scalar Metrics.

        Example:
            >>> agent = ScalarizerAgent()
            >>> context = {
            ...     "hypothesis": "Product peak expected at 5.5 min",
            ...     "expected_outcome": "High yield > 80%"
            ... }
            >>> result = agent.scalarize(
            ...     data_path="data/hplc_run_01.csv",
            ...     objective_query="Integrate peak at 5.5 min. Calculate Purity %.",
            ...     experiment_context=context
            ... )
            >>> print(result["metrics"])
            {'purity': 98.5, 'peak_area': 12504.2}

        Args:
            data_path: Path to raw data (csv, xlsx, txt).
            objective_query: Natural language instruction (e.g. "Calculate yield").
            reuse_script_path: Path to an existing analysis script. When the file
                exists it is executed directly on ``data_path`` and no new script
                is generated, reflected on or reviewed.
            experiment_context: Dict of high-level plan info (Hypothesis, etc).
                An optional "_schema_requirements" entry is injected into the
                generation and verification prompts.
            metadata_path: Path to sidecar JSON describing the data file (Units, Columns).
                When omitted, a ``.json`` file next to the data file is used if present.
            enable_human_review: Pause for human check of the plot/logic.

        Returns:
            Dict containing:
            - 'status': 'success' or 'failure'
            - 'metrics': Dict of extracted scalars (absent on generation failure)
            - 'source_script': Path to the Python script that produced the metrics
            - 'error': Failure description (failure results and the reuse path)
        """
        path_obj = Path(data_path)
        
        # Initialize state
        self._init_state(current_data_path=data_path, current_objective=objective_query)

        # Path 1: Re-use existing script
        if reuse_script_path and Path(reuse_script_path).exists():
            print(f"  🔄 Reusing scalarizer script: {Path(reuse_script_path).name}")
            exec_res = self._execute_script(Path(reuse_script_path), args=[str(data_path)])
            
            result = {
                "status": exec_res["status"], 
                "metrics": exec_res.get("metrics", {}),
                "source_script": str(reuse_script_path),
                "error": exec_res.get("error")
            }
            
            # Log the reuse action
            self._log_action(
                action="reuse_script",
                input_ctx={"data_path": data_path, "script": reuse_script_path},
                result=result,
                rationale="Reusing previously validated analysis script for consistency"
            )
            
            return result
        
        # Path 2: Generate new script
        file_context = self._read_file_head(data_path)
        
        # Metadata Auto-Discovery
        if not metadata_path:
            potential_json = path_obj.with_suffix('.json')
            # Skip when the data file is itself the .json: it is not its own sidecar.
            if potential_json != path_obj and potential_json.exists():
                metadata_path = str(potential_json)
                print(f"  - ℹ️  Auto-discovered metadata file: {potential_json.name}")
        
        metadata_str = self._read_metadata(metadata_path)
        exp_context_str = json.dumps(experiment_context) if experiment_context else "None"
        plot_output_dir = str(self.output_dir.resolve())
        
        schema_section = ""
        if experiment_context and "_schema_requirements" in experiment_context:
            schema = experiment_context["_schema_requirements"]
            schema_section = f"""
    **REQUIRED OUTPUT SCHEMA (MANDATORY):**
    - INPUT COLUMNS: {schema.get('input_columns', [])}
    - TARGET COLUMNS: {schema.get('target_columns', [])}
    - OPTIMIZATION TYPE: {schema.get('optimization_type', 'single-objective')}

    Your output metrics MUST include ALL of these columns for each data point.
    """
        
        base_prompt = f"""
        **INPUT DATA:** {data_path}
        **HEAD SNIPPET:** \n{file_context}\n

        **METADATA SIDECAR (Column Defs / Units):**
        {metadata_str}
        {schema_section}
        
        **EXPERIMENTAL CONTEXT (Hypothesis / Steps):**
        {exp_context_str}
        
        **GOAL:** "{objective_query}"
        
        **REQ:** Parse, Calculate, Plot (save to {plot_output_dir}/debug_{path_obj.stem}.png), Print JSON.

        **CRITICAL:** In your code, replace OUTPUT_DIR_PLACEHOLDER with exactly: {plot_output_dir}
        This is an absolute path - use it directly without modification.
        """

        # Each retry re-sends base_prompt plus only the most recent problem report.
        current_prompt = base_prompt
        max_retries = 5
        human_feedback_collected = None

        for attempt in range(max_retries):
            print(f"  - 📉 Scalarizer (Attempt {attempt+1}): Generating script...")
            
            # Generate Script
            try:
                response = self.model.generate_content(
                    [SCALARIZER_PROMPT, current_prompt], 
                    generation_config=self.generation_config
                )
                result, error = parse_json_from_response(response)
            except Exception as e:
                return {"status": "failure", "error": f"LLM Generation Error: {e}"}

            if error or not result or "implementation_code" not in result:
                err_msg = error if error else "Missing 'implementation_code' key"
                print(f"    ⚠️ Generation Failed (Invalid JSON): {err_msg}")
                current_prompt = base_prompt + f"\n\n**PREVIOUS ERROR:** JSON parsing failed ({err_msg}). Return ONLY valid JSON."
                continue

            # Save Script
            sanitized_name = Path(data_path).stem.replace(" ", "_")
            script_path = self.output_dir / f"proc_{sanitized_name}.py"
            with open(script_path, "w", encoding="utf-8") as f:
                f.write(result["implementation_code"])
            
            # Track active script in state
            self.state["active_script"] = str(script_path)
            
            # Execute Script
            exec_res = self._execute_script(script_path, args=[str(data_path)])
            
            if exec_res["status"] == "failure":
                # Timeouts carry no 'stderr', and a script that merely forgot to print
                # JSON has an empty one; merge 'error' and 'stderr' so the retry prompt
                # always contains something actionable.
                details = [exec_res.get("error") or "", (exec_res.get("stderr") or "").strip()]
                err_msg = "\n".join(part for part in details if part) or "Unknown Error"
                display_err = (err_msg[:300] + '...') if len(err_msg) > 300 else err_msg
                print(f"    ❌ Runtime Error:\n    {display_err}")
                current_prompt = base_prompt + f"\n\n**RUNTIME ERROR:**\n{err_msg}\nFix the code."
                continue
                
            schema_for_verification = None
            if experiment_context and "_schema_requirements" in experiment_context:
                schema_for_verification = experiment_context["_schema_requirements"]

            # Auto-Reflection
            print("    🤔 Auto-Reflecting on visual proof...")
            verification = self._verify_analysis(
                objective=objective_query,
                context_str=exp_context_str,
                script_content=result["implementation_code"],
                metrics=exec_res["metrics"],
                plot_path=exec_res["plot_path"],
                schema_requirements=schema_for_verification
            )
            
            if verification.get("status") == "fail":
                feedback = verification.get("feedback", "Unknown logic error")
                print(f"    ❌ Self-Correction Triggered: {feedback}")
                current_prompt = base_prompt + f"\n\n**AUTO-CRITIQUE:** {feedback}\nAdjust the code and visuals."
                continue
            
            print("    ✅ Auto-Reflection Passed.")

            # Human Review
            if enable_human_review:
                print("\n" + "="*60)
                print(f"👀 SCALARIZER REVIEW: {path_obj.name}")
                print(f"• Metrics: {exec_res['metrics']}")
                print(f"• Plot: {exec_res['plot_path']}")
                print("-" * 60)
                user_fb = input("> Press [ENTER] to confirm or type feedback: ").strip()
                
                if user_fb:
                    # Feedback triggers a regeneration and counts against max_retries.
                    human_feedback_collected = user_fb
                    current_prompt = base_prompt + f"\n\n**HUMAN FEEDBACK:**\n{user_fb}"
                    continue

            # Success - log and return
            final_result = {
                "status": "success", 
                "metrics": exec_res["metrics"], 
                "source_script": str(script_path)
            }
            
            self._log_action(
                action="generate_and_execute_script",
                input_ctx={
                    "data_path": data_path,
                    "objective": objective_query,
                    "metadata_path": metadata_path,
                    "attempt": attempt + 1
                },
                result=final_result,
                rationale=result.get("thought_process"),
                feedback=human_feedback_collected
            )
            
            self.state["status"] = "success"
            return final_result

        # Max retries exceeded
        failure_result = {"status": "failure", "error": "Max retries exceeded"}
        
        self._log_action(
            action="generate_and_execute_script",
            input_ctx={
                "data_path": data_path,
                "objective": objective_query,
                "attempt": max_retries
            },
            result=failure_result,
            rationale="All retry attempts exhausted"
        )
        
        self.state["status"] = "failed"
        return failure_result



================================================
FILE: user_interface.py
================================================
from typing import Dict, Any, Optional
import re


def display_plan_summary(result: Dict[str, Any]) -> None:
    """
    Parses the agent's results and prints a structured, pretty-printed 
    summary to the console for human review.

    Args:
        result: Agent output. If it has a truthy "error" only that error is
            printed. Otherwise "proposed_experiments" must be a non-empty list
            of dicts; each is rendered with its name, hypothesis, numbered
            steps, equipment, expected outcome, justification and sources.
    """
    def _as_list(value: Any) -> list:
        """Normalise a field to a list; a bare string becomes a single item."""
        if not value:
            return []
        if isinstance(value, str):
            # Iterating a string would print it one character per line.
            return [value]
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    # Matches only genuine list markers at the start of a step. The previous
    # character-class pattern also ate leading quantities that are part of the
    # content ("5 mL of water" -> "mL of water", "-78 C" -> "C").
    leading_marker = re.compile(
        r'^\s*(?:'
        r'\d+\s*[.):]+(?!\d)'   # "1.", "2)", "3:", "4.)" but not "1.5" or "10:1"
        r'|\d+\s*-(?=\s)'       # "1 - step" but not "2-propanol"
        r'|[-*•](?=\s)'         # plain bullets but not negative numbers ("-78")
        r')\s*'
    )

    # 1. Error Handling
    if result.get("error"):
        print(f"\n❌ Agent finished with an error: {result['error']}\n")
        return

    # 2. Structure Validation
    experiments = result.get("proposed_experiments")
    if not experiments or not isinstance(experiments, list):
        print("\n⚠️  The agent returned a result, but no experiments were found.")
        return

    # 3. Header
    print("\n" + "="*80)
    print("✅ PROPOSED EXPERIMENTAL PLAN")
    print("="*80)

    # 4. Loop through Experiments
    for i, exp in enumerate(experiments, 1):
        
        # --- Name & Hypothesis ---
        print(f"\n🔬 EXPERIMENT {i}: {exp.get('experiment_name', 'Unnamed Experiment')}")
        print("-" * 80)
        print(f"\n> 🎯 Hypothesis:\n> {exp.get('hypothesis', 'N/A')}")

        # --- Experimental Steps (Numbered) ---
        print("\n--- 🧪 Experimental Steps ---")
        steps = _as_list(exp.get('experimental_steps', []))
        if steps:
            for j, step in enumerate(steps, 1):
                # Remove leading numbers/bullets provided by LLM so we can renumber.
                clean_step = leading_marker.sub('', str(step), count=1).strip()
                print(f" {j}. {clean_step}")
        else:
            print("  (No steps provided)")
        
        # --- Equipment ---
        print("\n--- 🛠️  Required Equipment ---")
        equipment = _as_list(exp.get('required_equipment', []))
        if equipment:
            # Print as a clean comma-separated list if short, or bullets if long
            if len(equipment) > 5:
                for item in equipment:
                    print(f"  * {item}")
            else:
                # str() so non-string items (numbers, dicts) cannot break the join.
                print(f"  {', '.join(str(item) for item in equipment)}")
        else:
            print("  (No equipment specified)")

        # --- Outcome & Justification (Critical for Review) ---
        print("\n--- 📈 Expected Outcome ---")
        print(f"  {exp.get('expected_outcome', 'N/A')}")

        print("\n--- 💡 Justification ---")
        print(f"  {exp.get('justification', 'N/A')}")
        
        # --- Source Documents ---
        print("\n--- 📄 Source Documents ---")
        sources = _as_list(exp.get('source_documents', []))
        if sources:
            for src in sources:
                print(f"  - {src}")
        else:
            print("  (No sources listed)")

        # --- Code Indicator (If generated) ---
        if "implementation_code" in exp:
            print("\n--- 💻 Implementation Code ---")
            print("  ℹ️  Plan includes implementation script.")

    print("\n" + "="*80)


def get_user_feedback() -> Optional[str]:
    """
    Ask the reviewer on the CLI whether the plan shown above is acceptable.

    Blocks on input(). Returns the stripped text that was typed, or None when
    the reply is empty (plain ENTER), which signals approval.
    """
    divider = "-" * 60
    banner = (
        "\n" + divider,
        "📝 REQUESTING FEEDBACK",
        divider,
        "Review the plan above.",
        "• To APPROVE: Press [ENTER] directly.",
        "• To REQUEST CHANGES: Type your feedback/instructions and press [ENTER].",
    )
    for line in banner:
        print(line)

    reply = input("\n> Instruction: ").strip()
    # An empty reply means the user accepted the plan.
    return reply if reply else None


def get_dataset_description(filename: str) -> str:
    """
    Prompt the user on the CLI for context about a data file lacking metadata.

    Blocks on input(). Returns the stripped reply; an empty string means the
    user pressed ENTER to skip.
    """
    alert_bar = "!" * 60
    notice = (
        "\n" + alert_bar,
        f"⚠️  MISSING METADATA FOR: {filename}",
        alert_bar,
        "The agent needs context to understand columns/units in this file.",
        "• Option 1: Press [ENTER] to skip (Agent will guess based on headers).",
        "• Option 2: Type a brief description (e.g., 'Yield results from Suzuki coupling').",
    )
    for line in notice:
        print(line)

    return input("\n> Context: ").strip()

