Coverage for src / infra / io / config.py: 49%
225 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
1"""Configuration dataclass for mala.
3Provides MalaConfig for centralized configuration management. This allows
4programmatic users to construct configuration without relying on environment
5variables, while CLI users can continue using env vars via from_env().
7Environment Variables:
8 MALA_RUNS_DIR: Directory for run metadata files (default: ~/.config/mala/runs)
9 MALA_LOCK_DIR: Directory for file locks (default: /tmp/mala-locks)
10 CLAUDE_CONFIG_DIR: Claude SDK config directory (default: ~/.claude)
11 BRAINTRUST_API_KEY: Braintrust API key (required when braintrust_enabled=True)
12 MALA_REVIEW_TIMEOUT: Timeout in seconds for review-gate wait
13 MALA_CERBERUS_SPAWN_ARGS: Extra args for `review-gate spawn-code-review`
14 MALA_CERBERUS_WAIT_ARGS: Extra args for `review-gate wait`
15 MALA_CERBERUS_ENV: Extra env for review-gate (JSON dict or comma KEY=VALUE list)
16 MALA_MAX_DIFF_SIZE_KB: Max diff size for epic verification (KB)
17 MALA_MAX_EPIC_VERIFICATION_RETRIES: Max retries for epic verification loop
18 LLM_API_KEY: API key for LLM calls (fallback to ANTHROPIC_API_KEY)
19 LLM_BASE_URL: Base URL for LLM API
20"""
22from __future__ import annotations
24import json
25import os
26import shlex
27from dataclasses import dataclass, field
28from pathlib import Path
30from src.infra.tools.env import USER_CONFIG_DIR
def parse_cerberus_args(raw: str | None, *, source: str) -> list[str]:
    """Tokenize a shell-style argument string for review-gate.

    Args:
        raw: Raw argument string, or None.
        source: Label prepended to the error message to identify where
            the value came from.

    Returns:
        The tokens produced by ``shlex.split``; an empty list when ``raw``
        is None, empty, or whitespace only.

    Raises:
        ValueError: If ``shlex.split`` rejects the string; the message is
            prefixed with ``source``.
    """
    has_content = raw is not None and raw.strip() != ""
    if not has_content:
        return []

    try:
        tokens = shlex.split(raw)
    except ValueError as split_error:
        # Re-raise with the origin attached so the user knows which setting is bad.
        raise ValueError(f"{source}: {split_error}") from split_error
    return tokens
def parse_cerberus_env(raw: str | None, *, source: str) -> dict[str, str]:
    """Parse extra review-gate environment variables from a string.

    Two formats are accepted: a JSON object (detected by a leading ``{``)
    or a comma-separated list of ``KEY=VALUE`` entries.

    Args:
        raw: Raw value, or None.
        source: Label prepended to error messages to identify where the
            value came from.

    Returns:
        Mapping of variable names to values. JSON keys and values are
        converted with ``str()``. Empty when ``raw`` is None or blank.

    Raises:
        ValueError: On invalid JSON, a JSON value that is not an object,
            an entry without ``=``, or an entry with an empty key.
    """
    text = raw.strip() if raw else ""
    if not text:
        return {}

    if text.startswith("{"):
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError as exc:
            raise ValueError(f"{source}: invalid JSON ({exc})") from exc
        if not isinstance(decoded, dict):
            raise ValueError(f"{source}: JSON must be an object")
        return {str(name): str(val) for name, val in decoded.items()}

    result: dict[str, str] = {}
    for chunk in raw.split(","):
        entry = chunk.strip()
        if not entry:
            # Blank segments (e.g. from a trailing comma) are ignored.
            continue
        name, separator, value = entry.partition("=")
        if not separator:
            raise ValueError(f"{source}: invalid entry '{entry}' (expected KEY=VALUE)")
        name = name.strip()
        if not name:
            raise ValueError(f"{source}: invalid entry '{entry}' (empty key)")
        # Only the first "=" splits; the value keeps any further "=" and
        # any whitespace directly after the separator.
        result[name] = value
    return result
68def _normalize_cerberus_env(env: dict[str, str]) -> tuple[tuple[str, str], ...]:
69 """Normalize env map into a stable, hashable tuple of key/value pairs."""
70 return tuple(sorted(env.items()))
73def _find_cerberus_bin_path(claude_config_dir: Path) -> Path | None:
74 """Find the cerberus plugin bin directory from Claude's installed plugins.
76 Looks up the cerberus plugin installation path from Claude's
77 installed_plugins.json (v2 schema) and returns the path to its
78 bin/ directory. Falls back to known plugin locations if metadata is missing.
80 Args:
81 claude_config_dir: Path to Claude config directory (typically ~/.claude).
83 Returns:
84 Path to cerberus bin directory, or None if not found.
85 """
86 plugins_root = claude_config_dir / "plugins"
87 plugins_file = plugins_root / "installed_plugins.json"
89 def _iter_plugin_entries(data: object) -> list[tuple[str, object]]:
90 if isinstance(data, dict):
91 plugins = dict.get(data, "plugins")
92 if isinstance(plugins, dict):
93 return list(plugins.items())
94 return []
96 if plugins_file.exists():
97 try:
98 data = json.loads(plugins_file.read_text())
99 # Look for cerberus plugin (key format: "cerberus@cerberus" or similar)
100 for key, installs in _iter_plugin_entries(data):
101 if "cerberus" in str(key).lower() and isinstance(installs, list):
102 for install in installs:
103 if not isinstance(install, dict):
104 continue
105 install_path = dict.get(install, "installPath")
106 if install_path:
107 bin_path = Path(install_path) / "bin"
108 if bin_path.exists():
109 return bin_path
110 except (json.JSONDecodeError, KeyError, TypeError):
111 pass
113 # Fallback to known locations if installed_plugins.json is missing or stale.
114 marketplace_bin = plugins_root / "marketplaces" / "cerberus" / "bin"
115 if marketplace_bin.exists():
116 return marketplace_bin
118 cache_root = plugins_root / "cache" / "cerberus" / "cerberus"
119 if cache_root.exists():
120 candidates = sorted(
121 (path for path in cache_root.iterdir() if path.is_dir()),
122 key=lambda path: path.stat().st_mtime,
123 reverse=True,
124 )
125 for candidate in candidates:
126 bin_path = candidate / "bin"
127 if bin_path.exists():
128 return bin_path
130 return None
133def _safe_int(value: str | None, default: int) -> int:
134 """Safely parse an integer with fallback to default."""
135 if value is None:
136 return default
137 try:
138 return int(value)
139 except ValueError:
140 return default
class ConfigurationError(Exception):
    """Error carrying every problem found while validating configuration.

    Attributes:
        errors: The individual error messages, as passed to the constructor.
    """

    def __init__(self, errors: list[str]) -> None:
        self.errors = errors
        # One bullet line per problem beneath a fixed header.
        bullet_lines = [f" - {item}" for item in errors]
        super().__init__(
            "Configuration validation failed:\n" + "\n".join(bullet_lines)
        )
@dataclass(frozen=True)
class MalaConfig:
    """Centralized configuration for mala orchestrator.

    This dataclass consolidates all configuration that was previously scattered
    across environment variable accesses. It can be constructed programmatically
    or loaded from environment variables using from_env().

    Attributes:
        runs_dir: Directory where run metadata files are stored.
            Env: MALA_RUNS_DIR (default: ~/.config/mala/runs)
        lock_dir: Directory for file locks during parallel processing.
            Env: MALA_LOCK_DIR (default: /tmp/mala-locks)
        claude_config_dir: Claude SDK configuration directory.
            Env: CLAUDE_CONFIG_DIR (default: ~/.claude)
        braintrust_api_key: Braintrust API key for tracing.
            Env: BRAINTRUST_API_KEY (required when braintrust_enabled=True)
        braintrust_enabled: Whether Braintrust tracing is enabled.
            Set to True automatically when braintrust_api_key is provided.
        review_enabled: Whether automated code review is enabled.
            Defaults to True.
        review_timeout: Timeout in seconds for review operations.
            Env: MALA_REVIEW_TIMEOUT (default: 1200)
        cerberus_bin_path: Path to the cerberus bin/ directory, or None.
            from_env() auto-detects it under claude_config_dir.
        cerberus_spawn_args: Extra args for `review-gate spawn-code-review`.
            Defaults to empty (no extra args).
        cerberus_wait_args: Extra args for `review-gate wait`.
            Defaults to empty (no extra args).
        cerberus_env: Extra environment variables for review-gate.
            Defaults to empty (no extra env).
        track_review_issues: Whether to create beads issues for P2/P3 review findings.
            Env: MALA_TRACK_REVIEW_ISSUES (default: True)
        llm_api_key: API key for LLM calls (epic verification).
            Env: LLM_API_KEY (falls back to ANTHROPIC_API_KEY if not set)
        llm_base_url: Base URL for LLM API requests.
            Env: LLM_BASE_URL (for proxy/routing)
        max_epic_verification_retries: Maximum retries for epic verification loop.
            Env: MALA_MAX_EPIC_VERIFICATION_RETRIES (default: 3)
        deadlock_detection_enabled: Whether deadlock detection is enabled.
            Defaults to True; from_env() does not read it from the environment.

    Example:
        # Programmatic construction (no env vars needed):
        config = MalaConfig(
            runs_dir=Path("/custom/runs"),
            lock_dir=Path("/custom/locks"),
            claude_config_dir=Path("/custom/claude"),
        )

        # Load from environment:
        config = MalaConfig.from_env()
    """

    # Paths
    runs_dir: Path = field(
        default_factory=lambda: Path.home() / ".config" / "mala" / "runs"
    )
    lock_dir: Path = field(default_factory=lambda: Path("/tmp/mala-locks"))
    claude_config_dir: Path = field(default_factory=lambda: Path.home() / ".claude")

    # API keys (optional)
    braintrust_api_key: str | None = None

    # Feature flags (derived from API key presence)
    braintrust_enabled: bool = field(default=False)

    # Review settings
    review_enabled: bool = field(default=True)
    review_timeout: int = field(default=1200)
    cerberus_bin_path: Path | None = None  # Path to cerberus bin/ directory
    cerberus_spawn_args: tuple[str, ...] = field(default_factory=tuple)
    cerberus_wait_args: tuple[str, ...] = field(default_factory=tuple)
    cerberus_env: tuple[tuple[str, str], ...] = field(default_factory=tuple)
    track_review_issues: bool = field(default=True)  # Create beads issues for P2/P3

    # LLM configuration (for epic verification and other direct API calls)
    llm_api_key: str | None = (
        None  # API key for LLM calls (falls back to ANTHROPIC_API_KEY)
    )
    llm_base_url: str | None = None  # Base URL for LLM API (for proxy/routing)

    # Epic verification retry configuration
    max_epic_verification_retries: int = field(default=3)

    # Deadlock detection
    deadlock_detection_enabled: bool = field(default=True)

    def __post_init__(self) -> None:
        """Normalize Cerberus overrides and derive feature flags.

        Since the dataclass is frozen, we use object.__setattr__ to set
        derived fields after initialization.
        """
        # Normalize Cerberus overrides for immutability/consistency: callers
        # may pass lists (or a dict for cerberus_env) instead of tuples.
        if isinstance(self.cerberus_spawn_args, list):
            object.__setattr__(
                self, "cerberus_spawn_args", tuple(self.cerberus_spawn_args)
            )
        if isinstance(self.cerberus_wait_args, list):
            object.__setattr__(
                self, "cerberus_wait_args", tuple(self.cerberus_wait_args)
            )
        if isinstance(self.cerberus_env, dict):
            object.__setattr__(
                self, "cerberus_env", _normalize_cerberus_env(self.cerberus_env)
            )
        elif isinstance(self.cerberus_env, list):
            object.__setattr__(self, "cerberus_env", tuple(self.cerberus_env))

        # Derive braintrust_enabled from api key presence if not explicitly set
        if not self.braintrust_enabled and self.braintrust_api_key:
            object.__setattr__(self, "braintrust_enabled", True)

    @classmethod
    def from_env(cls, *, validate: bool = True) -> MalaConfig:
        """Create MalaConfig by loading from environment variables with validation.

        Reads the following environment variables:
            - MALA_RUNS_DIR: Run metadata directory (optional)
            - MALA_LOCK_DIR: Lock files directory (optional)
            - CLAUDE_CONFIG_DIR: Claude SDK config directory (optional)
            - BRAINTRUST_API_KEY: Braintrust API key (optional)
            - MALA_REVIEW_TIMEOUT: Review timeout in seconds (optional)
            - MALA_TRACK_REVIEW_ISSUES: Create beads issues for P2/P3 findings
              (optional; "0", "false", "no" or "off" disables it)
            - MALA_CERBERUS_SPAWN_ARGS: Extra args for review-gate spawn (optional)
            - MALA_CERBERUS_WAIT_ARGS: Extra args for review-gate wait (optional)
            - MALA_CERBERUS_ENV: Extra env for review-gate (optional)
            - MALA_MAX_EPIC_VERIFICATION_RETRIES: Max epic verification retries
              (optional; a non-integer value silently falls back to 3)
            - LLM_API_KEY: API key for LLM calls (optional; falls back to
              ANTHROPIC_API_KEY)
            - LLM_BASE_URL: Base URL for LLM API (optional)

        MALA_MAX_DIFF_SIZE_KB is not read by this method.

        Args:
            validate: If True (default), run validation and raise ConfigurationError
                on any errors. Set to False to skip validation.

        Returns:
            MalaConfig instance with values from environment or defaults.

        Raises:
            ConfigurationError: If validate=True and configuration is invalid,
                or (regardless of ``validate``) if MALA_REVIEW_TIMEOUT or one
                of the MALA_CERBERUS_* values cannot be parsed.

        Example:
            # Set environment variables first
            os.environ["BRAINTRUST_API_KEY"] = "my-key"

            # Load configuration (validates by default)
            config = MalaConfig.from_env()
            assert config.braintrust_enabled is True

            # Skip validation if needed
            config = MalaConfig.from_env(validate=False)
        """
        # Get path values from environment with defaults
        runs_dir = Path(
            os.environ.get(
                "MALA_RUNS_DIR", str(Path.home() / ".config" / "mala" / "runs")
            )
        )
        lock_dir = Path(os.environ.get("MALA_LOCK_DIR", "/tmp/mala-locks"))
        claude_config_dir = Path(
            os.environ.get("CLAUDE_CONFIG_DIR", str(Path.home() / ".claude"))
        )

        # Get optional API keys (treat empty strings as None)
        braintrust_api_key = os.environ.get("BRAINTRUST_API_KEY") or None

        # Parse problems are collected rather than raised immediately so that
        # all of them can be reported together.
        parse_errors: list[str] = []

        review_timeout = None
        review_timeout_raw = os.environ.get("MALA_REVIEW_TIMEOUT")
        if review_timeout_raw:
            try:
                review_timeout = int(review_timeout_raw)
            except ValueError:
                parse_errors.append(
                    f"MALA_REVIEW_TIMEOUT: invalid integer '{review_timeout_raw}'"
                )
                review_timeout = None

        # Parse Cerberus override settings
        try:
            cerberus_spawn_args = parse_cerberus_args(
                os.environ.get("MALA_CERBERUS_SPAWN_ARGS"),
                source="MALA_CERBERUS_SPAWN_ARGS",
            )
        except ValueError as exc:
            parse_errors.append(str(exc))
            cerberus_spawn_args = []

        try:
            cerberus_wait_args = parse_cerberus_args(
                os.environ.get("MALA_CERBERUS_WAIT_ARGS"),
                source="MALA_CERBERUS_WAIT_ARGS",
            )
        except ValueError as exc:
            parse_errors.append(str(exc))
            cerberus_wait_args = []

        try:
            cerberus_env = parse_cerberus_env(
                os.environ.get("MALA_CERBERUS_ENV"),
                source="MALA_CERBERUS_ENV",
            )
        except ValueError as exc:
            parse_errors.append(str(exc))
            cerberus_env = {}

        # Auto-detect cerberus bin path from Claude plugins
        cerberus_bin_path = _find_cerberus_bin_path(claude_config_dir)

        # Parse track_review_issues flag (defaults to True). Surrounding
        # whitespace is stripped so that e.g. "false " still disables it.
        track_review_issues_raw = (
            os.environ.get("MALA_TRACK_REVIEW_ISSUES", "").strip().lower()
        )
        track_review_issues = track_review_issues_raw not in ("0", "false", "no", "off")

        # Get LLM configuration (for epic verification and other direct API calls)
        # Falls back to ANTHROPIC_API_KEY if LLM_API_KEY is not set
        llm_api_key = (
            os.environ.get("LLM_API_KEY") or os.environ.get("ANTHROPIC_API_KEY") or None
        )
        llm_base_url = os.environ.get("LLM_BASE_URL") or None

        # Parse max_epic_verification_retries
        max_epic_verification_retries = _safe_int(
            os.environ.get("MALA_MAX_EPIC_VERIFICATION_RETRIES"), 3
        )

        config = cls(
            runs_dir=runs_dir,
            lock_dir=lock_dir,
            claude_config_dir=claude_config_dir,
            braintrust_api_key=braintrust_api_key,
            review_timeout=review_timeout if review_timeout is not None else 1200,
            cerberus_bin_path=cerberus_bin_path,
            cerberus_spawn_args=tuple(cerberus_spawn_args),
            cerberus_wait_args=tuple(cerberus_wait_args),
            cerberus_env=_normalize_cerberus_env(cerberus_env),
            track_review_issues=track_review_issues,
            llm_api_key=llm_api_key,
            llm_base_url=llm_base_url,
            max_epic_verification_retries=max_epic_verification_retries,
        )

        if validate:
            errors = config.validate()
            errors.extend(parse_errors)
            if errors:
                raise ConfigurationError(errors)
        elif parse_errors:
            # Unparseable values are reported even when validation is skipped.
            raise ConfigurationError(parse_errors)

        return config

    def validate(self) -> list[str]:
        """Validate configuration and return list of errors.

        Checks:
            - Feature flags have required API keys
            - Paths are absolute
            - review_timeout and max_epic_verification_retries are not negative

        Note: Parent directories are not checked since ensure_directories()
        creates them with parents=True. This allows first-run on fresh machines.

        Returns:
            List of error messages. Empty list if configuration is valid.

        Example:
            config = MalaConfig(braintrust_enabled=True)  # Missing API key
            errors = config.validate()
            # errors = ["braintrust_enabled=True requires BRAINTRUST_API_KEY to be set"]
        """
        errors: list[str] = []

        # Check required API keys for enabled features
        if self.braintrust_enabled and not self.braintrust_api_key:
            errors.append(
                "braintrust_enabled=True requires BRAINTRUST_API_KEY to be set"
            )

        # Validate paths are absolute (recommended for deterministic behavior)
        if not self.runs_dir.is_absolute():
            errors.append(f"runs_dir should be an absolute path, got: {self.runs_dir}")
        if not self.lock_dir.is_absolute():
            errors.append(f"lock_dir should be an absolute path, got: {self.lock_dir}")
        if not self.claude_config_dir.is_absolute():
            errors.append(
                f"claude_config_dir should be an absolute path, got: {self.claude_config_dir}"
            )

        # Numeric settings must not be negative.
        if self.review_timeout < 0:
            errors.append(f"review_timeout must be >= 0, got: {self.review_timeout}")
        if self.max_epic_verification_retries < 0:
            errors.append(
                "max_epic_verification_retries must be >= 0, "
                f"got: {self.max_epic_verification_retries}"
            )

        return errors

    def ensure_directories(self) -> None:
        """Create configuration directories if they don't exist.

        Creates runs_dir and lock_dir with parents=True.
        Does not create claude_config_dir (managed by Claude SDK).
        """
        self.runs_dir.mkdir(parents=True, exist_ok=True)
        self.lock_dir.mkdir(parents=True, exist_ok=True)
@dataclass(frozen=True)
class CLIOverrides:
    """Unparsed command-line values layered on top of a MalaConfig.

    Every field defaults to "no override" (None or False). The string fields
    hold raw text; build_resolved_config() parses them when it merges the
    overrides with a base MalaConfig into a ResolvedConfig.

    Attributes:
        cerberus_spawn_args: Raw extra-args string for review-gate spawn.
        cerberus_wait_args: Raw extra-args string for review-gate wait.
        cerberus_env: Raw extra env vars (JSON or KEY=VALUE,KEY=VALUE).
        review_timeout: Review timeout override, in seconds.
        max_epic_verification_retries: Override for the epic verification
            retry limit.
        no_braintrust: True when the --no-braintrust flag was passed.
        disable_review: True when 'review' is in --disable-validations.
    """

    # Raw strings, parsed later
    cerberus_spawn_args: str | None = None
    cerberus_wait_args: str | None = None
    cerberus_env: str | None = None

    # Numeric overrides (None means "keep the base value")
    review_timeout: int | None = None
    max_epic_verification_retries: int | None = None

    # Feature switches
    no_braintrust: bool = False
    disable_review: bool = False
@dataclass(frozen=True)
class ResolvedConfig:
    """Final, immutable configuration after CLI overrides are applied.

    Produced by build_resolved_config(). It carries the MalaConfig values
    (with any CLI overrides already merged in) plus fields derived from the
    combination of the two. Every field is required; there are no defaults.

    Attributes:
        runs_dir: Directory where run metadata files are stored.
        lock_dir: Directory for file locks during parallel processing.
        claude_config_dir: Claude SDK configuration directory.
        braintrust_api_key: Braintrust API key for tracing.
        braintrust_enabled: Whether Braintrust tracing is enabled.
        review_enabled: Whether automated code review is enabled.
        review_timeout: Timeout in seconds for review operations.
        cerberus_bin_path: Path to cerberus bin/ directory.
        cerberus_spawn_args: Parsed extra args for review-gate spawn.
        cerberus_wait_args: Parsed extra args for review-gate wait.
        cerberus_env: Parsed extra environment variables for review-gate,
            as (key, value) pairs.
        track_review_issues: Whether to create beads issues for P2/P3.
        llm_api_key: API key for LLM calls.
        llm_base_url: Base URL for LLM API.
        max_epic_verification_retries: Maximum retries for epic verification loop.
        braintrust_disabled_reason: Reason braintrust is disabled, if applicable.
    """

    # --- Paths ---
    runs_dir: Path
    lock_dir: Path
    claude_config_dir: Path

    # --- Braintrust ---
    braintrust_api_key: str | None
    braintrust_enabled: bool

    # --- Review ---
    review_enabled: bool
    review_timeout: int
    cerberus_bin_path: Path | None
    cerberus_spawn_args: tuple[str, ...]
    cerberus_wait_args: tuple[str, ...]
    cerberus_env: tuple[tuple[str, str], ...]
    track_review_issues: bool

    # --- LLM ---
    llm_api_key: str | None
    llm_base_url: str | None

    # --- Epic verification ---
    max_epic_verification_retries: int

    # --- Derived ---
    braintrust_disabled_reason: str | None
def build_resolved_config(
    base_config: MalaConfig,
    cli_overrides: CLIOverrides | None = None,
) -> ResolvedConfig:
    """Merge a MalaConfig with optional CLI overrides into a ResolvedConfig.

    For each overridable setting the CLI value wins when it is not None;
    otherwise the base value is used. Raw CLI strings are parsed here. The
    Braintrust and review flags stay enabled only if the base config enables
    them and the CLI did not switch them off.

    Args:
        base_config: Base configuration from MalaConfig.from_env() or constructed.
        cli_overrides: Optional CLI overrides to apply on top of base config.

    Returns:
        A frozen ResolvedConfig with all values resolved and derived fields computed.

    Raises:
        ValueError: If CLI override values cannot be parsed.

    Example:
        config = MalaConfig.from_env()
        overrides = CLIOverrides(
            cerberus_spawn_args="--mode fast",
        )
        resolved = build_resolved_config(config, overrides)
    """
    cli = cli_overrides if cli_overrides else CLIOverrides()

    # Cerberus settings: parse the CLI string when given, else keep the base value.
    spawn_args = (
        base_config.cerberus_spawn_args
        if cli.cerberus_spawn_args is None
        else tuple(parse_cerberus_args(cli.cerberus_spawn_args, source="CLI"))
    )
    wait_args = (
        base_config.cerberus_wait_args
        if cli.cerberus_wait_args is None
        else tuple(parse_cerberus_args(cli.cerberus_wait_args, source="CLI"))
    )
    env = (
        base_config.cerberus_env
        if cli.cerberus_env is None
        else _normalize_cerberus_env(
            parse_cerberus_env(cli.cerberus_env, source="CLI")
        )
    )

    # Numeric overrides: an explicit CLI value (including 0) takes precedence.
    if cli.review_timeout is None:
        review_timeout = base_config.review_timeout
    else:
        review_timeout = cli.review_timeout

    if cli.max_epic_verification_retries is None:
        max_epic_verification_retries = base_config.max_epic_verification_retries
    else:
        max_epic_verification_retries = cli.max_epic_verification_retries

    # Feature flags after CLI overrides.
    braintrust_enabled = base_config.braintrust_enabled and not cli.no_braintrust
    review_enabled = base_config.review_enabled and not cli.disable_review

    # Explain why Braintrust is off, most specific cause first.
    braintrust_disabled_reason: str | None
    if braintrust_enabled:
        braintrust_disabled_reason = None
    elif cli.no_braintrust:
        braintrust_disabled_reason = "--no-braintrust"
    elif not base_config.braintrust_api_key:
        braintrust_disabled_reason = f"add BRAINTRUST_API_KEY to {USER_CONFIG_DIR}/.env"
    else:
        braintrust_disabled_reason = "disabled by config"

    return ResolvedConfig(
        runs_dir=base_config.runs_dir,
        lock_dir=base_config.lock_dir,
        claude_config_dir=base_config.claude_config_dir,
        braintrust_api_key=base_config.braintrust_api_key,
        braintrust_enabled=braintrust_enabled,
        review_enabled=review_enabled,
        review_timeout=review_timeout,
        cerberus_bin_path=base_config.cerberus_bin_path,
        cerberus_spawn_args=spawn_args,
        cerberus_wait_args=wait_args,
        cerberus_env=env,
        track_review_issues=base_config.track_review_issues,
        llm_api_key=base_config.llm_api_key,
        llm_base_url=base_config.llm_base_url,
        max_epic_verification_retries=max_epic_verification_retries,
        braintrust_disabled_reason=braintrust_disabled_reason,
    )