Coverage for src / domain / validation / e2e.py: 50%
119 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
1"""E2E fixture runner for mala validation.
3This module provides end-to-end validation using a fixture repository.
4It creates a temporary repo with a known bug, runs mala to fix it,
5and validates the result.
7Key types:
8- E2EResult: Result of an E2E validation run
9- E2EConfig: Configuration for E2E validation
10- E2ERunner: Orchestrates the E2E validation flow
11"""
13from __future__ import annotations
15import shutil
16import tempfile
17import time
18import uuid
19from dataclasses import dataclass, field
20from enum import Enum
21from pathlib import Path
22from typing import TYPE_CHECKING
24from .helpers import (
25 annotate_issue,
26 get_ready_issue_id,
27 init_fixture_repo,
28 write_fixture_repo,
29)
31if TYPE_CHECKING:
32 from collections.abc import Mapping
34 from src.core.protocols import CommandRunnerPort, EnvConfigPort
class E2EStatus(Enum):
    """Outcome categories reported for an E2E validation run."""

    PASSED = "passed"
    FAILED = "failed"
    # The run did not happen because prerequisites were missing.
    SKIPPED = "skipped"
@dataclass
class E2EPrereqResult:
    """Outcome of the E2E prerequisite check.

    Attributes:
        ok: True when every prerequisite is satisfied.
        missing: Human-readable description of each unmet prerequisite.
        can_skip: True when unmet prerequisites should lead to a skip
            instead of a failure.
    """

    ok: bool
    missing: list[str] = field(default_factory=list)
    can_skip: bool = False

    def failure_reason(self) -> str | None:
        """Describe why the check failed.

        Returns:
            None when ``ok`` is set; otherwise a message listing the missing
            prerequisites, or a generic message when none were recorded.
        """
        if self.ok:
            return None
        if self.missing:
            details = ", ".join(self.missing)
            return f"E2E prereq missing: {details}"
        return "E2E prerequisites not met"
@dataclass
class E2EResult:
    """Outcome of a single E2E validation run.

    Attributes:
        passed: True when the run is not considered a failure.
        status: Status code for the run.
        failure_reason: Why the run failed or was skipped; None on success.
        fixture_path: Location of the fixture repo, when one is reported.
        duration_seconds: Elapsed time of the run, in seconds.
        command_output: Tail of the mala command output.
        returncode: Exit code of the mala command.
    """

    passed: bool
    status: E2EStatus
    failure_reason: str | None = None
    fixture_path: Path | None = None
    duration_seconds: float = 0.0
    command_output: str = ""
    returncode: int = 0

    def short_summary(self) -> str:
        """Return a one-line description suitable for logs and prompts."""
        # The skipped status is checked first, so it wins over `passed`.
        if self.status == E2EStatus.SKIPPED:
            reason = self.failure_reason or "prerequisites not met"
            return f"E2E skipped: {reason}"
        if not self.passed:
            reason = self.failure_reason or "unknown error"
            return f"E2E failed: {reason}"
        return "E2E passed"
@dataclass
class E2EConfig:
    """Tunable settings for an E2E validation run.

    Attributes:
        enabled: Whether E2E validation is switched on.
        skip_if_no_keys: Deprecated; retained only for backward compatibility.
        keep_fixture: Leave the fixture repo on disk after the run, which
            helps when debugging.
        timeout_seconds: Time limit for the mala run command, in seconds
            (defaults to 300, i.e. five minutes).
        max_agents: Upper bound on agents used by the mala run.
        max_issues: Upper bound on issues processed by the mala run.
        cerberus_mode: Cerberus review mode (fast/smart/max); E2E defaults
            to "fast".
    """

    enabled: bool = True
    skip_if_no_keys: bool = False
    keep_fixture: bool = False
    timeout_seconds: float = 300.0
    max_agents: int = 1
    max_issues: int = 1
    cerberus_mode: str = "fast"
class E2ERunner:
    """Orchestrates E2E validation using a fixture repository.

    A run checks prerequisites, creates a temporary fixture repo, invokes
    ``mala run`` on it through the injected command runner, and removes the
    fixture afterwards unless the configuration asks to keep it.
    """

    def __init__(
        self,
        env_config: EnvConfigPort,
        command_runner: CommandRunnerPort,
        config: E2EConfig | None = None,
    ) -> None:
        """Initialize the E2E runner.

        Args:
            env_config: Environment configuration for finding cerberus bin path.
            command_runner: CommandRunnerPort for running mala commands.
            config: E2E configuration. Uses defaults if None.
        """
        self.config = config or E2EConfig()
        self.env_config = env_config
        self._command_runner = command_runner

    def check_prereqs(self, env: Mapping[str, str] | None = None) -> E2EPrereqResult:
        """Check if all E2E prerequisites are met.

        The ``mala`` and ``bd`` executables are looked up on the ``PATH`` of
        ``env``. When ``env`` has no ``PATH`` entry the lookup falls back to
        the PATH of the current process.

        Args:
            env: Environment variables to check. Uses os.environ if None.

        Returns:
            E2EPrereqResult with details about missing prerequisites.
        """
        import os

        if env is None:
            env = os.environ

        # Search the PATH of the environment being checked rather than always
        # the process PATH. None makes shutil.which use the process PATH.
        search_path = env.get("PATH")

        missing: list[str] = []

        # Check for mala CLI
        if not shutil.which("mala", path=search_path):
            missing.append("mala CLI not found in PATH")

        # Check for bd CLI
        if not shutil.which("bd", path=search_path):
            missing.append("bd CLI not found in PATH")

        # Check for Cerberus review-gate (required for E2E to test review flow)
        cerberus_bin = self.env_config.find_cerberus_bin_path()
        if cerberus_bin is None:
            missing.append(
                "Cerberus review-gate not installed (check ~/.claude/plugins)"
            )
        elif not (cerberus_bin / "review-gate").exists():
            missing.append(f"review-gate binary not found at {cerberus_bin}")

        if missing:
            return E2EPrereqResult(ok=False, missing=missing, can_skip=False)

        return E2EPrereqResult(ok=True)

    def run(
        self, env: Mapping[str, str] | None = None, cwd: Path | None = None
    ) -> E2EResult:
        """Run E2E validation.

        Creates a fixture repo, runs mala on it, and validates the result.
        Cleans up the fixture repo unless keep_fixture is True.

        Args:
            env: Environment variables for subprocess. Uses os.environ if None.
            cwd: Working directory for mala command. Uses current directory if None.

        Returns:
            E2EResult with details about the validation. ``fixture_path`` is
            only populated when the fixture is kept.
        """
        import os

        # Always work on a private copy so the caller's mapping is not shared.
        env = dict(os.environ) if env is None else dict(env)

        if cwd is None:
            cwd = Path.cwd()

        # Check prerequisites
        prereq = self.check_prereqs(env)
        if not prereq.ok:
            if prereq.can_skip:
                return E2EResult(
                    passed=True,  # Skipped is considered "not failed"
                    status=E2EStatus.SKIPPED,
                    failure_reason=prereq.failure_reason(),
                )
            return E2EResult(
                passed=False,
                status=E2EStatus.FAILED,
                failure_reason=prereq.failure_reason(),
            )

        # Create fixture repo
        fixture_path = Path(tempfile.mkdtemp(prefix="mala-e2e-fixture-"))
        start_time = time.monotonic()

        # Report the path only when the directory survives this call.
        reported_path = fixture_path if self.config.keep_fixture else None

        try:
            # Write fixture files
            setup_error = self._setup_fixture(fixture_path)
            if setup_error:
                return E2EResult(
                    passed=False,
                    status=E2EStatus.FAILED,
                    failure_reason=setup_error,
                    fixture_path=reported_path,
                    duration_seconds=time.monotonic() - start_time,
                )

            # Run mala
            result = self._run_mala(fixture_path, env, cwd)
            result.fixture_path = reported_path
            return result

        finally:
            # Cleanup fixture unless keeping it; runs on every exit path,
            # including exceptions raised during setup or the mala run.
            if not self.config.keep_fixture and fixture_path.exists():
                shutil.rmtree(fixture_path, ignore_errors=True)

    def _setup_fixture(self, repo_path: Path) -> str | None:
        """Set up the fixture repository.

        Args:
            repo_path: Path to create the fixture repo in.

        Returns:
            Error message if setup failed, None on success.
        """
        # Write fixture files using shared helper
        write_fixture_repo(repo_path)

        # Initialize git and beads using shared helper
        return init_fixture_repo(repo_path, self._command_runner)

    def _run_mala(
        self, fixture_path: Path, env: Mapping[str, str], cwd: Path
    ) -> E2EResult:
        """Run mala on the fixture repo.

        Args:
            fixture_path: Path to the fixture repository.
            env: Environment variables for subprocess.
            cwd: Working directory for the mala command.

        Returns:
            E2EResult with command execution details. A timeout is reported
            with return code 124.
        """
        # Annotate the issue with context using shared helper
        issue_id = get_ready_issue_id(fixture_path, self._command_runner)
        if issue_id:
            annotate_issue(fixture_path, issue_id, self._command_runner)

        # Override CLAUDE_SESSION_ID to avoid conflicts with parent session's review gate.
        # The Cerberus review-gate tracks pending reviews per session, so running e2e
        # inside an existing mala session (which already has a review gate active) would
        # fail with "Review gate already active" unless we use a distinct session ID.
        child_env = dict(env)
        child_env["CLAUDE_SESSION_ID"] = f"e2e-{uuid.uuid4()}"

        # Convert timeout from seconds to minutes for CLI (which expects minutes).
        # The value is floored to whole minutes and never drops below 1.
        timeout_minutes = max(1, int(self.config.timeout_seconds // 60))
        cmd = [
            "mala",
            "run",
            str(fixture_path),
            "--max-agents",
            str(self.config.max_agents),
            "--max-issues",
            str(self.config.max_issues),
            "--timeout",
            str(timeout_minutes),
            "--disable-validations",
            "e2e",
            # Cerberus review mode comes from the config (E2E defaults to fast
            # mode to speed up the run).
            f"--cerberus-spawn-args=--mode={self.config.cerberus_mode}",
        ]

        # NOTE(review): no timeout is passed to the command runner here, so
        # whether `timed_out` can be set depends on the runner's own defaults
        # - confirm against the CommandRunnerPort implementation.
        result = self._command_runner.run(cmd, env=child_env, cwd=cwd)

        if result.ok:
            return E2EResult(
                passed=True,
                status=E2EStatus.PASSED,
                duration_seconds=result.duration_seconds,
                command_output=result.stdout_tail(),
                returncode=0,
            )

        # Prefer stderr for diagnostics, falling back to stdout when it is empty.
        output = result.stderr_tail() or result.stdout_tail()

        if result.timed_out:
            return E2EResult(
                passed=False,
                status=E2EStatus.FAILED,
                failure_reason=f"mala timed out after {self.config.timeout_seconds}s",
                duration_seconds=result.duration_seconds,
                command_output=output,
                returncode=124,
            )

        return E2EResult(
            passed=False,
            status=E2EStatus.FAILED,
            failure_reason=f"mala exited {result.returncode}: {output}",
            duration_seconds=result.duration_seconds,
            command_output=output,
            returncode=result.returncode,
        )
def check_e2e_prereqs(
    env_config: EnvConfigPort,
    command_runner: CommandRunnerPort,
    env: Mapping[str, str],
) -> str | None:
    """Report whether the E2E prerequisites are satisfied.

    Convenience wrapper that builds an E2ERunner with the default
    configuration and returns the failure reason of its prerequisite check.

    Args:
        env_config: Environment configuration for paths.
        command_runner: Command runner handed to the E2ERunner.
        env: Environment variables to check.

    Returns:
        Error message if prerequisites not met, None if all ok.
    """
    return E2ERunner(env_config, command_runner).check_prereqs(env).failure_reason()