Coverage for src / domain / validation / e2e.py: 50%

119 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""E2E fixture runner for mala validation. 

2 

3This module provides end-to-end validation using a fixture repository. 

4It creates a temporary repo with a known bug, runs mala to fix it, 

5and validates the result. 

6 

7Key types: 

8- E2EResult: Result of an E2E validation run 

9- E2EConfig: Configuration for E2E validation 

10- E2ERunner: Orchestrates the E2E validation flow 

11""" 

12 

13from __future__ import annotations 

14 

15import shutil 

16import tempfile 

17import time 

18import uuid 

19from dataclasses import dataclass, field 

20from enum import Enum 

21from pathlib import Path 

22from typing import TYPE_CHECKING 

23 

24from .helpers import ( 

25 annotate_issue, 

26 get_ready_issue_id, 

27 init_fixture_repo, 

28 write_fixture_repo, 

29) 

30 

31if TYPE_CHECKING: 

32 from collections.abc import Mapping 

33 

34 from src.core.protocols import CommandRunnerPort, EnvConfigPort 

35 

36 

class E2EStatus(Enum):
    """Outcome category reported for an E2E validation run."""

    # The run completed and was judged successful.
    PASSED = "passed"
    # The run was attempted (or could not be started) and did not succeed.
    FAILED = "failed"
    # The run was not executed because prerequisites were missing.
    SKIPPED = "skipped"

43 

44 

@dataclass
class E2EPrereqResult:
    """Outcome of checking the E2E prerequisites.

    Attributes:
        ok: True when every prerequisite is satisfied.
        missing: Human-readable description of each unmet prerequisite.
        can_skip: True when unmet prerequisites should lead to a skipped
            run instead of a failed one.
    """

    ok: bool
    missing: list[str] = field(default_factory=list)
    can_skip: bool = False

    def failure_reason(self) -> str | None:
        """Describe why the check failed; ``None`` when it succeeded."""
        if self.ok:
            return None
        if self.missing:
            details = ", ".join(self.missing)
            return f"E2E prereq missing: {details}"
        # Failed without any recorded detail: fall back to a generic message.
        return "E2E prerequisites not met"

66 

67 

@dataclass
class E2EResult:
    """Outcome of a single E2E validation run.

    Attributes:
        passed: True when the run should be treated as successful.
        status: Status category of the run.
        failure_reason: Why the run failed or was skipped (None if passed).
        fixture_path: Location of the fixture repo, when one is reported.
        duration_seconds: Elapsed time of the run, in seconds.
        command_output: Tail of the mala command output.
        returncode: Exit code of the mala command.
    """

    passed: bool
    status: E2EStatus
    failure_reason: str | None = None
    fixture_path: Path | None = None
    duration_seconds: float = 0.0
    command_output: str = ""
    returncode: int = 0

    def short_summary(self) -> str:
        """Return a single-line description suitable for logs and prompts."""
        # A skipped run is reported as skipped regardless of ``passed``.
        if self.status == E2EStatus.SKIPPED:
            reason = self.failure_reason or "prerequisites not met"
            return f"E2E skipped: {reason}"
        if not self.passed:
            reason = self.failure_reason or "unknown error"
            return f"E2E failed: {reason}"
        return "E2E passed"

97 

98 

@dataclass
class E2EConfig:
    """Tunable settings for an E2E validation run.

    Attributes:
        enabled: Whether E2E validation is switched on.
        skip_if_no_keys: Deprecated; retained only for backward compatibility.
        keep_fixture: Leave the fixture repo on disk afterwards (debugging aid).
        timeout_seconds: Time budget for the mala run command, in seconds
            (defaults to 300, i.e. five minutes).
        max_agents: Upper bound on agents used by the mala run.
        max_issues: Upper bound on issues processed by the mala run.
        cerberus_mode: Cerberus review mode (fast/smart/max); "fast" by
            default for E2E.
    """

    enabled: bool = True
    skip_if_no_keys: bool = False
    keep_fixture: bool = False
    timeout_seconds: float = 300.0
    max_agents: int = 1
    max_issues: int = 1
    cerberus_mode: str = "fast"

120 

121 

class E2ERunner:
    """Orchestrates E2E validation using a fixture repository.

    The runner checks prerequisites, creates a temporary fixture repo, invokes
    ``mala run`` against it through the injected command runner, and maps the
    command outcome to an ``E2EResult``.
    """

    def __init__(
        self,
        env_config: EnvConfigPort,
        command_runner: CommandRunnerPort,
        config: E2EConfig | None = None,
    ):
        """Initialize the E2E runner.

        Args:
            env_config: Environment configuration for finding cerberus bin path.
            command_runner: CommandRunnerPort for running mala commands.
            config: E2E configuration. Uses defaults if None.
        """
        self.config = config or E2EConfig()
        self.env_config = env_config
        self._command_runner = command_runner

    def check_prereqs(self, env: Mapping[str, str] | None = None) -> E2EPrereqResult:
        """Check if all E2E prerequisites are met.

        Verifies that the ``mala`` and ``bd`` CLIs can be located and that the
        Cerberus ``review-gate`` binary exists.

        Args:
            env: Environment variables to check. Uses os.environ if None. Its
                ``PATH`` entry (when present) is the search path for the CLIs.

        Returns:
            E2EPrereqResult with details about missing prerequisites.
        """
        import os

        if env is None:
            env = os.environ

        # Look the CLIs up on the PATH of the environment being checked, so the
        # answer matches what a subprocess started with ``env`` would find.
        # ``None`` (no PATH entry) makes shutil.which fall back to the process
        # PATH, which is what the lookup did before.
        search_path = env.get("PATH")

        missing: list[str] = []
        for tool in ("mala", "bd"):
            if not shutil.which(tool, path=search_path):
                missing.append(f"{tool} CLI not found in PATH")

        # Check for Cerberus review-gate (required for E2E to test review flow)
        cerberus_bin = self.env_config.find_cerberus_bin_path()
        if cerberus_bin is None:
            missing.append(
                "Cerberus review-gate not installed (check ~/.claude/plugins)"
            )
        elif not (cerberus_bin / "review-gate").exists():
            missing.append(f"review-gate binary not found at {cerberus_bin}")

        if missing:
            return E2EPrereqResult(ok=False, missing=missing, can_skip=False)

        return E2EPrereqResult(ok=True)

    def run(
        self, env: Mapping[str, str] | None = None, cwd: Path | None = None
    ) -> E2EResult:
        """Run E2E validation.

        Creates a fixture repo, runs mala on it, and validates the result.
        Cleans up the fixture repo unless keep_fixture is True.

        Args:
            env: Environment variables for subprocess. Uses os.environ if None.
            cwd: Working directory for mala command. Uses current directory if None.

        Returns:
            E2EResult with details about the validation. ``fixture_path`` is
            only populated when the fixture is kept.
        """
        import os

        # Always work on a private copy so the caller's mapping is never mutated.
        env = dict(os.environ) if env is None else dict(env)

        if cwd is None:
            cwd = Path.cwd()

        # Check prerequisites
        prereq = self.check_prereqs(env)
        if not prereq.ok:
            if prereq.can_skip:
                return E2EResult(
                    passed=True,  # Skipped is considered "not failed"
                    status=E2EStatus.SKIPPED,
                    failure_reason=prereq.failure_reason(),
                )
            return E2EResult(
                passed=False,
                status=E2EStatus.FAILED,
                failure_reason=prereq.failure_reason(),
            )

        # Create fixture repo
        fixture_path = Path(tempfile.mkdtemp(prefix="mala-e2e-fixture-"))
        start_time = time.monotonic()
        # The path is only reported when the directory will still exist.
        reported_path = fixture_path if self.config.keep_fixture else None

        try:
            setup_error = self._setup_fixture(fixture_path)
            if setup_error:
                return E2EResult(
                    passed=False,
                    status=E2EStatus.FAILED,
                    failure_reason=setup_error,
                    fixture_path=reported_path,
                    duration_seconds=time.monotonic() - start_time,
                )

            result = self._run_mala(fixture_path, env, cwd)
            result.fixture_path = reported_path
            return result
        finally:
            # Cleanup fixture unless keeping it. ignore_errors also covers a
            # directory that has already disappeared.
            if not self.config.keep_fixture:
                shutil.rmtree(fixture_path, ignore_errors=True)

    def _setup_fixture(self, repo_path: Path) -> str | None:
        """Set up the fixture repository.

        Args:
            repo_path: Path to create the fixture repo in.

        Returns:
            Error message if setup failed, None on success.
        """
        # Write fixture files using shared helper
        write_fixture_repo(repo_path)

        # Initialize git and beads using shared helper
        return init_fixture_repo(repo_path, self._command_runner)

    def _build_mala_command(self, fixture_path: Path) -> list[str]:
        """Build the ``mala run`` argument list for the fixture repo.

        Args:
            fixture_path: Path to the fixture repository.

        Returns:
            The command as a list of arguments.
        """
        import math

        # The CLI expects whole minutes. Round up so the CLI budget is never
        # shorter than the configured number of seconds, with a floor of one
        # minute.
        timeout_minutes = max(1, math.ceil(self.config.timeout_seconds / 60))
        return [
            "mala",
            "run",
            str(fixture_path),
            "--max-agents",
            str(self.config.max_agents),
            "--max-issues",
            str(self.config.max_issues),
            "--timeout",
            str(timeout_minutes),
            "--disable-validations",
            "e2e",
            # Cerberus review mode; E2EConfig defaults it to "fast" to speed
            # up E2E tests.
            f"--cerberus-spawn-args=--mode={self.config.cerberus_mode}",
        ]

    def _run_mala(
        self, fixture_path: Path, env: Mapping[str, str], cwd: Path
    ) -> E2EResult:
        """Run mala on the fixture repo.

        Args:
            fixture_path: Path to the fixture repository.
            env: Environment variables for subprocess.
            cwd: Working directory for the mala command.

        Returns:
            E2EResult with command execution details.
        """
        # Annotate the issue with context using shared helper
        issue_id = get_ready_issue_id(fixture_path, self._command_runner)
        if issue_id:
            annotate_issue(fixture_path, issue_id, self._command_runner)

        # Override CLAUDE_SESSION_ID to avoid conflicts with parent session's review gate.
        # The Cerberus review-gate tracks pending reviews per session, so running e2e
        # inside an existing mala session (which already has a review gate active) would
        # fail with "Review gate already active" unless we use a distinct session ID.
        child_env = dict(env)
        child_env["CLAUDE_SESSION_ID"] = f"e2e-{uuid.uuid4()}"

        cmd = self._build_mala_command(fixture_path)
        result = self._command_runner.run(cmd, env=child_env, cwd=cwd)

        if result.ok:
            return E2EResult(
                passed=True,
                status=E2EStatus.PASSED,
                duration_seconds=result.duration_seconds,
                command_output=result.stdout_tail(),
                returncode=0,
            )

        # For failures prefer stderr and fall back to stdout when it is empty.
        output = result.stderr_tail() or result.stdout_tail()

        if result.timed_out:
            return E2EResult(
                passed=False,
                status=E2EStatus.FAILED,
                failure_reason=f"mala timed out after {self.config.timeout_seconds}s",
                duration_seconds=result.duration_seconds,
                command_output=output,
                # NOTE(review): 124 presumably mirrors the exit status of the
                # coreutils `timeout` command - confirm.
                returncode=124,
            )

        return E2EResult(
            passed=False,
            status=E2EStatus.FAILED,
            failure_reason=f"mala exited {result.returncode}: {output}",
            duration_seconds=result.duration_seconds,
            command_output=output,
            returncode=result.returncode,
        )

338 

339 

def check_e2e_prereqs(
    env_config: EnvConfigPort,
    command_runner: CommandRunnerPort,
    env: Mapping[str, str],
) -> str | None:
    """Report whether the E2E prerequisites are satisfied.

    Convenience wrapper that builds an ``E2ERunner`` with its default
    configuration and returns only the failure text of its prerequisite check.

    Args:
        env_config: Environment configuration for paths.
        command_runner: Command runner handed to the runner.
        env: Environment variables to check.

    Returns:
        Error message if prerequisites are not met, None if all are ok.
    """
    return E2ERunner(env_config, command_runner).check_prereqs(env).failure_reason()