Coverage for src / infra / hooks / deadlock.py: 84%

143 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""Hooks for emitting lock events to the deadlock monitor. 

2 

3Provides: 

4- PreToolUse hook: Emits WAITING events when lock-wait.sh is invoked (real-time) 

5- PostToolUse hook: Emits ACQUIRED/RELEASED events after lock commands complete 

6 

7Captures lock command outcomes (lock-try.sh, lock-wait.sh, lock-release.sh) 

8and emits LockEvents for deadlock detection. 

9 

10Note: LockEvent and LockEventType are injected via parameters to avoid importing 

11from src.core.models, which would violate the "Hooks isolated" contract. 

12""" 

13 

14from __future__ import annotations 

15 

16import logging 

17import re 

18import time 

19from typing import TYPE_CHECKING, Any 

20 

21if TYPE_CHECKING: 

22 from collections.abc import Awaitable, Callable 

23 

24 from .dangerous_commands import PostToolUseHook, PreToolUseHook 

25 

26from src.infra.tools.locking import canonicalize_path 

27 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Patterns for lock commands.
#
# _PATH_PATTERN describes one path argument and has three alternatives:
#   1. "..."  a double-quoted string; a backslash followed by any character is
#             accepted inside the quotes.
#   2. '...'  a single-quoted string, with the same backslash handling.
#   3. an unquoted run of characters that stops at whitespace or at one of the
#      shell operator characters ; & |
_PATH_PATTERN = r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'|[^\s;&|]+'
# Each command pattern matches the script name, at least one whitespace
# character, and then captures the path argument as group 1. The captured text
# still includes any surrounding quotes.
_LOCK_TRY_PATTERN = re.compile(rf"lock-try\.sh\s+({_PATH_PATTERN})")
_LOCK_WAIT_PATTERN = re.compile(rf"lock-wait\.sh\s+({_PATH_PATTERN})")
_LOCK_RELEASE_PATTERN = re.compile(rf"lock-release\.sh\s+({_PATH_PATTERN})")

37 

38 

39def _strip_quotes(path: str) -> str: 

40 """Remove surrounding shell quotes from a path.""" 

41 path = path.strip() 

42 if len(path) >= 2: 

43 if (path.startswith('"') and path.endswith('"')) or ( 

44 path.startswith("'") and path.endswith("'") 

45 ): 

46 return path[1:-1] 

47 return path 

48 

49 

50def _is_safe_batch_command(command: str) -> bool: 

51 """Check if a batched command is safe for emitting events for all matches. 

52 

53 A command is "safe" if it only uses && to chain commands, meaning 

54 success (exit 0) implies all commands ran successfully. Commands using 

55 ;, ||, |, or & operators are unsafe because: 

56 - ; runs commands regardless of previous exit codes 

57 - || short-circuits on success 

58 - | creates pipelines where exit code reflects last command 

59 - & runs commands in background 

60 

61 Args: 

62 command: The bash command string. 

63 

64 Returns: 

65 True if the command is safe for batch emission, False otherwise. 

66 """ 

67 # Check for unsafe operators outside of quoted strings 

68 # Simple heuristic: if any of ; || | & appear outside quotes, it's unsafe 

69 # Note: && is safe, so we need to distinguish || from && 

70 in_single_quote = False 

71 in_double_quote = False 

72 i = 0 

73 while i < len(command): 

74 char = command[i] 

75 

76 # Handle quote state changes 

77 if char == "'" and not in_double_quote: 

78 in_single_quote = not in_single_quote 

79 elif char == '"' and not in_single_quote: 

80 in_double_quote = not in_double_quote 

81 elif not in_single_quote and not in_double_quote: 

82 # Check for unsafe operators 

83 if char == ";": 

84 return False 

85 if char == "|": 

86 # Check if it's || (unsafe) or just | (unsafe) 

87 # Note: && is handled separately - it's safe 

88 if i + 1 < len(command) and command[i + 1] == "|": 

89 return False # || 

90 return False # single | 

91 if char == "&": 

92 # Check if it's && (safe), redirection (safe), or background (unsafe) 

93 if i + 1 < len(command) and command[i + 1] == "&": 

94 i += 1 # Skip the second &, && is safe 

95 elif i > 0 and command[i - 1] in "><": 

96 # Part of redirection: >&1, <&3 patterns 

97 # In 2>&1, the & follows >, not the digit 

98 pass 

99 elif i + 1 < len(command) and command[i + 1] == ">": 

100 # &> redirection (stdout+stderr to file) 

101 pass 

102 else: 

103 return False # single & (background) 

104 

105 i += 1 

106 

107 return True 

108 

109 

def _extract_all_lock_paths(command: str) -> list[tuple[str, str]]:
    """Find every lock script invocation in a bash command string.

    Args:
        command: The bash command string (may contain multiple commands).

    Returns:
        A list of (command_type, file_path) tuples, one per lock command
        found, ordered by where each match starts in the command string.
        command_type is one of "try", "wait", "release"; file_path has its
        surrounding quotes removed.
    """
    kinds_and_patterns = (
        ("try", _LOCK_TRY_PATTERN),
        ("wait", _LOCK_WAIT_PATTERN),
        ("release", _LOCK_RELEASE_PATTERN),
    )

    # Each entry is (start offset, command_type, file_path).
    located: list[tuple[int, str, str]] = []
    for kind, pattern in kinds_and_patterns:
        located.extend(
            (hit.start(), kind, _strip_quotes(hit.group(1)))
            for hit in pattern.finditer(command)
        )

    # Order by start offset so the result follows the command string.
    located.sort(key=lambda entry: entry[0])

    return [(kind, path) for _, kind, path in located]

135 

136 

def _extract_lock_path(command: str) -> tuple[str, str] | None:
    """Return the first lock command found in a bash command, if any.

    Args:
        command: The bash command string.

    Returns:
        The (command_type, file_path) tuple of the earliest lock command in
        the string, or None when the string contains no lock command.
        command_type is one of "try", "wait", "release".
    """
    return next(iter(_extract_all_lock_paths(command)), None)

149 

150 

151def _get_exit_code(tool_result: str) -> int | None: 

152 """Extract exit code from tool result. 

153 

154 The SDK returns exit code in the tool result when a bash command completes. 

155 This function parses it from various possible formats. 

156 

157 Args: 

158 tool_result: The result string from the bash tool. 

159 

160 Returns: 

161 The exit code as an integer, or None if not found. 

162 """ 

163 # Check for explicit exit code patterns in tool result 

164 # Common format: "exit code: N" or "(exit N)" or just the exit code 

165 # Guard uses "exit code:" (no space after colon) to match regex which uses \s* 

166 if "exit code:" in tool_result.lower(): 

167 match = re.search(r"exit code:\s*(\d+)", tool_result, re.IGNORECASE) 

168 if match: 

169 return int(match.group(1)) 

170 

171 # For successful commands, tool_result often doesn't include exit code 

172 # We need to check the exit_code field from the hook input directly 

173 # This function may need adjustment based on actual SDK behavior 

174 return None 

175 

176 

def make_lock_event_hook(
    agent_id: str,
    emit_event: Callable[[Any], Awaitable[object] | None],
    repo_namespace: str | None = None,
    *,
    lock_event_class: type[Any],
    lock_event_type_enum: type[Any],
) -> PostToolUseHook:
    """Create a PostToolUse hook that emits lock events.

    The returned hook inspects completed bash tool calls, finds lock script
    invocations (lock-try.sh, lock-wait.sh, lock-release.sh) in the command
    and maps each one, together with the exit code, to an event:

    - try:     exit 0 -> ACQUIRED; exit 1 -> WAITING (single command only)
    - wait:    exit 0 -> ACQUIRED
    - release: exit 0 -> RELEASED

    Exit code 2 is logged as a script error and produces no events. When a
    command contains several lock invocations that are not chained purely
    with ``&&``, only the last invocation is processed.

    Args:
        agent_id: The agent ID emitting events.
        emit_event: Callback to emit lock events. Can be sync or async.
            The return value is awaited only if it is awaitable, and is
            otherwise discarded.
        repo_namespace: Optional repo root for path canonicalization.
        lock_event_class: The LockEvent class to instantiate.
        lock_event_type_enum: The LockEventType enum.

    Returns:
        An async hook function for PostToolUse events.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import list.
    import inspect

    # Capture the types for use in the closure
    LockEvent = lock_event_class
    LockEventType = lock_event_type_enum

    async def lock_event_hook(
        hook_input: Any,  # noqa: ANN401 - SDK type, avoid import
        stderr: str | None,
        context: Any,  # noqa: ANN401 - SDK type, avoid import
    ) -> dict[str, Any]:
        """PostToolUse hook to capture lock command outcomes.

        Always returns an empty dict.
        """
        tool_name = hook_input["tool_name"]

        # Only process bash tool calls
        if tool_name not in ("Bash", "bash"):
            return {}

        # Get the command from tool input
        tool_input = hook_input.get("tool_input", {})
        command = tool_input.get("command", "")
        if not command:
            return {}

        # Extract all lock commands from the bash call
        lock_infos = _extract_all_lock_paths(command)
        if not lock_infos:
            return {}

        # Prefer the explicit exit_code field; fall back to parsing the
        # textual tool result when the field is absent.
        tool_result = hook_input.get("tool_result", "")
        exit_code = hook_input.get("exit_code")
        if exit_code is None:
            exit_code = _get_exit_code(str(tool_result))

        # Handle error exit codes (2 = script error)
        if exit_code == 2:
            logger.warning(
                "Lock command error (exit code 2), command=%s",
                command,
            )
            return {}

        # For batched commands, we can only safely emit events for all if:
        # 1. Single command, or
        # 2. Commands are chained with && only (safe batch)
        # For unsafe batches (;, ||, |, &), only emit for the last command
        is_single_command = len(lock_infos) == 1
        is_safe_batch = is_single_command or _is_safe_batch_command(command)

        # If unsafe batch, only process the last command (whose exit code we have)
        commands_to_process = lock_infos if is_safe_batch else lock_infos[-1:]
        logger.debug(
            "Batch safety: safe=%s, processing %d/%d commands",
            is_safe_batch,
            len(commands_to_process),
            len(lock_infos),
        )

        for cmd_type, raw_path in commands_to_process:
            # Canonicalize the path; a path that cannot be canonicalized is
            # skipped so the remaining commands are still processed.
            try:
                lock_path = canonicalize_path(raw_path, repo_namespace)
            except Exception:
                logger.warning("Failed to canonicalize lock path: %s", raw_path)
                continue

            # Determine event type based on command and exit code
            event_type: Any = None

            if cmd_type == "try":
                if exit_code == 0:
                    event_type = LockEventType.ACQUIRED
                elif exit_code == 1 and is_single_command:
                    # Only emit WAITING for single-command case
                    # (for batched, we can't tell which command had contention)
                    event_type = LockEventType.WAITING
            elif cmd_type == "wait":
                if exit_code == 0:
                    event_type = LockEventType.ACQUIRED
                # exit_code 1 means timeout - no event (agent will retry or abort)
            elif cmd_type == "release":
                if exit_code == 0:
                    event_type = LockEventType.RELEASED

            if event_type is None:
                logger.debug(
                    "Skipping event: cmd_type=%s exit_code=%s (no event type)",
                    cmd_type,
                    exit_code,
                )
                continue

            # Create and emit the event
            event = LockEvent(
                event_type=event_type,
                agent_id=agent_id,
                lock_path=lock_path,
                timestamp=time.time(),
            )

            # emit_event may be sync or async. Await only genuine awaitables
            # so a sync callback that returns a plain value (e.g. a bool)
            # does not raise TypeError here.
            result = emit_event(event)
            if inspect.isawaitable(result):
                await result

            logger.debug(
                "Lock event emitted: type=%s agent_id=%s lock_path=%s",
                event_type.value,
                agent_id,
                lock_path,
            )

        return {}

    return lock_event_hook

316 

317 

def make_lock_wait_hook(
    agent_id: str,
    emit_event: Callable[[Any], Awaitable[object] | None],
    repo_namespace: str | None = None,
    *,
    lock_event_class: type[Any],
    lock_event_type_enum: type[Any],
) -> PreToolUseHook:
    """Create a PreToolUse hook that emits WAITING events for lock-wait.sh.

    This hook enables real-time deadlock detection by emitting WAITING events
    BEFORE lock-wait.sh executes. Without this, deadlocks would never be
    detected because PostToolUse hooks only run after the tool completes,
    but lock-wait.sh blocks while it is waiting for a lock.

    One WAITING event is emitted per lock-wait.sh invocation found in the
    command; a warning is logged when more than one is found.

    Args:
        agent_id: The agent ID emitting events.
        emit_event: Callback to emit lock events. Can be sync or async.
            The return value is awaited only if it is awaitable, and is
            otherwise discarded.
        repo_namespace: Optional repo root for path canonicalization.
        lock_event_class: The LockEvent class to instantiate.
        lock_event_type_enum: The LockEventType enum.

    Returns:
        An async hook function for PreToolUse events.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import list.
    import inspect

    # Capture the types for use in the closure
    LockEvent = lock_event_class
    LockEventType = lock_event_type_enum

    async def lock_wait_hook(
        hook_input: Any,  # noqa: ANN401 - SDK type, avoid import
        stderr: str | None,
        context: Any,  # noqa: ANN401 - SDK type, avoid import
    ) -> dict[str, Any]:
        """PreToolUse hook to emit WAITING events before lock-wait.sh runs.

        Always returns an empty dict, so the hook itself never blocks the
        tool call.
        """
        tool_name = hook_input["tool_name"]

        # Only process bash tool calls
        if tool_name not in ("Bash", "bash"):
            return {}

        # Get the command from tool input
        tool_input = hook_input.get("tool_input", {})
        command = tool_input.get("command", "")
        if not command:
            return {}

        # Look for lock-wait.sh commands
        wait_matches = list(_LOCK_WAIT_PATTERN.finditer(command))
        if len(wait_matches) > 1:
            logger.warning(
                "Multiple lock-wait commands found; emitting %d waits (may overwrite graph)",
                len(wait_matches),
            )

        for match in wait_matches:
            raw_path = _strip_quotes(match.group(1))

            # Canonicalize the path; a path that cannot be canonicalized is
            # skipped so the remaining matches are still processed.
            try:
                lock_path = canonicalize_path(raw_path, repo_namespace)
            except Exception:
                logger.warning("Failed to canonicalize lock path: %s", raw_path)
                continue

            # Emit WAITING event before the command executes
            event = LockEvent(
                event_type=LockEventType.WAITING,
                agent_id=agent_id,
                lock_path=lock_path,
                timestamp=time.time(),
            )

            # emit_event may be sync or async. Await only genuine awaitables
            # so a sync callback that returns a plain value (e.g. a bool)
            # does not raise TypeError here.
            result = emit_event(event)
            if inspect.isawaitable(result):
                await result

        # Always allow the command to proceed
        return {}

    return lock_wait_hook