Coverage for src / infra / hooks / deadlock.py: 84%
143 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
1"""Hooks for emitting lock events to the deadlock monitor.
3Provides:
4- PreToolUse hook: Emits WAITING events when lock-wait.sh is invoked (real-time)
5- PostToolUse hook: Emits ACQUIRED/RELEASED events after lock commands complete
7Captures lock command outcomes (lock-try.sh, lock-wait.sh, lock-release.sh)
8and emits LockEvents for deadlock detection.
10Note: LockEvent and LockEventType are injected via parameters to avoid importing
11from src.core.models, which would violate the "Hooks isolated" contract.
12"""
14from __future__ import annotations
16import logging
17import re
18import time
19from typing import TYPE_CHECKING, Any
21if TYPE_CHECKING:
22 from collections.abc import Awaitable, Callable
24 from .dangerous_commands import PostToolUseHook, PreToolUseHook
26from src.infra.tools.locking import canonicalize_path
28logger = logging.getLogger(__name__)
# Patterns for lock commands.
# Each compiled pattern matches the script name followed by whitespace and
# captures the path argument (still quoted, if it was quoted) in group 1.
# Capture quoted paths (double or single) OR unquoted paths (stop at shell operators)
# Quoted alternatives accept backslash-escaped characters between the quotes.
# Unquoted: [^\s;&|]+ matches non-whitespace excluding shell operators ; & |
_PATH_PATTERN = r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'|[^\s;&|]+'
_LOCK_TRY_PATTERN = re.compile(rf"lock-try\.sh\s+({_PATH_PATTERN})")
_LOCK_WAIT_PATTERN = re.compile(rf"lock-wait\.sh\s+({_PATH_PATTERN})")
_LOCK_RELEASE_PATTERN = re.compile(rf"lock-release\.sh\s+({_PATH_PATTERN})")
39def _strip_quotes(path: str) -> str:
40 """Remove surrounding shell quotes from a path."""
41 path = path.strip()
42 if len(path) >= 2:
43 if (path.startswith('"') and path.endswith('"')) or (
44 path.startswith("'") and path.endswith("'")
45 ):
46 return path[1:-1]
47 return path
50def _is_safe_batch_command(command: str) -> bool:
51 """Check if a batched command is safe for emitting events for all matches.
53 A command is "safe" if it only uses && to chain commands, meaning
54 success (exit 0) implies all commands ran successfully. Commands using
55 ;, ||, |, or & operators are unsafe because:
56 - ; runs commands regardless of previous exit codes
57 - || short-circuits on success
58 - | creates pipelines where exit code reflects last command
59 - & runs commands in background
61 Args:
62 command: The bash command string.
64 Returns:
65 True if the command is safe for batch emission, False otherwise.
66 """
67 # Check for unsafe operators outside of quoted strings
68 # Simple heuristic: if any of ; || | & appear outside quotes, it's unsafe
69 # Note: && is safe, so we need to distinguish || from &&
70 in_single_quote = False
71 in_double_quote = False
72 i = 0
73 while i < len(command):
74 char = command[i]
76 # Handle quote state changes
77 if char == "'" and not in_double_quote:
78 in_single_quote = not in_single_quote
79 elif char == '"' and not in_single_quote:
80 in_double_quote = not in_double_quote
81 elif not in_single_quote and not in_double_quote:
82 # Check for unsafe operators
83 if char == ";":
84 return False
85 if char == "|":
86 # Check if it's || (unsafe) or just | (unsafe)
87 # Note: && is handled separately - it's safe
88 if i + 1 < len(command) and command[i + 1] == "|":
89 return False # ||
90 return False # single |
91 if char == "&":
92 # Check if it's && (safe), redirection (safe), or background (unsafe)
93 if i + 1 < len(command) and command[i + 1] == "&":
94 i += 1 # Skip the second &, && is safe
95 elif i > 0 and command[i - 1] in "><":
96 # Part of redirection: >&1, <&3 patterns
97 # In 2>&1, the & follows >, not the digit
98 pass
99 elif i + 1 < len(command) and command[i + 1] == ">":
100 # &> redirection (stdout+stderr to file)
101 pass
102 else:
103 return False # single & (background)
105 i += 1
107 return True
def _extract_all_lock_paths(command: str) -> list[tuple[str, str]]:
    """Find every lock script invocation in a bash command string.

    Args:
        command: The bash command string (may contain multiple commands).

    Returns:
        One (command_type, file_path) tuple per lock command found, ordered
        by where the match starts in the command string. command_type is one
        of "try", "wait", "release"; file_path has its surrounding quotes
        removed.
    """
    labelled_patterns = (
        ("try", _LOCK_TRY_PATTERN),
        ("wait", _LOCK_WAIT_PATTERN),
        ("release", _LOCK_RELEASE_PATTERN),
    )

    # (start offset, command_type, file_path) for every hit of every pattern
    located: list[tuple[int, str, str]] = []
    for kind, pattern in labelled_patterns:
        located.extend(
            (hit.start(), kind, _strip_quotes(hit.group(1)))
            for hit in pattern.finditer(command)
        )

    # Order by offset so the result follows the order in the command text.
    located.sort(key=lambda entry: entry[0])

    return [(kind, lock_file) for _, kind, lock_file in located]
def _extract_lock_path(command: str) -> tuple[str, str] | None:
    """Return the first lock command found in a bash command, if any.

    Args:
        command: The bash command string.

    Returns:
        The (command_type, file_path) tuple of the earliest lock command in
        the string, or None when the string contains no lock command.
        command_type is one of "try", "wait", "release".
    """
    return next(iter(_extract_all_lock_paths(command)), None)
151def _get_exit_code(tool_result: str) -> int | None:
152 """Extract exit code from tool result.
154 The SDK returns exit code in the tool result when a bash command completes.
155 This function parses it from various possible formats.
157 Args:
158 tool_result: The result string from the bash tool.
160 Returns:
161 The exit code as an integer, or None if not found.
162 """
163 # Check for explicit exit code patterns in tool result
164 # Common format: "exit code: N" or "(exit N)" or just the exit code
165 # Guard uses "exit code:" (no space after colon) to match regex which uses \s*
166 if "exit code:" in tool_result.lower():
167 match = re.search(r"exit code:\s*(\d+)", tool_result, re.IGNORECASE)
168 if match:
169 return int(match.group(1))
171 # For successful commands, tool_result often doesn't include exit code
172 # We need to check the exit_code field from the hook input directly
173 # This function may need adjustment based on actual SDK behavior
174 return None
def make_lock_event_hook(
    agent_id: str,
    emit_event: Callable[[Any], Awaitable[object] | None],
    repo_namespace: str | None = None,
    *,
    lock_event_class: type[Any],
    lock_event_type_enum: type[Any],
) -> PostToolUseHook:
    """Create a PostToolUse hook that emits lock events.

    The returned hook inspects completed Bash tool calls, finds lock script
    invocations in the command text and turns the exit code into events:

    - lock-try.sh: exit 0 -> ACQUIRED; exit 1 -> WAITING (single command only)
    - lock-wait.sh: exit 0 -> ACQUIRED; exit 1 (timeout) -> no event
    - lock-release.sh: exit 0 -> RELEASED
    - exit 2 (script error) -> warning logged, no events

    Args:
        agent_id: The agent ID emitting events.
        emit_event: Callback to emit lock events. Can be sync or async.
            The return value is awaited only if it is awaitable, and is
            otherwise discarded.
        repo_namespace: Optional repo root for path canonicalization.
        lock_event_class: The LockEvent class to instantiate.
        lock_event_type_enum: The LockEventType enum.

    Returns:
        An async hook function for PostToolUse events. The hook always
        returns an empty dict.
    """
    # Function-scope import so this block does not depend on a module-level
    # import being present.
    import inspect

    # Capture the types for use in the closure
    LockEvent = lock_event_class
    LockEventType = lock_event_type_enum

    async def lock_event_hook(
        hook_input: Any,  # noqa: ANN401 - SDK type, avoid import
        stderr: str | None,
        context: Any,  # noqa: ANN401 - SDK type, avoid import
    ) -> dict[str, Any]:
        """PostToolUse hook to capture lock command outcomes."""
        tool_name = hook_input["tool_name"]

        # Only process bash tool calls
        if tool_name not in ("Bash", "bash"):
            return {}

        # Get the command from tool input; tolerate an explicit None value
        tool_input = hook_input.get("tool_input") or {}
        command = tool_input.get("command", "")
        if not command:
            return {}

        # Extract all lock commands from the bash call
        lock_infos = _extract_all_lock_paths(command)
        if not lock_infos:
            return {}

        # Prefer the explicit exit_code field; fall back to parsing the result
        tool_result = hook_input.get("tool_result", "")
        exit_code = hook_input.get("exit_code")
        if exit_code is None:
            exit_code = _get_exit_code(str(tool_result))

        # Handle error exit codes (2 = script error)
        if exit_code == 2:
            logger.warning(
                "Lock command error (exit code 2), command=%s",
                command,
            )
            return {}

        # For batched commands, we can only safely emit events for all if:
        # 1. Single command, or
        # 2. Commands are chained with && only (safe batch)
        # For unsafe batches (;, ||, |, &), only emit for the last command
        is_single_command = len(lock_infos) == 1
        is_safe_batch = is_single_command or _is_safe_batch_command(command)

        # If unsafe batch, only process the last command (whose exit code we have)
        commands_to_process = lock_infos if is_safe_batch else lock_infos[-1:]
        logger.debug(
            "Batch safety: safe=%s, processing %d/%d commands",
            is_safe_batch,
            len(commands_to_process),
            len(lock_infos),
        )

        for cmd_type, raw_path in commands_to_process:
            # Canonicalize the path; a bad path skips this command only
            try:
                lock_path = canonicalize_path(raw_path, repo_namespace)
            except Exception:
                logger.warning("Failed to canonicalize lock path: %s", raw_path)
                continue

            # Determine event type based on command and exit code
            event_type: Any = None

            if cmd_type == "try":
                if exit_code == 0:
                    event_type = LockEventType.ACQUIRED
                elif exit_code == 1 and is_single_command:
                    # Only emit WAITING for single-command case
                    # (for batched, we can't tell which command had contention)
                    event_type = LockEventType.WAITING
            elif cmd_type == "wait":
                if exit_code == 0:
                    event_type = LockEventType.ACQUIRED
                # exit_code 1 means timeout - no event (agent will retry or abort)
            elif cmd_type == "release":
                if exit_code == 0:
                    event_type = LockEventType.RELEASED

            if event_type is None:
                logger.debug(
                    "Skipping event: cmd_type=%s exit_code=%s (no event type)",
                    cmd_type,
                    exit_code,
                )
                continue

            # Create and emit the event
            event = LockEvent(
                event_type=event_type,
                agent_id=agent_id,
                lock_path=lock_path,
                timestamp=time.time(),
            )

            # emit_event may be sync or async. Await only real awaitables so a
            # sync callback that returns a plain value does not raise TypeError.
            result = emit_event(event)
            if inspect.isawaitable(result):
                await result

            logger.debug(
                "Lock event emitted: type=%s agent_id=%s lock_path=%s",
                event_type.value,
                agent_id,
                lock_path,
            )

        return {}

    return lock_event_hook
def make_lock_wait_hook(
    agent_id: str,
    emit_event: Callable[[Any], Awaitable[object] | None],
    repo_namespace: str | None = None,
    *,
    lock_event_class: type[Any],
    lock_event_type_enum: type[Any],
) -> PreToolUseHook:
    """Create a PreToolUse hook that emits WAITING events for lock-wait.sh.

    This hook enables real-time deadlock detection by emitting WAITING events
    BEFORE lock-wait.sh executes. Without this, deadlocks would never be
    detected because PostToolUse hooks only run after the tool completes,
    but lock-wait.sh blocks indefinitely when waiting for a lock.

    One WAITING event is emitted per lock-wait.sh invocation found in the
    command; a warning is logged when there is more than one.

    Args:
        agent_id: The agent ID emitting events.
        emit_event: Callback to emit lock events. Can be sync or async.
            The return value is awaited only if it is awaitable, and is
            otherwise discarded.
        repo_namespace: Optional repo root for path canonicalization.
        lock_event_class: The LockEvent class to instantiate.
        lock_event_type_enum: The LockEventType enum.

    Returns:
        An async hook function for PreToolUse events. The hook always
        returns an empty dict, i.e. it never blocks the command.
    """
    # Function-scope import so this block does not depend on a module-level
    # import being present.
    import inspect

    # Capture the types for use in the closure
    LockEvent = lock_event_class
    LockEventType = lock_event_type_enum

    async def lock_wait_hook(
        hook_input: Any,  # noqa: ANN401 - SDK type, avoid import
        stderr: str | None,
        context: Any,  # noqa: ANN401 - SDK type, avoid import
    ) -> dict[str, Any]:
        """PreToolUse hook to emit WAITING events before lock-wait.sh runs."""
        tool_name = hook_input["tool_name"]

        # Only process bash tool calls
        if tool_name not in ("Bash", "bash"):
            return {}

        # Get the command from tool input; tolerate an explicit None value
        tool_input = hook_input.get("tool_input") or {}
        command = tool_input.get("command", "")
        if not command:
            return {}

        # Look for lock-wait.sh commands (materialised so they can be counted)
        wait_matches = list(_LOCK_WAIT_PATTERN.finditer(command))
        if len(wait_matches) > 1:
            logger.warning(
                "Multiple lock-wait commands found; emitting %d waits (may overwrite graph)",
                len(wait_matches),
            )

        for match in wait_matches:
            raw_path = _strip_quotes(match.group(1))

            # Canonicalize the path; a bad path skips this invocation only
            try:
                lock_path = canonicalize_path(raw_path, repo_namespace)
            except Exception:
                logger.warning("Failed to canonicalize lock path: %s", raw_path)
                continue

            # Emit WAITING event before the command executes
            event = LockEvent(
                event_type=LockEventType.WAITING,
                agent_id=agent_id,
                lock_path=lock_path,
                timestamp=time.time(),
            )

            # emit_event may be sync or async. Await only real awaitables so a
            # sync callback that returns a plain value does not raise TypeError.
            result = emit_event(event)
            if inspect.isawaitable(result):
                await result

        # Always allow the command to proceed
        return {}

    return lock_wait_hook