Coverage for src / core / log_events.py: 24%
156 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
"""JSONL log event types for Claude Agent SDK schema contract.

This module defines explicit types for the JSONL log format produced by
Claude Agent SDK. These types serve as a contract between mala and the SDK,
enabling validation and clearer parsing.

Schema Overview:
    Log entries have a top-level "type" field that determines message direction:
    - "assistant": Messages from the assistant (tool_use, text blocks)
    - "user": Messages to the assistant (tool_result blocks)

    Message content is a list of blocks, each with a "type" field:
    - "tool_use": Tool invocation with name, id, input
    - "tool_result": Tool output with tool_use_id, content, is_error
    - "text": Plain text content

Example JSONL entries:

    Assistant message with tool_use:
    {"type": "assistant", "message": {"content": [
        {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}
    ]}}

    User message with tool_result:
    {"type": "user", "message": {"content": [
        {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file.txt", "is_error": false}
    ]}}

    Assistant message with text:
    {"type": "assistant", "message": {"content": [
        {"type": "text", "text": "Here are the files..."}
    ]}}

Parsing Modes:
    - parse_log_entry(): Lenient mode for production use. Returns None for
      unrecognized entries (forward compatibility).
    - parse_log_entry_strict(): Strict mode for testing/debugging. Raises
      LogParseError with detailed schema information on parse failures.
"""
41from __future__ import annotations
43from dataclasses import dataclass
44from typing import Any
@dataclass(frozen=True)
class TextBlock:
    """A text content block in a message.

    Instances are frozen: attributes cannot be reassigned after construction.

    Attributes:
        text: The text content.
    """

    text: str
@dataclass(frozen=True)
class ToolUseBlock:
    """A tool_use block representing a tool invocation.

    Instances are frozen: attributes cannot be reassigned after construction.
    The ``input`` dict itself is stored as given (it is not copied).

    Attributes:
        id: Unique identifier for this tool use (used to correlate with tool_result).
        name: Name of the tool being invoked (e.g., "Bash", "Read", "Write").
        input: Tool-specific input parameters (e.g., {"command": "ls"} for Bash).
    """

    id: str
    name: str
    input: dict[str, Any]
@dataclass(frozen=True)
class ToolResultBlock:
    """A tool_result block representing tool output.

    Instances are frozen: attributes cannot be reassigned after construction.

    Attributes:
        tool_use_id: ID of the tool_use this is a response to.
        content: The tool output content (usually a string, but can be structured).
        is_error: Whether the tool execution resulted in an error.
    """

    tool_use_id: str
    content: Any
    is_error: bool
# Type alias for content blocks: any one of the three block dataclasses
# that may appear inside a message's "content" list.
ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
@dataclass(frozen=True)
class AssistantMessage:
    """An assistant message containing content blocks.

    Instances are frozen: ``content`` cannot be reassigned, though the list
    object itself is stored as given.

    Attributes:
        content: List of content blocks (text, tool_use).
    """

    content: list[ContentBlock]
@dataclass(frozen=True)
class UserMessage:
    """A user message containing content blocks.

    Instances are frozen: ``content`` cannot be reassigned, though the list
    object itself is stored as given.

    Attributes:
        content: List of content blocks (typically tool_result).
    """

    content: list[ContentBlock]
@dataclass(frozen=True)
class AssistantLogEntry:
    """A log entry from the assistant.

    Instances are frozen: ``message`` cannot be reassigned after construction.

    Attributes:
        message: The assistant message.
    """

    message: AssistantMessage
@dataclass(frozen=True)
class UserLogEntry:
    """A log entry from the user (typically tool results).

    Instances are frozen: ``message`` cannot be reassigned after construction.

    Attributes:
        message: The user message.
    """

    message: UserMessage
# Type alias for all log entry types: the two entry dataclasses the
# parsers in this module can return.
LogEntry = AssistantLogEntry | UserLogEntry
# Expected schema description for error messages.
# The surrounding newlines of the literal are removed by .strip(), so the
# value starts at "Expected ..." and ends at the last block-type line.
_SCHEMA_DESCRIPTION = """
Expected Claude Agent SDK JSONL schema:
  {"type": "assistant"|"user", "message": {"content": [<blocks>]}}

Content block types:
  - {"type": "text", "text": "<string>"}
  - {"type": "tool_use", "id": "<string>", "name": "<string>", "input": {...}}
  - {"type": "tool_result", "tool_use_id": "<string>", "content": ..., "is_error": <bool>}
""".strip()
class LogParseError(Exception):
    """Error raised when log parsing fails with schema validation error.

    The exception message is "Log parse error: <reason>" followed by a blank
    line and the expected-schema description.

    Attributes:
        reason: Human-readable explanation of what was expected.
        data: The raw data that failed to parse.
        schema_hint: Reference to the expected schema format.
    """

    def __init__(self, reason: str, data: dict[str, Any] | None = None) -> None:
        self.reason = reason
        self.data = data
        self.schema_hint = _SCHEMA_DESCRIPTION
        super().__init__(f"Log parse error: {reason}\n\n{_SCHEMA_DESCRIPTION}")

    def __reduce__(self) -> tuple[Any, ...]:
        # The default exception reduce re-calls __init__ with self.args, i.e.
        # the already formatted message, which would wrap the prefix and the
        # schema text a second time after a pickle/copy round-trip. Rebuild
        # from the original constructor arguments instead; the instance
        # __dict__ is passed as state so any extra attributes survive.
        return (type(self), (self.reason, self.data), self.__dict__)
def _parse_content_block(block: dict[str, Any]) -> ContentBlock | None:
    """Parse a content block from raw dict data (lenient mode).

    Missing fields fall back to defaults rather than failing:
    "text", "id", "name" and "tool_use_id" default to "", "content"
    defaults to "", "input" defaults to {} and "is_error" defaults to False.
    A present-but-non-dict "input" is replaced by {}.

    Args:
        block: Raw dict data for a content block.

    Returns:
        Parsed ContentBlock, or None if ``block`` is not a dict, a string
        field has a non-str value, "is_error" is not a bool, or the block
        type is unrecognized. Unknown block types are silently ignored for
        forward compatibility.
    """
    if not isinstance(block, dict):
        return None

    block_type = block.get("type")

    if block_type == "text":
        text = block.get("text", "")
        if not isinstance(text, str):
            return None
        return TextBlock(text=text)

    if block_type == "tool_use":
        tool_id = block.get("id", "")
        name = block.get("name", "")
        tool_input = block.get("input", {})
        if not isinstance(tool_id, str) or not isinstance(name, str):
            return None
        if not isinstance(tool_input, dict):
            # Lenient mode keeps the block and drops the malformed input.
            tool_input = {}
        return ToolUseBlock(id=tool_id, name=name, input=tool_input)

    if block_type == "tool_result":
        tool_use_id = block.get("tool_use_id", "")
        content = block.get("content", "")
        is_error = block.get("is_error", False)
        if not isinstance(tool_use_id, str):
            return None
        # Reject non-bool is_error to avoid "false" -> True misclassification
        if not isinstance(is_error, bool):
            return None
        return ToolResultBlock(
            tool_use_id=tool_use_id, content=content, is_error=is_error
        )

    # Unknown block type - ignore for forward compatibility
    return None
def _parse_content_block_strict(block: dict[str, Any], index: int) -> ContentBlock:
    """Parse a content block in strict mode, raising on errors.

    Required fields: "type" on every block, "text" on text blocks, "id" and
    "name" on tool_use blocks, "tool_use_id" on tool_result blocks. A field
    whose value is None is reported as missing. Optional fields keep their
    defaults ("input" -> {}, "content" -> "", "is_error" -> False) but must
    have the right type when present.

    Args:
        block: Raw dict data for a content block.
        index: Index of this block in the content array (for error messages).

    Returns:
        Parsed ContentBlock.

    Raises:
        LogParseError: If the block cannot be parsed. The error's ``data``
            is ``{"block": block, "index": index}``.
    """

    def fail(reason: str) -> LogParseError:
        # Every error from this function carries the same payload.
        return LogParseError(reason, data={"block": block, "index": index})

    def require_str(label: str, field: str) -> str:
        # Fetch a required string field, distinguishing "missing" from
        # "present with the wrong type" in the error message.
        value = block.get(field)
        if value is None:
            raise fail(f"{label} at index {index} missing required '{field}' field")
        if not isinstance(value, str):
            raise fail(
                f"{label} at index {index} has invalid '{field}' type: "
                f"expected str, got {type(value).__name__}"
            )
        return value

    if not isinstance(block, dict):
        raise fail(
            f"Content block at index {index} must be a dict, got {type(block).__name__}"
        )

    block_type = block.get("type")
    if block_type is None:
        raise fail(f"Content block at index {index} missing required 'type' field")

    if block_type == "text":
        return TextBlock(text=require_str("Text block", "text"))

    if block_type == "tool_use":
        # Require id and name fields in strict mode (checked in that order).
        tool_id = require_str("tool_use block", "id")
        name = require_str("tool_use block", "name")
        tool_input = block.get("input", {})
        if not isinstance(tool_input, dict):
            raise fail(
                f"tool_use block at index {index} has invalid 'input' type: "
                f"expected dict, got {type(tool_input).__name__}"
            )
        return ToolUseBlock(id=tool_id, name=name, input=tool_input)

    if block_type == "tool_result":
        # Require tool_use_id field in strict mode
        tool_use_id = require_str("tool_result block", "tool_use_id")
        content = block.get("content", "")
        is_error = block.get("is_error", False)
        # Require is_error to be a proper boolean in strict mode
        if not isinstance(is_error, bool):
            raise fail(
                f"tool_result block at index {index} has invalid 'is_error' type: "
                f"expected bool, got {type(is_error).__name__}"
            )
        return ToolResultBlock(
            tool_use_id=tool_use_id, content=content, is_error=is_error
        )

    # Unknown block type - raise in strict mode
    raise fail(
        f"Unknown content block type '{block_type}' at index {index}. "
        f"Expected: text, tool_use, or tool_result"
    )
def parse_log_entry(data: dict[str, Any]) -> LogEntry | None:
    """Parse a raw JSONL entry dict into a typed LogEntry (lenient mode).

    This function validates the structure of JSONL log entries from Claude
    Agent SDK and returns typed objects. Unknown entry types or malformed
    entries return None (not an error) to support forward compatibility.

    For strict parsing with detailed error messages, use parse_log_entry_strict().

    Args:
        data: Parsed JSON object from a JSONL line.

    Returns:
        LogEntry (AssistantLogEntry or UserLogEntry) if the entry matches
        expected schema, None if the entry type is unrecognized or the
        structure is invalid.

    Note:
        - Unknown fields are ignored (forward compatibility)
        - Unknown block types within content are skipped
        - Empty content arrays are valid
        - If the top-level "type" is absent, "message.role" is used instead

    Example:
        >>> data = {"type": "assistant", "message": {"content": [
        ...     {"type": "text", "text": "Hello"}
        ... ]}}
        >>> entry = parse_log_entry(data)
        >>> isinstance(entry, AssistantLogEntry)
        True
    """
    if not isinstance(data, dict):
        return None

    entry_type = data.get("type")
    message_data = data.get("message")

    # Also check for role-based messages (alternative format)
    # Some entries use message.role instead of top-level type
    if entry_type is None and isinstance(message_data, dict):
        entry_type = message_data.get("role")

    if entry_type not in ("assistant", "user"):
        return None

    if not isinstance(message_data, dict):
        return None

    # "content" is required and must be a list; a missing field yields None
    # from .get() and is rejected by the same check.
    content_data = message_data.get("content")
    if not isinstance(content_data, list):
        return None

    # Parse content blocks, filtering out unrecognized ones
    parsed = (_parse_content_block(block_data) for block_data in content_data)
    content_blocks: list[ContentBlock] = [
        block for block in parsed if block is not None
    ]

    if entry_type == "assistant":
        return AssistantLogEntry(message=AssistantMessage(content=content_blocks))
    return UserLogEntry(message=UserMessage(content=content_blocks))
def parse_log_entry_strict(data: dict[str, Any]) -> LogEntry:
    """Parse a raw JSONL entry dict into a typed LogEntry (strict mode).

    Unlike parse_log_entry(), this function raises LogParseError with detailed
    schema information when parsing fails. Use this for testing, debugging,
    or when you need clear error messages about schema violations.

    Args:
        data: Parsed JSON object from a JSONL line.

    Returns:
        LogEntry (AssistantLogEntry or UserLogEntry).

    Raises:
        LogParseError: If the entry doesn't match the expected schema.
            The error includes:
            - A specific reason explaining what was wrong
            - The problematic data
            - A reference to the expected schema format

    Example:
        The real message continues with the expected-schema text after the
        line shown here, hence the IGNORE_EXCEPTION_DETAIL directive.

        >>> data = {"type": "invalid"}
        >>> parse_log_entry_strict(data)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        LogParseError: Log parse error: Entry type must be 'assistant' or 'user', got 'invalid'
    """
    if not isinstance(data, dict):
        # The offending value is not a dict, so it is not attached as data.
        raise LogParseError(
            f"Entry must be a dict, got {type(data).__name__}",
            data=None,
        )

    entry_type = data.get("type")
    message_data = data.get("message")

    # Also check for role-based messages (alternative format)
    if entry_type is None and isinstance(message_data, dict):
        entry_type = message_data.get("role")

    if entry_type is None:
        raise LogParseError(
            "Entry missing required 'type' field. "
            "Expected top-level 'type' or 'message.role'",
            data=data,
        )

    if entry_type not in ("assistant", "user"):
        raise LogParseError(
            f"Entry type must be 'assistant' or 'user', got '{entry_type}'",
            data=data,
        )

    if message_data is None:
        raise LogParseError(
            "Entry missing required 'message' field",
            data=data,
        )

    if not isinstance(message_data, dict):
        raise LogParseError(
            f"Entry 'message' must be a dict, got {type(message_data).__name__}",
            data=data,
        )

    content_data = message_data.get("content")
    if content_data is None:
        raise LogParseError(
            "Entry 'message' missing required 'content' field",
            data=data,
        )

    if not isinstance(content_data, list):
        raise LogParseError(
            f"Entry 'message.content' must be a list, got {type(content_data).__name__}",
            data=data,
        )

    # Parse content blocks in strict mode; the first invalid block raises.
    content_blocks: list[ContentBlock] = [
        _parse_content_block_strict(block_data, i)
        for i, block_data in enumerate(content_data)
    ]

    if entry_type == "assistant":
        return AssistantLogEntry(message=AssistantMessage(content=content_blocks))
    return UserLogEntry(message=UserMessage(content=content_blocks))