Coverage for src / core / log_events.py: 24%

156 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""JSONL log event types for Claude Agent SDK schema contract. 

2 

3This module defines explicit types for the JSONL log format produced by 

4Claude Agent SDK. These types serve as a contract between mala and the SDK, 

5enabling validation and clearer parsing. 

6 

7Schema Overview: 

8 Log entries have a top-level "type" field that determines message direction: 

9 - "assistant": Messages from the assistant (tool_use, text blocks) 

10 - "user": Messages to the assistant (tool_result blocks) 

11 

12 Message content is a list of blocks, each with a "type" field: 

13 - "tool_use": Tool invocation with name, id, input 

14 - "tool_result": Tool output with tool_use_id, content, is_error 

15 - "text": Plain text content 

16 

17Example JSONL entries: 

18 

19 Assistant message with tool_use: 

20 {"type": "assistant", "message": {"content": [ 

21 {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}} 

22 ]}} 

23 

24 User message with tool_result: 

25 {"type": "user", "message": {"content": [ 

26 {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file.txt", "is_error": false} 

27 ]}} 

28 

29 Assistant message with text: 

30 {"type": "assistant", "message": {"content": [ 

31 {"type": "text", "text": "Here are the files..."} 

32 ]}} 

33 

34Parsing Modes: 

35 - parse_log_entry(): Lenient mode for production use. Returns None for 

36 unrecognized entries (forward compatibility). 

37 - parse_log_entry_strict(): Strict mode for testing/debugging. Raises 

38 LogParseError with detailed schema information on parse failures. 

39""" 

40 

41from __future__ import annotations 

42 

43from dataclasses import dataclass 

44from typing import Any 

45 

46 

@dataclass(frozen=True)
class TextBlock:
    """Immutable plain-text content block of a message.

    Attributes:
        text: The literal text carried by the block.
    """

    text: str

56 

57 

@dataclass(frozen=True)
class ToolUseBlock:
    """Immutable record of a single tool invocation (a "tool_use" block).

    Attributes:
        id: Identifier of this invocation; a tool_result refers back to it.
        name: Name of the invoked tool (e.g., "Bash", "Read", "Write").
        input: Tool-specific parameters (e.g., {"command": "ls"} for Bash).
    """

    id: str
    name: str
    input: dict[str, Any]

71 

72 

@dataclass(frozen=True)
class ToolResultBlock:
    """Immutable record of a tool's output (a "tool_result" block).

    Attributes:
        tool_use_id: ID of the tool_use block this result answers.
        content: The tool output; usually a string, but may be structured.
        is_error: True when the tool execution ended in an error.
    """

    tool_use_id: str
    content: Any
    is_error: bool

86 

87 

# Union of every content block variant that a message may contain
ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock

90 

91 

@dataclass(frozen=True)
class AssistantMessage:
    """Immutable message authored by the assistant.

    Attributes:
        content: The message's content blocks (text, tool_use).
    """

    content: list[ContentBlock]

101 

102 

@dataclass(frozen=True)
class UserMessage:
    """Immutable message sent to the assistant.

    Attributes:
        content: The message's content blocks (typically tool_result).
    """

    content: list[ContentBlock]

112 

113 

@dataclass(frozen=True)
class AssistantLogEntry:
    """Immutable log entry wrapping one assistant message.

    Attributes:
        message: The assistant message recorded by this entry.
    """

    message: AssistantMessage

123 

124 

@dataclass(frozen=True)
class UserLogEntry:
    """Immutable log entry wrapping one user message (typically tool results).

    Attributes:
        message: The user message recorded by this entry.
    """

    message: UserMessage

134 

135 

# Union of every log entry variant the parsers can return
LogEntry = AssistantLogEntry | UserLogEntry

138 

139 

140# Expected schema description for error messages 

141_SCHEMA_DESCRIPTION = """ 

142Expected Claude Agent SDK JSONL schema: 

143 {"type": "assistant"|"user", "message": {"content": [<blocks>]}} 

144 

145Content block types: 

146 - {"type": "text", "text": "<string>"} 

147 - {"type": "tool_use", "id": "<string>", "name": "<string>", "input": {...}} 

148 - {"type": "tool_result", "tool_use_id": "<string>", "content": ..., "is_error": <bool>} 

149""".strip() 

150 

151 

class LogParseError(Exception):
    """Raised when a log entry fails schema validation during parsing.

    Attributes:
        reason: Human-readable explanation of what was expected.
        data: The raw data that failed to parse (None when not supplied).
        schema_hint: Description of the expected schema format.
    """

    def __init__(self, reason: str, data: dict[str, Any] | None = None):
        # The rendered message always embeds the schema description so the
        # failure is self-explanatory wherever the exception is printed.
        message = f"Log parse error: {reason}\n\n{_SCHEMA_DESCRIPTION}"
        super().__init__(message)
        self.schema_hint = _SCHEMA_DESCRIPTION
        self.data = data
        self.reason = reason

166 

167 

168def _parse_content_block(block: dict[str, Any]) -> ContentBlock | None: 

169 """Parse a content block from raw dict data. 

170 

171 Args: 

172 block: Raw dict data for a content block. 

173 

174 Returns: 

175 Parsed ContentBlock or None if the block type is unrecognized. 

176 Unknown block types are silently ignored for forward compatibility. 

177 """ 

178 if not isinstance(block, dict): 

179 return None 

180 

181 block_type = block.get("type") 

182 

183 if block_type == "text": 

184 text = block.get("text", "") 

185 if not isinstance(text, str): 

186 return None 

187 return TextBlock(text=text) 

188 

189 if block_type == "tool_use": 

190 tool_id = block.get("id", "") 

191 name = block.get("name", "") 

192 tool_input = block.get("input", {}) 

193 if not isinstance(tool_id, str) or not isinstance(name, str): 

194 return None 

195 if not isinstance(tool_input, dict): 

196 tool_input = {} 

197 return ToolUseBlock(id=tool_id, name=name, input=tool_input) 

198 

199 if block_type == "tool_result": 

200 tool_use_id = block.get("tool_use_id", "") 

201 content = block.get("content", "") 

202 is_error = block.get("is_error", False) 

203 if not isinstance(tool_use_id, str): 

204 return None 

205 # Reject non-bool is_error to avoid "false" -> True misclassification 

206 if not isinstance(is_error, bool): 

207 return None 

208 return ToolResultBlock( 

209 tool_use_id=tool_use_id, content=content, is_error=is_error 

210 ) 

211 

212 # Unknown block type - ignore for forward compatibility 

213 return None 

214 

215 

def _parse_content_block_strict(block: dict[str, Any], index: int) -> ContentBlock:
    """Convert one raw content block into its typed form, raising on any flaw.

    Args:
        block: Raw dict data for a content block.
        index: Position of the block in the content array (used in error
            messages and attached to the error data).

    Returns:
        The typed ContentBlock.

    Raises:
        LogParseError: If the block is not a dict, lacks a "type", has an
            unknown "type", or a field is missing or has the wrong type.
    """
    # Every failure reports the offending block together with its position.
    context = {"block": block, "index": index}

    def fail(detail: str) -> LogParseError:
        return LogParseError(detail, data=context)

    def required_str(label: str, field: str) -> str:
        # Fetch a mandatory string field; absence is reported before a
        # wrong type.
        value = block.get(field)
        if value is None:
            raise fail(
                f"{label} block at index {index} missing required '{field}' field"
            )
        if not isinstance(value, str):
            raise fail(
                f"{label} block at index {index} has invalid '{field}' type: "
                f"expected str, got {type(value).__name__}"
            )
        return value

    if not isinstance(block, dict):
        raise fail(
            f"Content block at index {index} must be a dict, got {type(block).__name__}"
        )

    kind = block.get("type")
    if kind is None:
        raise fail(f"Content block at index {index} missing required 'type' field")

    if kind == "text":
        return TextBlock(text=required_str("Text", "text"))

    if kind == "tool_use":
        # id is validated before name, and both before input.
        use_id = required_str("tool_use", "id")
        tool_name = required_str("tool_use", "name")
        params = block.get("input", {})
        if not isinstance(params, dict):
            raise fail(
                f"tool_use block at index {index} has invalid 'input' type: "
                f"expected dict, got {type(params).__name__}"
            )
        return ToolUseBlock(id=use_id, name=tool_name, input=params)

    if kind == "tool_result":
        ref_id = required_str("tool_result", "tool_use_id")
        failed = block.get("is_error", False)
        # is_error must be a real boolean; truthy stand-ins are rejected.
        if not isinstance(failed, bool):
            raise fail(
                f"tool_result block at index {index} has invalid 'is_error' type: "
                f"expected bool, got {type(failed).__name__}"
            )
        return ToolResultBlock(
            tool_use_id=ref_id,
            content=block.get("content", ""),
            is_error=failed,
        )

    # Unlike lenient parsing, an unrecognized block type is an error here.
    raise fail(
        f"Unknown content block type '{kind}' at index {index}. "
        f"Expected: text, tool_use, or tool_result"
    )

325 

326 

def parse_log_entry(data: dict[str, Any]) -> LogEntry | None:
    """Turn a raw JSONL entry dict into a typed LogEntry (lenient mode).

    Entries that do not match the expected Claude Agent SDK structure are
    reported as None rather than as errors, which keeps parsing forward
    compatible. Use parse_log_entry_strict() when detailed error messages
    are needed.

    Args:
        data: Parsed JSON object from a JSONL line.

    Returns:
        An AssistantLogEntry or UserLogEntry when the entry matches the
        expected schema; None when the entry type is unrecognized or the
        structure is invalid.

    Note:
        - Unknown fields are ignored (forward compatibility)
        - Unknown block types within content are skipped
        - Empty content arrays are valid

    Example:
        >>> data = {"type": "assistant", "message": {"content": [
        ...     {"type": "text", "text": "Hello"}
        ... ]}}
        >>> entry = parse_log_entry(data)
        >>> isinstance(entry, AssistantLogEntry)
        True
    """
    if not isinstance(data, dict):
        return None

    envelope = data.get("message")
    direction = data.get("type")
    # Alternative format: the direction may live in message.role when the
    # top-level type is absent.
    if direction is None and isinstance(envelope, dict):
        direction = envelope.get("role")

    if direction not in ("assistant", "user") or not isinstance(envelope, dict):
        return None

    raw_blocks = envelope.get("content")
    # A missing content field comes back as None, which also fails this check.
    if not isinstance(raw_blocks, list):
        return None

    # Keep only the blocks the lenient block parser recognizes.
    parsed = [
        typed
        for typed in map(_parse_content_block, raw_blocks)
        if typed is not None
    ]

    if direction == "assistant":
        return AssistantLogEntry(message=AssistantMessage(content=parsed))
    return UserLogEntry(message=UserMessage(content=parsed))

392 

393 

def parse_log_entry_strict(data: dict[str, Any]) -> LogEntry:
    """Turn a raw JSONL entry dict into a typed LogEntry (strict mode).

    Where parse_log_entry() returns None, this function raises LogParseError
    with detailed schema information. It is meant for testing, debugging,
    or any caller that needs clear messages about schema violations.

    Args:
        data: Parsed JSON object from a JSONL line.

    Returns:
        LogEntry (AssistantLogEntry or UserLogEntry).

    Raises:
        LogParseError: If the entry doesn't match the expected schema.
            The error includes:
            - A specific reason explaining what was wrong
            - The problematic data (omitted when the entry is not a dict)
            - A reference to the expected schema format

    Example:
        >>> data = {"type": "invalid"}
        >>> parse_log_entry_strict(data)
        Traceback (most recent call last):
            ...
        LogParseError: Log parse error: Entry type must be 'assistant' or 'user', got 'invalid'
    """
    if not isinstance(data, dict):
        raise LogParseError(
            f"Entry must be a dict, got {type(data).__name__}",
            data=None,
        )

    envelope = data.get("message")
    direction = data.get("type")
    # Alternative format: fall back to message.role when no top-level type.
    if direction is None and isinstance(envelope, dict):
        direction = envelope.get("role")

    if direction is None:
        raise LogParseError(
            "Entry missing required 'type' field. "
            "Expected top-level 'type' or 'message.role'",
            data=data,
        )
    if direction not in ("assistant", "user"):
        raise LogParseError(
            f"Entry type must be 'assistant' or 'user', got '{direction}'",
            data=data,
        )

    if envelope is None:
        raise LogParseError("Entry missing required 'message' field", data=data)
    if not isinstance(envelope, dict):
        raise LogParseError(
            f"Entry 'message' must be a dict, got {type(envelope).__name__}",
            data=data,
        )

    raw_blocks = envelope.get("content")
    if raw_blocks is None:
        raise LogParseError(
            "Entry 'message' missing required 'content' field",
            data=data,
        )
    if not isinstance(raw_blocks, list):
        raise LogParseError(
            f"Entry 'message.content' must be a list, got {type(raw_blocks).__name__}",
            data=data,
        )

    # Blocks are parsed in order, so the first invalid one raises.
    parsed: list[ContentBlock] = [
        _parse_content_block_strict(raw, position)
        for position, raw in enumerate(raw_blocks)
    ]

    if direction == "assistant":
        return AssistantLogEntry(message=AssistantMessage(content=parsed))
    return UserLogEntry(message=UserMessage(content=parsed))