Coverage for src / dataknobs_llm / prompts / builders / prompt_builder.py: 15%

101 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 10:28 -0700

1"""Synchronous prompt builder for constructing prompts with parameter resolution and RAG. 

2 

3This module provides the PromptBuilder class which coordinates between: 

4- Prompt libraries (template sources) 

5- Resource adapters (data sources) 

6- Template renderer (rendering engine) 

7 

8The builder handles: 

9- Parameter resolution from multiple sources 

10- RAG content retrieval and injection 

11- Validation enforcement 

12- Template defaults merging 

13""" 

14 

15import logging 

16from typing import Any, Dict 

17 

18from ..base import ( 

19 AbstractPromptLibrary, 

20 PromptTemplateDict, 

21 RAGConfig, 

22 ValidationLevel, 

23 RenderResult, 

24) 

25from ..adapters import ResourceAdapter 

26from .base_prompt_builder import BasePromptBuilder 

27 

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

29 

30 

class PromptBuilder(BasePromptBuilder):
    """Synchronous prompt builder for constructing prompts with RAG and validation.

    This class provides a high-level API for building prompts by:
    1. Retrieving prompt templates from a library
    2. Resolving parameters from adapters and runtime values
    3. Executing RAG searches via adapters
    4. Injecting RAG content into templates
    5. Rendering final prompts with validation

    Example:
        >>> library = ConfigPromptLibrary(config)
        >>> adapters = {
        ...     'config': DictResourceAdapter(config_data),
        ...     'docs': DataknobsBackendAdapter(docs_db)
        ... }
        >>> builder = PromptBuilder(library=library, adapters=adapters)
        >>>
        >>> # Render a system prompt
        >>> result = builder.render_system_prompt(
        ...     'analyze_code',
        ...     params={'code': code_snippet, 'language': 'python'}
        ... )
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Dict[str, ResourceAdapter] | None = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False,
    ):
        """Initialize the synchronous prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters for parameter
                resolution and RAG searches
            default_validation: Default validation level for templates without
                explicit validation configuration
            raise_on_rag_error: If True, raise exceptions on RAG failures;
                if False (default), log warning and continue

        Raises:
            TypeError: If any adapter is async (use AsyncPromptBuilder instead)
        """
        super().__init__(library, adapters, default_validation, raise_on_rag_error)
        # Fail fast: an async adapter cannot be driven by the blocking
        # ``adapter.search(...)`` calls made by this builder.
        self._validate_adapters()

    def _validate_adapters(self) -> None:
        """Validate that all adapters are synchronous.

        Raises:
            TypeError: If any adapter reports ``is_async()`` as true
        """
        for name, adapter in self.adapters.items():
            if adapter.is_async():
                raise TypeError(
                    f"Adapter '{name}' is async. "
                    "Use AsyncPromptBuilder for async adapters."
                )

    def render_system_prompt(
        self,
        name: str,
        params: Dict[str, Any] | None = None,
        include_rag: bool = True,
        validation_override: ValidationLevel | None = None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a system prompt with parameters and optional RAG content.

        Args:
            name: System prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If prompt not found or validation fails

        Example:
            >>> # Capture RAG metadata
            >>> result = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     return_rag_metadata=True
            ... )
            >>> print(result.rag_metadata)
            >>>
            >>> # Reuse cached RAG
            >>> result2 = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     cached_rag=result.rag_metadata
            ... )
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_system_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"System prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="system",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def render_user_prompt(
        self,
        name: str,
        params: Dict[str, Any] | None = None,
        include_rag: bool = True,
        validation_override: ValidationLevel | None = None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a user prompt with parameters and optional RAG content.

        Args:
            name: User prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If prompt not found or validation fails
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_user_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"User prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="user",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: ValidationLevel | None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Internal method to render a prompt template synchronously.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided and non-empty, use these cached RAG
                results instead of executing new searches. An empty or
                ``None`` value falls back to fresh searches.
            **kwargs: Additional parameters forwarded to the RAG search step

        Returns:
            RenderResult with rendered content and metadata. The result's
            ``metadata`` gains ``prompt_name``, ``prompt_type``,
            ``include_rag`` and ``used_cached_rag``; the last one is True only
            when cached RAG content was actually injected.
        """
        # Extract template components
        template = template_dict.get("template", "")
        template_metadata = template_dict.get("metadata", {})

        # Step 1: Merge defaults with runtime params
        all_params = self._merge_params_with_defaults(template_dict, runtime_params)

        # Step 2: Execute or reuse RAG searches.
        # A single flag drives both the branch below and the reported
        # metadata, so "used_cached_rag" can never disagree with what ran
        # (e.g. cached_rag={} or include_rag=False with a cache supplied).
        use_cached_rag = include_rag and bool(cached_rag)
        rag_metadata = None
        if include_rag:
            if use_cached_rag:
                # Use cached RAG results
                rag_content = self._extract_formatted_content_from_cache(cached_rag)
                if return_rag_metadata:
                    rag_metadata = cached_rag  # Pass through cached metadata
            else:
                # Execute fresh RAG searches
                rag_content, rag_metadata = self._execute_rag_searches_impl(
                    prompt_name=prompt_name,
                    prompt_type=prompt_type,
                    params=all_params,
                    capture_metadata=return_rag_metadata,
                    **kwargs
                )

            # Merge RAG content into parameters
            all_params.update(rag_content)

        # Step 3: Prepare validation config with override
        validation_config = self._prepare_validation_config(
            template_dict, validation_override
        )

        # Step 4: Render template with validation
        result = self._renderer.render(
            template=template,
            params=all_params,
            validation=validation_config,
            template_metadata=template_metadata,
        )

        # Attach RAG metadata if requested (skipped when there is none)
        if return_rag_metadata and rag_metadata:
            result.rag_metadata = rag_metadata

        # Add builder metadata
        result.metadata.update({
            "prompt_name": prompt_name,
            "prompt_type": prompt_type,
            "include_rag": include_rag,
            "used_cached_rag": use_cached_rag,
        })

        return result

    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ) -> tuple[Dict[str, str], Dict[str, Any] | None]:
        """Execute RAG searches and format results for injection.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters passed to the library lookup

        Returns:
            Tuple of (rag_content, rag_metadata):
                - rag_content: Dictionary mapping placeholder names to
                  formatted content ("" for a search that failed)
                - rag_metadata: Per-placeholder details when
                  capture_metadata=True, otherwise None. Also None when the
                  prompt has no RAG configurations.

        Raises:
            RuntimeError: If a search fails and ``raise_on_rag_error`` was set
        """
        from datetime import datetime

        # Get RAG configurations for this prompt
        rag_configs = self.library.get_prompt_rag_configs(
            prompt_name=prompt_name,
            prompt_type=prompt_type,
            **kwargs
        )

        if not rag_configs:
            return {}, None

        rag_content: Dict[str, str] = {}
        rag_metadata: Dict[str, Any] | None = {} if capture_metadata else None

        for rag_config in rag_configs:
            placeholder = rag_config.get("placeholder", "RAG_CONTENT")

            try:
                if rag_metadata is not None:
                    # Execute with metadata capture
                    formatted_content, metadata = self._execute_single_rag_with_metadata(
                        rag_config, params
                    )
                    rag_content[placeholder] = formatted_content
                    if metadata:
                        rag_metadata[placeholder] = metadata
                else:
                    # Execute without metadata (faster)
                    rag_content[placeholder] = self._execute_single_rag_search(
                        rag_config, params
                    )

            except Exception as e:
                # Deliberately broad: this is the best-effort boundary for
                # RAG. Any failure is either escalated or logged and replaced
                # with empty content, depending on configuration.
                if self._raise_on_rag_error:
                    raise RuntimeError(
                        f"RAG search failed for {prompt_name}: {e}"
                    ) from e

                logger.warning("RAG search failed for %s: %s", prompt_name, e)
                # Use empty content on failure
                rag_content[placeholder] = ""
                if rag_metadata is not None:
                    rag_metadata[placeholder] = {
                        "error": str(e),
                        # NOTE: naive local time (no tzinfo), matching the
                        # timestamps written for successful searches.
                        "timestamp": datetime.now().isoformat(),
                    }

        return rag_content, rag_metadata

    def _run_sync_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any],
        *,
        with_query_hash: bool,
    ) -> Dict[str, Any]:
        """Resolve the adapter, render the query, search and format results.

        Shared implementation behind ``_execute_single_rag_search`` and
        ``_execute_single_rag_with_metadata``.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating
            with_query_hash: If True, compute the cache-matching query hash
                (before the search is executed); otherwise it is None

        Returns:
            Dictionary with keys ``adapter_name``, ``query``, ``query_hash``,
            ``k``, ``filters``, ``results`` and ``formatted_content``

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If the named adapter is not registered
            Exception: Whatever the adapter search or formatting raises
        """
        # Get adapter
        adapter_name = rag_config.get("adapter_name")
        if not adapter_name:
            raise ValueError("RAG config missing 'adapter_name'")

        if adapter_name not in self.adapters:
            raise KeyError(
                f"Adapter '{adapter_name}' not found. "
                f"Available adapters: {list(self.adapters.keys())}"
            )

        adapter = self.adapters[adapter_name]

        # Render query template
        query = self._render_rag_query(rag_config.get("query", ""), params)

        # Compute query hash for cache matching (only when requested)
        query_hash = (
            self._compute_rag_query_hash(adapter_name, query)
            if with_query_hash
            else None
        )

        # Execute search (synchronous); 5 results unless the config says otherwise
        k = rag_config.get("k", 5)
        filters = rag_config.get("filters")
        search_results = adapter.search(query=query, k=k, filters=filters)

        # Format results
        formatted_content = self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params,
        )

        return {
            "adapter_name": adapter_name,
            "query": query,
            "query_hash": query_hash,
            "k": k,
            "filters": filters,
            "results": search_results,
            "formatted_content": formatted_content,
        }

    def _execute_single_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Execute a single RAG search and format results.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Formatted RAG content string

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If adapter not found
            Exception: If search fails
        """
        details = self._run_sync_rag_search(
            rag_config, params, with_query_hash=False
        )
        return details["formatted_content"]

    def _execute_single_rag_with_metadata(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, Dict[str, Any]]:
        """Execute a single RAG search with metadata capture.

        This method executes a RAG search and captures detailed metadata
        including the query, results, and query hash for caching.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (formatted_content, metadata):
                - formatted_content: Formatted RAG content string
                - metadata: Dictionary with RAG metadata including:
                    - adapter_name: Name of the adapter used
                    - query: Rendered query string
                    - query_hash: Hash from ``_compute_rag_query_hash`` for
                      cache matching
                    - k: Number of results requested
                    - filters: Filters applied to search
                    - timestamp: ISO format timestamp (naive local time)
                    - results: Raw search results
                    - formatted_content: Formatted output
                    - item_template: Template used for formatting
                    - header: Header text used

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If adapter not found
            Exception: If search fails
        """
        from datetime import datetime

        details = self._run_sync_rag_search(
            rag_config, params, with_query_hash=True
        )
        formatted_content = details["formatted_content"]

        # Build metadata (key order kept stable for any serialized consumers)
        metadata = {
            "adapter_name": details["adapter_name"],
            "query": details["query"],
            "query_hash": details["query_hash"],
            "k": details["k"],
            "filters": details["filters"],
            "timestamp": datetime.now().isoformat(),
            "results": details["results"],  # Store raw results
            "formatted_content": formatted_content,
            "item_template": rag_config.get("item_template"),
            "header": rag_config.get("header"),
        }

        return formatted_content, metadata