Coverage for src/dataknobs_llm/prompts/builders/prompt_builder.py: 15%

101 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-31 16:04 -0600

1"""Synchronous prompt builder for constructing prompts with parameter resolution and RAG. 

2 

3This module provides the PromptBuilder class which coordinates between: 

4- Prompt libraries (template sources) 

5- Resource adapters (data sources) 

6- Template renderer (rendering engine) 

7 

8The builder handles: 

9- Parameter resolution from multiple sources 

10- RAG content retrieval and injection 

11- Validation enforcement 

12- Template defaults merging 

13""" 

14 

15import logging 

16from typing import Any, Dict, List, Optional 

17 

18from ..base import ( 

19 PromptTemplate, 

20 RAGConfig, 

21 ValidationLevel, 

22 RenderResult, 

23) 

24from ..adapters import ResourceAdapter 

25from .base_prompt_builder import BasePromptBuilder 

26 

27logger = logging.getLogger(__name__) 

28 

29 

class PromptBuilder(BasePromptBuilder):
    """Synchronous prompt builder for constructing prompts with RAG and validation.

    This class provides a high-level API for building prompts by:
    1. Retrieving prompt templates from a library
    2. Resolving parameters from adapters and runtime values
    3. Executing RAG searches via adapters
    4. Injecting RAG content into templates
    5. Rendering final prompts with validation

    Example:
        >>> library = ConfigPromptLibrary(config)
        >>> adapters = {
        ...     'config': DictResourceAdapter(config_data),
        ...     'docs': DataknobsBackendAdapter(docs_db)
        ... }
        >>> builder = PromptBuilder(library=library, adapters=adapters)
        >>>
        >>> # Render a system prompt
        >>> result = builder.render_system_prompt(
        ...     'analyze_code',
        ...     params={'code': code_snippet, 'language': 'python'}
        ... )
    """

    def __init__(
        self,
        library,
        adapters: Optional[Dict[str, ResourceAdapter]] = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ):
        """Initialize the synchronous prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters for parameter
                resolution and RAG searches
            default_validation: Default validation level for templates without
                explicit validation configuration
            raise_on_rag_error: If True, raise exceptions on RAG failures;
                if False (default), log warning and continue

        Raises:
            TypeError: If any adapter is async (use AsyncPromptBuilder instead)
        """
        super().__init__(library, adapters, default_validation, raise_on_rag_error)
        # Runs after the base initializer because it reads self.adapters.
        self._validate_adapters()

    def _validate_adapters(self) -> None:
        """Validate that all adapters are synchronous.

        Raises:
            TypeError: If any adapter reports itself as async via ``is_async()``
        """
        for name, adapter in self.adapters.items():
            if adapter.is_async():
                raise TypeError(
                    f"Adapter '{name}' is async. "
                    "Use AsyncPromptBuilder for async adapters."
                )

    def render_system_prompt(
        self,
        name: str,
        params: Optional[Dict[str, Any]] = None,
        include_rag: bool = True,
        validation_override: Optional[ValidationLevel] = None,
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a system prompt with parameters and optional RAG content.

        Args:
            name: System prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If prompt not found or validation fails

        Example:
            >>> # Capture RAG metadata
            >>> result = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     return_rag_metadata=True
            ... )
            >>> print(result.rag_metadata)
            >>>
            >>> # Reuse cached RAG
            >>> result2 = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     cached_rag=result.rag_metadata
            ... )
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_system_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"System prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="system",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def render_user_prompt(
        self,
        name: str,
        params: Optional[Dict[str, Any]] = None,
        include_rag: bool = True,
        validation_override: Optional[ValidationLevel] = None,
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a user prompt with parameters and optional RAG content.

        Args:
            name: User prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If prompt not found or validation fails
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_user_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"User prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="user",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: Optional[ValidationLevel],
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Internal method to render a prompt template synchronously.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters forwarded to the library when
                looking up RAG configurations

        Returns:
            RenderResult with rendered content and metadata. The builder adds
            ``prompt_name``, ``prompt_type``, ``include_rag`` and
            ``used_cached_rag`` to ``result.metadata``.
        """
        # Extract template components
        template = template_dict.get("template", "")
        template_metadata = template_dict.get("metadata", {})

        # Step 1: Merge defaults with runtime params
        all_params = self._merge_params_with_defaults(template_dict, runtime_params)

        # Step 2: Execute or reuse RAG searches
        rag_metadata = None
        # Set only in the branch that actually reads the cache, so the
        # reported metadata cannot disagree with what happened (an empty
        # cache dict, or include_rag=False, means the cache was not used).
        used_cached_rag = False
        if include_rag:
            if cached_rag:
                # Use cached RAG results
                rag_content = self._extract_formatted_content_from_cache(cached_rag)
                used_cached_rag = True
                if return_rag_metadata:
                    rag_metadata = cached_rag  # Pass through cached metadata
            else:
                # Execute fresh RAG searches
                rag_content, rag_metadata = self._execute_rag_searches_impl(
                    prompt_name=prompt_name,
                    prompt_type=prompt_type,
                    params=all_params,
                    capture_metadata=return_rag_metadata,
                    **kwargs
                )

            # Merge RAG content into parameters
            all_params.update(rag_content)

        # Step 3: Prepare validation config with override
        validation_config = self._prepare_validation_config(template_dict, validation_override)

        # Step 4: Render template with validation
        result = self._renderer.render(
            template=template,
            params=all_params,
            validation=validation_config,
            template_metadata=template_metadata
        )

        # Attach RAG metadata if requested (skipped when there is none)
        if return_rag_metadata and rag_metadata:
            result.rag_metadata = rag_metadata

        # Add builder metadata
        result.metadata.update({
            "prompt_name": prompt_name,
            "prompt_type": prompt_type,
            "include_rag": include_rag,
            "used_cached_rag": used_cached_rag,
        })

        return result

    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ) -> tuple[Dict[str, str], Optional[Dict[str, Any]]]:
        """Execute RAG searches and format results for injection.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters passed to the library lookup

        Returns:
            Tuple of (rag_content, rag_metadata):
                - rag_content: Dictionary mapping placeholder names to formatted content
                - rag_metadata: Optional dict keyed by placeholder with full RAG
                  details (only when capture_metadata=True and the prompt has
                  RAG configurations; otherwise None)

        Raises:
            RuntimeError: If a search fails and ``raise_on_rag_error`` was set
        """
        from datetime import datetime

        # Get RAG configurations for this prompt
        rag_configs = self.library.get_prompt_rag_configs(
            prompt_name=prompt_name,
            prompt_type=prompt_type,
            **kwargs
        )

        if not rag_configs:
            return {}, None

        rag_content = {}
        rag_metadata = {} if capture_metadata else None

        for rag_config in rag_configs:
            placeholder = rag_config.get("placeholder", "RAG_CONTENT")

            try:
                if capture_metadata:
                    # Execute with metadata capture
                    formatted_content, metadata = self._execute_single_rag_with_metadata(
                        rag_config, params
                    )
                    rag_content[placeholder] = formatted_content
                    if metadata:
                        rag_metadata[placeholder] = metadata
                else:
                    # Execute without metadata (faster)
                    rag_content[placeholder] = self._execute_single_rag_search(
                        rag_config, params
                    )

            except Exception as e:
                # Deliberately broad: any failure of one search is either
                # escalated or degraded to empty content, per configuration.
                error_msg = f"RAG search failed for {prompt_name}: {e}"
                if self._raise_on_rag_error:
                    raise RuntimeError(error_msg) from e

                logger.warning(error_msg)
                # Use empty content on failure
                rag_content[placeholder] = ""
                if capture_metadata:
                    rag_metadata[placeholder] = {
                        "error": str(e),
                        "timestamp": datetime.now().isoformat()
                    }

        return rag_content, rag_metadata

    def _resolve_rag_search_target(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, ResourceAdapter, str]:
        """Look up the adapter named by a RAG config and render its query.

        Shared by both single-search methods so the validation and error
        messages stay identical.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (adapter_name, adapter, rendered_query)

        Raises:
            ValueError: If the config has no (or an empty) 'adapter_name'
            KeyError: If no adapter is registered under that name
        """
        adapter_name = rag_config.get("adapter_name")
        if not adapter_name:
            raise ValueError("RAG config missing 'adapter_name'")

        if adapter_name not in self.adapters:
            raise KeyError(
                f"Adapter '{adapter_name}' not found. "
                f"Available adapters: {list(self.adapters.keys())}"
            )

        # Render query template
        query = self._render_rag_query(rag_config.get("query", ""), params)

        return adapter_name, self.adapters[adapter_name], query

    def _execute_single_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Execute a single RAG search and format results.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Formatted RAG content string

        Raises:
            ValueError: If the config is missing 'adapter_name'
            KeyError: If adapter not found
            Exception: If search fails
        """
        _, adapter, query = self._resolve_rag_search_target(rag_config, params)

        # Execute search (synchronous); k defaults to 5 when not configured
        search_results = adapter.search(
            query=query,
            k=rag_config.get("k", 5),
            filters=rag_config.get("filters")
        )

        # Format results
        return self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params
        )

    def _execute_single_rag_with_metadata(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, Dict[str, Any]]:
        """Execute a single RAG search with metadata capture.

        This method executes a RAG search and captures detailed metadata
        including the query, results, and query hash for caching.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (formatted_content, metadata):
                - formatted_content: Formatted RAG content string
                - metadata: Dictionary with RAG metadata including:
                    - adapter_name: Name of the adapter used
                    - query: Rendered query string
                    - query_hash: Value from ``_compute_rag_query_hash`` for
                      cache matching
                    - k: Number of results requested
                    - filters: Filters applied to search
                    - timestamp: ISO format timestamp
                    - results: Raw search results
                    - formatted_content: Formatted output
                    - item_template: Template used for formatting
                    - header: Header text used

        Raises:
            ValueError: If the config is missing 'adapter_name'
            KeyError: If adapter not found
            Exception: If search fails
        """
        from datetime import datetime

        adapter_name, adapter, query = self._resolve_rag_search_target(rag_config, params)

        # Compute query hash for cache matching
        query_hash = self._compute_rag_query_hash(adapter_name, query)

        # Execute search (synchronous); k defaults to 5 when not configured
        k = rag_config.get("k", 5)
        filters = rag_config.get("filters")
        search_results = adapter.search(query=query, k=k, filters=filters)

        # Format results
        formatted_content = self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params
        )

        # Build metadata
        metadata = {
            "adapter_name": adapter_name,
            "query": query,
            "query_hash": query_hash,
            "k": k,
            "filters": filters,
            "timestamp": datetime.now().isoformat(),
            "results": search_results,  # Store raw results
            "formatted_content": formatted_content,
            "item_template": rag_config.get("item_template"),
            "header": rag_config.get("header"),
        }

        return formatted_content, metadata