Coverage for src/dataknobs_llm/prompts/builders/base_prompt_builder.py: 27%

56 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-31 16:04 -0600

1"""Base prompt builder with shared functionality for sync and async builders. 

2 

3This module provides BasePromptBuilder, an abstract base class that contains 

4all the shared logic between PromptBuilder and AsyncPromptBuilder. This reduces 

5code duplication and ensures consistent behavior across both implementations. 

6""" 

7 

8import logging 

9from abc import ABC, abstractmethod 

10from typing import Any, Dict, List, Optional 

11 

12from ..base import ( 

13 AbstractPromptLibrary, 

14 PromptTemplate, 

15 RAGConfig, 

16 ValidationLevel, 

17 ValidationConfig, 

18 RenderResult, 

19) 

20from ..rendering import TemplateRenderer 

21 

22logger = logging.getLogger(__name__) 

23 

24 

class BasePromptBuilder(ABC):
    """Abstract base class with shared functionality for prompt builders.

    This class provides common methods for:
    - Template parameter merging
    - RAG query rendering
    - RAG result formatting
    - Required parameter extraction
    - String representation

    Subclasses must implement the async/sync-specific methods for:
    - Rendering prompts (with I/O operations)
    - Executing RAG searches (with I/O operations)
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Optional[Dict[str, Any]] = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ) -> None:
        """Initialize the base prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters. ``None`` (or any
                falsy value) is replaced by a fresh empty dict.
            default_validation: Default validation level handed to the
                internal ``TemplateRenderer``
            raise_on_rag_error: If True, raise exceptions on RAG failures.
                The flag is only stored here; it is not read by any method
                of this base class.
        """
        self.library = library
        self.adapters = adapters or {}
        self._renderer = TemplateRenderer(default_validation=default_validation)
        self._raise_on_rag_error = raise_on_rag_error

    # ===== Shared Helper Methods =====

    def _extract_formatted_content_from_cache(
        self,
        cached_rag: Dict[str, Any]
    ) -> Dict[str, str]:
        r"""Extract formatted content from cached RAG metadata.

        This method extracts the pre-formatted RAG content from cache
        so it can be injected directly into templates without re-executing
        searches or re-formatting results.

        Args:
            cached_rag: Cached RAG metadata dict with structure:
                {
                    "placeholder_name": {
                        "formatted_content": "...",
                        "query": "...",
                        "results": [...],
                        ...
                    }
                }

        Returns:
            Dict mapping placeholder names to formatted content strings.
            An entry without a "formatted_content" key maps to "".

        Example:
            >>> cache = {
            ...     "RAG_CONTENT": {
            ...         "formatted_content": "# Docs\n\n1. Python is...",
            ...         "query": "python docs",
            ...     }
            ... }
            >>> content = builder._extract_formatted_content_from_cache(cache)
            >>> content
            {'RAG_CONTENT': '# Docs\n\n1. Python is...'}
        """
        return {
            placeholder: cache_entry.get("formatted_content", "")
            for placeholder, cache_entry in cached_rag.items()
        }

    def _compute_rag_query_hash(
        self,
        adapter_name: str,
        query: str
    ) -> str:
        """Compute a hash for RAG query matching.

        This hash is used to match cached RAG results with new queries.
        Two queries with the same hash are considered equivalent and
        can reuse cached results.

        Args:
            adapter_name: Name of the adapter
            query: Rendered query string

        Returns:
            SHA256 hex digest of adapter_name:query

        Example:
            >>> hash1 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash2 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash1 == hash2
            True
        """
        import hashlib

        # The "adapter:query" layout is kept unchanged so that hashes computed
        # earlier keep matching.
        combined = f"{adapter_name}:{query}"
        return hashlib.sha256(combined.encode()).hexdigest()

    def _render_rag_query(self, query_template: str, params: Dict[str, Any]) -> str:
        """Render a RAG query template with parameters.

        Delegates to ``render_conditional_template`` from
        ``dataknobs_llm.template_utils`` (imported lazily at call time).

        Args:
            query_template: Query template string with {{variables}}
            params: Parameters for substitution

        Returns:
            Rendered query string
        """
        from dataknobs_llm.template_utils import render_conditional_template

        return render_conditional_template(query_template, params)

    def _format_rag_results(
        self,
        results: List[Dict[str, Any]],
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Format RAG search results according to configuration.

        The header (``rag_config["header"]``, default "") is rendered once
        with ``params``. Each result is rendered with
        ``rag_config["item_template"]`` (default "{{content}}"). The rendered
        pieces are concatenated with no separator, so any spacing must come
        from the templates themselves.

        Each item is rendered with ``params`` plus:
        - ``index``: 1-based position of the result
        - ``content``: ``result["content"]`` (default "")
        - ``score``: ``result["score"]`` (default 0.0)
        - ``metadata``: the result's metadata dict
        - every metadata field as a top-level name

        Metadata fields are applied last, so a metadata key named like one
        of the entries above (or like a caller parameter) takes precedence.

        Args:
            results: List of search results from adapter
            rag_config: RAG configuration with formatting options
            params: Parameters for template rendering

        Returns:
            Formatted RAG content string, or "" when there are no results
        """
        if not results:
            return ""

        # Get formatting configuration
        header = rag_config.get("header", "")
        item_template = rag_config.get("item_template", "{{content}}")

        # Render header
        formatted_header = self._render_rag_query(header, params)

        # Format each result
        formatted_items = []
        for i, result in enumerate(results, start=1):
            # A result may carry an explicit ``"metadata": None``; treat it
            # like a missing key instead of failing on ``**None``.
            metadata = result.get("metadata") or {}

            item_params = {
                **params,
                "index": i,
                "content": result.get("content", ""),
                "score": result.get("score", 0.0),
                "metadata": metadata,
                **metadata,  # Also expose metadata fields directly
            }

            formatted_items.append(
                self._render_rag_query(item_template, item_params)
            )

        # Combine header and items
        return formatted_header + "".join(formatted_items)

    def _merge_params_with_defaults(
        self,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Merge template defaults with runtime parameters.

        Args:
            template_dict: Template dictionary with defaults
            runtime_params: Runtime parameters (higher priority)

        Returns:
            New merged parameters dictionary; neither input is modified
        """
        # ``or {}`` also covers a template that stores ``"defaults": None``.
        defaults = template_dict.get("defaults") or {}
        return {**defaults, **runtime_params}

    def _prepare_validation_config(
        self,
        template_dict: PromptTemplate,
        validation_override: Optional[ValidationLevel]
    ) -> Optional[ValidationConfig]:
        """Prepare validation configuration with override support.

        Without an override, the template's own "validation" entry is
        returned as-is (possibly None). With an override, the level is set
        on a shallow copy of the template's configuration (or on a new
        ``ValidationConfig`` when the template has none), so the template
        held by the library is never modified by a per-call override.

        Args:
            template_dict: Template dictionary
            validation_override: Optional validation level override

        Returns:
            Validation configuration or None
        """
        import copy

        validation_config = template_dict.get("validation")
        if validation_override is None:
            return validation_config

        if validation_config is None:
            effective_config = ValidationConfig()
        else:
            # Copy first: assigning ``level`` on the template's own object
            # would leak this override into every later render.
            effective_config = copy.copy(validation_config)
        effective_config.level = validation_override
        return effective_config

    def get_required_parameters(
        self,
        name: str,
        prompt_type: str = "system",
        index: int = 0,
        **kwargs: Any
    ) -> List[str]:
        """Get list of required parameters for a prompt.

        Useful for validation before rendering.

        Args:
            name: Prompt identifier
            prompt_type: Type of prompt ("system" or "user"). Any value other
                than "system" is looked up as a user prompt.
            index: Prompt variant index (for user prompts)
            **kwargs: Additional parameters passed to library

        Returns:
            List of required parameter names (empty when the template has no
            validation configuration)

        Raises:
            ValueError: If prompt not found
        """
        # Retrieve template
        if prompt_type == "system":
            template_dict = self.library.get_system_prompt(name, **kwargs)
        else:
            template_dict = self.library.get_user_prompt(name, index=index, **kwargs)

        if template_dict is None:
            raise ValueError(f"Prompt not found: {name} (type={prompt_type})")

        # Extract required parameters from validation config
        validation_config = template_dict.get("validation")
        if validation_config:
            return list(validation_config.required_params)

        return []

    def __repr__(self) -> str:
        """Return a string representation of this builder."""
        return (
            f"{self.__class__.__name__}("
            f"library={self.library}, "
            f"adapters={list(self.adapters)}"
            ")"
        )

    # ===== Abstract Methods (Must be implemented by subclasses) =====

    @abstractmethod
    def _validate_adapters(self) -> None:
        """Validate that all adapters are the correct type (sync or async).

        Raises:
            TypeError: If adapter types don't match builder type
        """

    @abstractmethod
    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: Optional[ValidationLevel],
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        index: int = 0,
        **kwargs: Any
    ):
        """Internal method to render a prompt template.

        This is the core rendering logic that differs between sync/async.
        The return annotation is left open because the sync and async
        implementations differ.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            index: Prompt index (for user prompts)
            **kwargs: Additional parameters

        Returns:
            RenderResult with rendered content and metadata (including
            rag_metadata if return_rag_metadata=True)
        """

    @abstractmethod
    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        index: int,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ):
        """Execute RAG searches and format results for injection.

        This method differs between sync (sequential) and async (parallel).

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            index: Prompt index (for user prompts)
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted content
            - rag_metadata: Optional dict with full RAG details (if capture_metadata=True)
        """