Coverage for src / dataknobs_llm / prompts / builders / base_prompt_builder.py: 34%

56 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 10:29 -0700

1"""Base prompt builder with shared functionality for sync and async builders. 

2 

3This module provides BasePromptBuilder, an abstract base class that contains 

4all the shared logic between PromptBuilder and AsyncPromptBuilder. This reduces 

5code duplication and ensures consistent behavior across both implementations. 

6""" 

7 

8import logging 

9from abc import ABC, abstractmethod 

10from typing import Any, Dict, List 

11 

12from ..base import ( 

13 AbstractPromptLibrary, 

14 PromptTemplateDict, 

15 RAGConfig, 

16 ValidationLevel, 

17 ValidationConfig, 

18) 

19from ..rendering import TemplateRenderer 

20 

21logger = logging.getLogger(__name__) 

22 

23 

class BasePromptBuilder(ABC):
    """Abstract base class with shared functionality for prompt builders.

    This class provides common methods for:
    - Template parameter merging
    - RAG query rendering
    - RAG result formatting
    - Required parameter extraction
    - String representation

    Subclasses must implement the async/sync-specific methods for:
    - Rendering prompts (with I/O operations)
    - Executing RAG searches (with I/O operations)
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Dict[str, Any] | None = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ) -> None:
        """Initialize the base prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters. ``None`` is
                replaced by a new empty dict; any dict that is passed in
                (including an empty one) is stored as-is.
            default_validation: Default validation level for templates
            raise_on_rag_error: If True, raise exceptions on RAG failures
        """
        self.library = library
        # Only substitute a fresh dict for None so that a caller-supplied
        # (possibly still empty) dict is stored by reference, exactly as a
        # non-empty one is.
        self.adapters = adapters if adapters is not None else {}
        self._renderer = TemplateRenderer(default_validation=default_validation)
        self._raise_on_rag_error = raise_on_rag_error

    # ===== Shared Helper Methods =====

    def _extract_formatted_content_from_cache(
        self,
        cached_rag: Dict[str, Any]
    ) -> Dict[str, str]:
        r"""Extract formatted content from cached RAG metadata.

        This method extracts the pre-formatted RAG content from cache
        so it can be injected directly into templates without re-executing
        searches or re-formatting results.

        Args:
            cached_rag: Cached RAG metadata dict with structure:
                {
                    "placeholder_name": {
                        "formatted_content": "...",
                        "query": "...",
                        "results": [...],
                        ...
                    }
                }

        Returns:
            Dict mapping placeholder names to formatted content strings.
            Entries without a "formatted_content" key map to "".

        Example:
            >>> cache = {
            ...     "RAG_CONTENT": {
            ...         "formatted_content": "# Docs\n\n1. Python is...",
            ...         "query": "python docs",
            ...     }
            ... }
            >>> content = builder._extract_formatted_content_from_cache(cache)
            >>> content
            {'RAG_CONTENT': '# Docs\n\n1. Python is...'}
        """
        return {
            placeholder: cache_entry.get("formatted_content", "")
            for placeholder, cache_entry in cached_rag.items()
        }

    def _compute_rag_query_hash(
        self,
        adapter_name: str,
        query: str
    ) -> str:
        """Compute a hash for RAG query matching.

        This hash is used to match cached RAG results with new queries.
        Two queries with the same hash are considered equivalent and
        can reuse cached results.

        Args:
            adapter_name: Name of the adapter
            query: Rendered query string

        Returns:
            SHA256 hex digest of adapter_name:query

        Example:
            >>> hash1 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash2 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash1 == hash2
            True
        """
        import hashlib

        # The "adapter:query" layout is kept unchanged so that digests
        # computed here keep matching previously stored ones.
        combined = f"{adapter_name}:{query}"
        return hashlib.sha256(combined.encode()).hexdigest()

    def _render_rag_query(self, query_template: str, params: Dict[str, Any]) -> str:
        """Render a RAG query template with parameters.

        Delegates to ``render_conditional_template`` from
        ``dataknobs_llm.template_utils``.

        Args:
            query_template: Query template string with {{variables}}
            params: Parameters for substitution

        Returns:
            Rendered query string
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from dataknobs_llm.template_utils import render_conditional_template

        return render_conditional_template(query_template, params)

    def _format_rag_results(
        self,
        results: List[Dict[str, Any]],
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Format RAG search results according to configuration.

        The header template (``rag_config["header"]``, default "") is
        rendered once with ``params``. Each result is then rendered with
        ``rag_config["item_template"]`` (default "{{content}}"), which can
        reference ``index`` (1-based), ``content``, ``score``, ``metadata``
        and every key of the result's metadata. The rendered header and
        items are concatenated without any separator.

        Args:
            results: List of search results from adapter
            rag_config: RAG configuration with formatting options
            params: Parameters for template rendering

        Returns:
            Formatted RAG content string, or "" when there are no results
        """
        if not results:
            return ""

        # Get formatting configuration
        header = rag_config.get("header", "")
        item_template = rag_config.get("item_template", "{{content}}")

        # Render header
        formatted_header = self._render_rag_query(header, params)

        # Format each result
        formatted_items = []
        for i, result in enumerate(results, start=1):
            # A result may carry "metadata": None; treat that like a missing
            # key so the ** expansion below cannot raise TypeError.
            metadata = result.get("metadata") or {}

            # Metadata fields are expanded last, so a metadata key named
            # like a built-in variable (e.g. "content") takes precedence.
            # This ordering is preserved from the original implementation.
            item_params = {
                **params,
                "index": i,
                "content": result.get("content", ""),
                "score": result.get("score", 0.0),
                "metadata": metadata,
                **metadata,  # Also expose metadata fields directly
            }

            # Render item
            formatted_items.append(
                self._render_rag_query(item_template, item_params)
            )

        # Combine header and items
        return formatted_header + "".join(formatted_items)

    def _merge_params_with_defaults(
        self,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Merge template defaults with runtime parameters.

        Args:
            template_dict: Template dictionary with defaults
            runtime_params: Runtime parameters (higher priority)

        Returns:
            New merged parameters dictionary; neither input is modified
        """
        defaults = template_dict.get("defaults", {})
        return {**defaults, **runtime_params}

    def _prepare_validation_config(
        self,
        template_dict: PromptTemplateDict,
        validation_override: ValidationLevel | None
    ) -> ValidationConfig | None:
        """Prepare validation configuration with override support.

        When an override is given, it is applied to a copy of the
        template's validation config (or to a new ``ValidationConfig`` if
        the template has none), so the object stored in ``template_dict``
        is never modified.

        Args:
            template_dict: Template dictionary
            validation_override: Optional validation level override

        Returns:
            Validation configuration or None
        """
        validation_config = template_dict.get("validation")

        # No override: hand back the template's own config (or None) as-is.
        if validation_override is None:
            return validation_config

        if validation_config is None:
            validation_config = ValidationConfig()
        else:
            import copy

            # Work on a shallow copy: assigning .level on the template's own
            # object would make a per-call override persist for every later
            # render that reads the same template.
            validation_config = copy.copy(validation_config)

        validation_config.level = validation_override
        return validation_config

    def get_required_parameters(
        self,
        name: str,
        prompt_type: str = "system",
        index: int = 0,
        **kwargs: Any
    ) -> List[str]:
        """Get list of required parameters for a prompt.

        Useful for validation before rendering.

        Args:
            name: Prompt identifier
            prompt_type: Type of prompt ("system" or "user"); any value
                other than "system" is looked up as a user prompt
            index: Prompt variant index (for user prompts)
            **kwargs: Additional parameters passed to library

        Returns:
            List of required parameter names (empty if the template has
            no validation config)

        Raises:
            ValueError: If prompt not found
        """
        # Retrieve template
        if prompt_type == "system":
            template_dict = self.library.get_system_prompt(name, **kwargs)
        else:
            template_dict = self.library.get_user_prompt(name, index=index, **kwargs)

        if template_dict is None:
            raise ValueError(f"Prompt not found: {name} (type={prompt_type})")

        # Extract required parameters from validation config
        validation_config = template_dict.get("validation")
        if validation_config:
            return list(validation_config.required_params)

        return []

    def __repr__(self) -> str:
        """Return a string representation of this builder."""
        return (
            f"{self.__class__.__name__}("
            f"library={self.library}, "
            f"adapters={list(self.adapters)}"
            f")"
        )

    # ===== Abstract Methods (Must be implemented by subclasses) =====

    @abstractmethod
    def _validate_adapters(self) -> None:
        """Validate that all adapters are the correct type (sync or async).

        Raises:
            TypeError: If adapter types don't match builder type
        """

    @abstractmethod
    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: ValidationLevel | None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        index: int = 0,
        **kwargs: Any
    ):
        """Internal method to render a prompt template.

        This is the core rendering logic that differs between sync/async.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            index: Prompt index (for user prompts)
            **kwargs: Additional parameters

        Returns:
            RenderResult with rendered content and metadata (including
            rag_metadata if return_rag_metadata=True)
        """

    @abstractmethod
    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        index: int,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ):
        """Execute RAG searches and format results for injection.

        This method differs between sync (sequential) and async (parallel).

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            index: Prompt index (for user prompts)
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted content
            - rag_metadata: Optional dict with full RAG details (if capture_metadata=True)
        """