Coverage for src/dataknobs_llm/prompts/builders/base_prompt_builder.py: 27%
56 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:04 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:04 -0600
"""Base prompt builder with shared functionality for sync and async builders.

This module provides BasePromptBuilder, an abstract base class that contains
all the shared logic between PromptBuilder and AsyncPromptBuilder. This reduces
code duplication and ensures consistent behavior across both implementations.
"""
8import logging
9from abc import ABC, abstractmethod
10from typing import Any, Dict, List, Optional
12from ..base import (
13 AbstractPromptLibrary,
14 PromptTemplate,
15 RAGConfig,
16 ValidationLevel,
17 ValidationConfig,
18 RenderResult,
19)
20from ..rendering import TemplateRenderer
22logger = logging.getLogger(__name__)
class BasePromptBuilder(ABC):
    """Abstract base class with shared functionality for prompt builders.

    This class provides common methods for:
    - Template parameter merging
    - RAG query rendering
    - RAG result formatting
    - Required parameter extraction
    - String representation

    Subclasses must implement the async/sync-specific methods for:
    - Rendering prompts (with I/O operations)
    - Executing RAG searches (with I/O operations)
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Optional[Dict[str, Any]] = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ) -> None:
        """Initialize the base prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters. ``None`` (or any
                falsy value) is replaced by a fresh empty dict.
            default_validation: Default validation level handed to the
                internal ``TemplateRenderer``
            raise_on_rag_error: If True, raise exceptions on RAG failures
        """
        self.library = library
        self.adapters = adapters or {}
        self._renderer = TemplateRenderer(default_validation=default_validation)
        # Only stored here; nothing in this base class reads it.
        # NOTE(review): presumably consumed by the sync/async subclasses - confirm.
        self._raise_on_rag_error = raise_on_rag_error

    # ===== Shared Helper Methods =====

    def _extract_formatted_content_from_cache(
        self,
        cached_rag: Dict[str, Any]
    ) -> Dict[str, str]:
        """Extract formatted content from cached RAG metadata.

        This method extracts the pre-formatted RAG content from cache
        so it can be injected directly into templates without re-executing
        searches or re-formatting results.

        Args:
            cached_rag: Cached RAG metadata dict with structure:
                {
                    "placeholder_name": {
                        "formatted_content": "...",
                        "query": "...",
                        "results": [...],
                        ...
                    }
                }

        Returns:
            Dict mapping placeholder names to formatted content strings.
            An entry without a "formatted_content" key maps to "".

        Example:
            >>> cache = {
            ...     "RAG_CONTENT": {
            ...         "formatted_content": "1. Python is...",
            ...         "query": "python docs",
            ...     }
            ... }
            >>> builder._extract_formatted_content_from_cache(cache)
            {'RAG_CONTENT': '1. Python is...'}
        """
        return {
            placeholder: cache_entry.get("formatted_content", "")
            for placeholder, cache_entry in cached_rag.items()
        }

    def _compute_rag_query_hash(
        self,
        adapter_name: str,
        query: str
    ) -> str:
        """Compute a hash for RAG query matching.

        This hash is used to match cached RAG results with new queries.
        Two queries with the same hash are considered equivalent and
        can reuse cached results.

        Args:
            adapter_name: Name of the adapter
            query: Rendered query string

        Returns:
            SHA256 hex digest of adapter_name:query

        Example:
            >>> hash1 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash2 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash1 == hash2
            True
        """
        import hashlib

        # The "<adapter>:<query>" layout is what cached hashes are compared
        # against, so it must stay byte-for-byte stable.
        combined = f"{adapter_name}:{query}"
        return hashlib.sha256(combined.encode()).hexdigest()

    def _render_rag_query(self, query_template: str, params: Dict[str, Any]) -> str:
        """Render a RAG query template with parameters.

        Args:
            query_template: Query template string with {{variables}}
            params: Parameters for substitution

        Returns:
            Rendered query string
        """
        # Imported at call time rather than at module import.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from dataknobs_llm.template_utils import render_conditional_template

        return render_conditional_template(query_template, params)

    def _format_rag_results(
        self,
        results: List[Dict[str, Any]],
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Format RAG search results according to configuration.

        The header template is rendered once with ``params``; the item
        template is rendered once per result. The pieces are concatenated
        with no separator, so any line breaks have to come from the
        templates themselves.

        Args:
            results: List of search results from adapter
            rag_config: RAG configuration with formatting options
                ("header", default ""; "item_template", default "{{content}}")
            params: Parameters for template rendering

        Returns:
            Formatted RAG content string, or "" when ``results`` is empty
        """
        if not results:
            return ""

        # Get formatting configuration
        header = rag_config.get("header", "")
        item_template = rag_config.get("item_template", "{{content}}")

        formatted_header = self._render_rag_query(header, params)

        formatted_items = []
        for position, result in enumerate(results, start=1):
            # A result may carry "metadata": None; treat that like a missing
            # key so the ** expansion below cannot raise TypeError.
            metadata = result.get("metadata") or {}

            # Metadata fields are spread last so templates can reference them
            # directly; on a name clash they take precedence over the
            # caller's params and the index/content/score/metadata keys.
            item_params = {
                **params,
                "index": position,
                "content": result.get("content", ""),
                "score": result.get("score", 0.0),
                "metadata": metadata,
                **metadata,
            }
            formatted_items.append(
                self._render_rag_query(item_template, item_params)
            )

        # Combine header and items
        return formatted_header + "".join(formatted_items)

    def _merge_params_with_defaults(
        self,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Merge template defaults with runtime parameters.

        Args:
            template_dict: Template dictionary with defaults
            runtime_params: Runtime parameters (higher priority)

        Returns:
            New merged parameters dictionary; neither input is modified
        """
        # "defaults" can be present but None; fall back to an empty mapping
        # instead of failing on the ** expansion.
        defaults = template_dict.get("defaults") or {}
        return {**defaults, **(runtime_params or {})}

    def _prepare_validation_config(
        self,
        template_dict: PromptTemplate,
        validation_override: Optional[ValidationLevel]
    ) -> Optional[ValidationConfig]:
        """Prepare validation configuration with override support.

        Args:
            template_dict: Template dictionary
            validation_override: Optional validation level override

        Returns:
            The template's own validation configuration (possibly None) when
            no override is given; otherwise a separate configuration object
            whose ``level`` is the override.
        """
        import copy

        validation_config = template_dict.get("validation")

        if validation_override is None:
            return validation_config

        if validation_config is None:
            effective_config = ValidationConfig()
        else:
            # Work on a shallow copy: assigning ``level`` on the object held
            # by template_dict would change the template itself and carry
            # the override into every later use of that template.
            effective_config = copy.copy(validation_config)

        effective_config.level = validation_override
        return effective_config

    def get_required_parameters(
        self,
        name: str,
        prompt_type: str = "system",
        index: int = 0,
        **kwargs: Any
    ) -> List[str]:
        """Get list of required parameters for a prompt.

        Useful for validation before rendering.

        Args:
            name: Prompt identifier
            prompt_type: Type of prompt ("system" or "user"). Any value other
                than "system" is looked up as a user prompt.
            index: Prompt variant index (for user prompts)
            **kwargs: Additional parameters passed to library

        Returns:
            List of required parameter names (empty when the template has no
            validation configuration)

        Raises:
            ValueError: If prompt not found
        """
        # Retrieve template
        if prompt_type == "system":
            template_dict = self.library.get_system_prompt(name, **kwargs)
        else:
            template_dict = self.library.get_user_prompt(name, index=index, **kwargs)

        if template_dict is None:
            raise ValueError(f"Prompt not found: {name} (type={prompt_type})")

        # Extract required parameters from validation config
        validation_config = template_dict.get("validation")
        if validation_config:
            return list(validation_config.required_params)

        return []

    def __repr__(self) -> str:
        """Return a string representation of this builder."""
        return (
            f"{self.__class__.__name__}("
            f"library={self.library}, "
            f"adapters={list(self.adapters.keys())}"
            f")"
        )

    # ===== Abstract Methods (Must be implemented by subclasses) =====

    @abstractmethod
    def _validate_adapters(self) -> None:
        """Validate that all adapters are the correct type (sync or async).

        Raises:
            TypeError: If adapter types don't match builder type
        """
        pass

    @abstractmethod
    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: Optional[ValidationLevel],
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        index: int = 0,
        **kwargs: Any
    ):
        """Internal method to render a prompt template.

        This is the core rendering logic that differs between sync/async.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            index: Prompt index (for user prompts)
            **kwargs: Additional parameters

        Returns:
            RenderResult with rendered content and metadata (including
            rag_metadata if return_rag_metadata=True)
        """
        pass

    @abstractmethod
    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        index: int,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ):
        """Execute RAG searches and format results for injection.

        This method differs between sync (sequential) and async (parallel).

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            index: Prompt index (for user prompts)
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted content
            - rag_metadata: Optional dict with full RAG details (if capture_metadata=True)
        """
        pass