Coverage for src/dataknobs_llm/prompts/builders/base_prompt_builder.py: 95%
56 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-08 13:51 -0700
"""Base prompt builder with shared functionality for sync and async builders.

This module provides BasePromptBuilder, an abstract base class that contains
all the shared logic between PromptBuilder and AsyncPromptBuilder. This reduces
code duplication and ensures consistent behavior across both implementations.
"""

import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List

from ..base import (
    AbstractPromptLibrary,
    PromptTemplateDict,
    RAGConfig,
    ValidationLevel,
    ValidationConfig,
)
from ..rendering import TemplateRenderer

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class BasePromptBuilder(ABC):
    """Abstract base class with shared functionality for prompt builders.

    This class provides common methods for:
    - Template parameter merging
    - RAG query rendering
    - RAG result formatting
    - Required parameter extraction
    - String representation

    Subclasses must implement the async/sync-specific methods for:
    - Rendering prompts (with I/O operations)
    - Executing RAG searches (with I/O operations)
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Dict[str, Any] | None = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ) -> None:
        """Initialize the base prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters; ``None`` (or any
                falsy value) is stored as an empty dict
            default_validation: Default validation level for templates,
                forwarded to the internal ``TemplateRenderer``
            raise_on_rag_error: If True, raise exceptions on RAG failures
        """
        self.library = library
        self.adapters = adapters or {}
        self._renderer = TemplateRenderer(default_validation=default_validation)
        self._raise_on_rag_error = raise_on_rag_error

    # ===== Shared Helper Methods =====

    def _extract_formatted_content_from_cache(
        self,
        cached_rag: Dict[str, Any]
    ) -> Dict[str, str]:
        r"""Extract formatted content from cached RAG metadata.

        This method extracts the pre-formatted RAG content from cache
        so it can be injected directly into templates without re-executing
        searches or re-formatting results.

        Args:
            cached_rag: Cached RAG metadata dict with structure:
                {
                    "placeholder_name": {
                        "formatted_content": "...",
                        "query": "...",
                        "results": [...],
                        ...
                    }
                }

        Returns:
            Dict mapping placeholder names to formatted content strings.
            An entry without a "formatted_content" key maps to "".

        Example:
            >>> cache = {
            ...     "RAG_CONTENT": {
            ...         "formatted_content": "# Docs\n\n1. Python is...",
            ...         "query": "python docs",
            ...     }
            ... }
            >>> content = builder._extract_formatted_content_from_cache(cache)
            >>> content
            {'RAG_CONTENT': '# Docs\n\n1. Python is...'}
        """
        return {
            placeholder: cache_entry.get("formatted_content", "")
            for placeholder, cache_entry in cached_rag.items()
        }

    def _compute_rag_query_hash(
        self,
        adapter_name: str,
        query: str
    ) -> str:
        """Compute a hash for RAG query matching.

        This hash is used to match cached RAG results with new queries.
        Two queries with the same hash are considered equivalent and
        can reuse cached results.

        Args:
            adapter_name: Name of the adapter
            query: Rendered query string

        Returns:
            SHA256 hex digest of adapter_name:query

        Example:
            >>> hash1 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash2 = builder._compute_rag_query_hash("docs", "python decorators")
            >>> hash1 == hash2
            True
        """
        import hashlib

        # The "adapter:query" layout is kept exactly as-is so hashes computed
        # by earlier runs (and stored alongside cached RAG results) still match.
        combined = f"{adapter_name}:{query}"
        return hashlib.sha256(combined.encode()).hexdigest()

    def _render_rag_query(self, query_template: str, params: Dict[str, Any]) -> str:
        """Render a RAG query template with parameters.

        Args:
            query_template: Query template string with {{variables}}
            params: Parameters for substitution

        Returns:
            Rendered query string
        """
        # Imported at call time, as in the original implementation.
        from dataknobs_llm.template_utils import render_conditional_template

        return render_conditional_template(query_template, params)

    def _format_rag_results(
        self,
        results: List[Dict[str, Any]],
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Format RAG search results according to configuration.

        The header (``rag_config["header"]``, default "") is rendered once with
        ``params``. Each result is rendered with ``rag_config["item_template"]``
        (default "{{content}}") and the pieces are concatenated with no
        separator.

        Per-item template variables, lowest to highest priority:
        1. the caller's ``params``
        2. the result's metadata fields, exposed as top-level names
        3. the reserved names ``index`` (1-based), ``content``, ``score``
           and ``metadata``

        Args:
            results: List of search results from adapter
            rag_config: RAG configuration with formatting options
            params: Parameters for template rendering

        Returns:
            Formatted RAG content string, or "" when there are no results
        """
        if not results:
            return ""

        # Get formatting configuration
        header = rag_config.get("header", "")
        item_template = rag_config.get("item_template", "{{content}}")

        # Render header
        formatted_header = self._render_rag_query(header, params)

        # Format each result
        formatted_items = []
        for i, result in enumerate(results, start=1):
            # A result may carry "metadata": None; treat that as "no metadata"
            # instead of failing on the ** expansion below.
            metadata = result.get("metadata") or {}

            # Reserved keys are written last so a metadata field that happens
            # to be called "content", "score", "index" or "metadata" cannot
            # silently replace the real value in the rendered item.
            item_params = {
                **params,
                **metadata,  # Also expose metadata fields directly
                "index": i,
                "content": result.get("content", ""),
                "score": result.get("score", 0.0),
                "metadata": metadata,
            }

            # Render item
            formatted_items.append(
                self._render_rag_query(item_template, item_params)
            )

        # Combine header and items
        return formatted_header + "".join(formatted_items)

    def _merge_params_with_defaults(
        self,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Merge template defaults with runtime parameters.

        Args:
            template_dict: Template dictionary with defaults
            runtime_params: Runtime parameters (higher priority)

        Returns:
            New merged parameters dictionary; neither input is modified
        """
        # "defaults" may be absent or explicitly None; both mean "no defaults".
        defaults = template_dict.get("defaults") or {}
        return {**defaults, **runtime_params}

    def _prepare_validation_config(
        self,
        template_dict: PromptTemplateDict,
        validation_override: ValidationLevel | None
    ) -> ValidationConfig | None:
        """Prepare validation configuration with override support.

        Without an override, the template's own "validation" entry is returned
        unchanged (possibly None). With an override, the returned config has
        ``level`` set to the override; the template's config object itself is
        left untouched.

        Args:
            template_dict: Template dictionary
            validation_override: Optional validation level override

        Returns:
            Validation configuration or None
        """
        validation_config = template_dict.get("validation")

        # No override: hand back whatever the template declares.
        if validation_override is None:
            return validation_config

        if validation_config is None:
            validation_config = ValidationConfig()
        else:
            import copy

            # Work on a shallow copy so a per-call override does not leak into
            # the template's own config object and affect later renders.
            validation_config = copy.copy(validation_config)

        validation_config.level = validation_override
        return validation_config

    def get_required_parameters(
        self,
        name: str,
        prompt_type: str = "system",
        index: int = 0,
        **kwargs: Any
    ) -> List[str]:
        """Get list of required parameters for a prompt.

        Useful for validation before rendering.

        Args:
            name: Prompt identifier
            prompt_type: Type of prompt ("system" or "user"); any value other
                than "system" is looked up as a user prompt
            index: Prompt variant index (for user prompts)
            **kwargs: Additional parameters passed to library

        Returns:
            List of required parameter names (empty when the template has no
            validation config)

        Raises:
            ValueError: If prompt not found
        """
        # Retrieve template
        if prompt_type == "system":
            template_dict = self.library.get_system_prompt(name, **kwargs)
        else:
            template_dict = self.library.get_user_prompt(name, index=index, **kwargs)

        if template_dict is None:
            raise ValueError(f"Prompt not found: {name} (type={prompt_type})")

        # Extract required parameters from validation config
        validation_config = template_dict.get("validation")
        if validation_config:
            return list(validation_config.required_params)

        return []

    def __repr__(self) -> str:
        """Return a string representation of this builder."""
        return (
            f"{self.__class__.__name__}("
            f"library={self.library}, "
            f"adapters={list(self.adapters)}"
            f")"
        )

    # ===== Abstract Methods (Must be implemented by subclasses) =====

    @abstractmethod
    def _validate_adapters(self) -> None:
        """Validate that all adapters are the correct type (sync or async).

        Raises:
            TypeError: If adapter types don't match builder type
        """
        pass

    @abstractmethod
    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: ValidationLevel | None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        index: int = 0,
        **kwargs: Any
    ):
        """Internal method to render a prompt template.

        This is the core rendering logic that differs between sync/async.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            index: Prompt index (for user prompts)
            **kwargs: Additional parameters

        Returns:
            RenderResult with rendered content and metadata (including
            rag_metadata if return_rag_metadata=True)
        """
        pass

    @abstractmethod
    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        index: int,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ):
        """Execute RAG searches and format results for injection.

        This method differs between sync (sequential) and async (parallel).

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            index: Prompt index (for user prompts)
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted content
            - rag_metadata: Optional dict with full RAG details (if capture_metadata=True)
        """
        pass