Coverage for src/dataknobs_llm/prompts/builders/prompt_builder.py: 15%
101 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:04 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:04 -0600
"""Synchronous prompt builder for constructing prompts with parameter resolution and RAG.

This module provides the PromptBuilder class which coordinates between:
- Prompt libraries (template sources)
- Resource adapters (data sources)
- Template renderer (rendering engine)

The builder handles:
- Parameter resolution from multiple sources
- RAG content retrieval and injection
- Validation enforcement
- Template defaults merging
"""
15import logging
16from typing import Any, Dict, List, Optional
18from ..base import (
19 PromptTemplate,
20 RAGConfig,
21 ValidationLevel,
22 RenderResult,
23)
24from ..adapters import ResourceAdapter
25from .base_prompt_builder import BasePromptBuilder
27logger = logging.getLogger(__name__)
class PromptBuilder(BasePromptBuilder):
    """Synchronous prompt builder for constructing prompts with RAG and validation.

    This class provides a high-level API for building prompts by:
    1. Retrieving prompt templates from a library
    2. Resolving parameters from adapters and runtime values
    3. Executing RAG searches via adapters
    4. Injecting RAG content into templates
    5. Rendering final prompts with validation

    Helpers such as ``_merge_params_with_defaults``, ``_render_rag_query``,
    ``_format_rag_results`` and ``_compute_rag_query_hash`` are not defined in
    this class; they are presumably inherited from ``BasePromptBuilder``.

    Example:
        >>> library = ConfigPromptLibrary(config)
        >>> adapters = {
        ...     'config': DictResourceAdapter(config_data),
        ...     'docs': DataknobsBackendAdapter(docs_db)
        ... }
        >>> builder = PromptBuilder(library=library, adapters=adapters)
        >>>
        >>> # Render a system prompt
        >>> result = builder.render_system_prompt(
        ...     'analyze_code',
        ...     params={'code': code_snippet, 'language': 'python'}
        ... )
    """

    def __init__(
        self,
        library,
        adapters: Optional[Dict[str, ResourceAdapter]] = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ):
        """Initialize the synchronous prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters for parameter
                resolution and RAG searches
            default_validation: Default validation level for templates without
                explicit validation configuration
            raise_on_rag_error: If True, raise exceptions on RAG failures;
                if False (default), log warning and continue

        Raises:
            TypeError: If any adapter is async (use AsyncPromptBuilder instead)
        """
        super().__init__(library, adapters, default_validation, raise_on_rag_error)
        # Fail fast at construction time rather than on the first search.
        self._validate_adapters()

    def _validate_adapters(self) -> None:
        """Validate that all adapters are synchronous.

        Raises:
            TypeError: If any adapter reports ``is_async()`` as true
        """
        for name, adapter in self.adapters.items():
            if adapter.is_async():
                raise TypeError(
                    f"Adapter '{name}' is async. "
                    "Use AsyncPromptBuilder for async adapters."
                )

    def render_system_prompt(
        self,
        name: str,
        params: Optional[Dict[str, Any]] = None,
        include_rag: bool = True,
        validation_override: Optional[ValidationLevel] = None,
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a system prompt with parameters and optional RAG content.

        Args:
            name: System prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If the library returns no template for ``name``

        Example:
            >>> # Capture RAG metadata
            >>> result = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     return_rag_metadata=True
            ... )
            >>> print(result.rag_metadata)
            >>>
            >>> # Reuse cached RAG
            >>> result2 = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     cached_rag=result.rag_metadata
            ... )
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_system_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"System prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="system",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def render_user_prompt(
        self,
        name: str,
        params: Optional[Dict[str, Any]] = None,
        include_rag: bool = True,
        validation_override: Optional[ValidationLevel] = None,
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a user prompt with parameters and optional RAG content.

        Args:
            name: User prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided, use these cached RAG results instead
                of executing new searches
            **kwargs: Additional parameters passed to library

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If the library returns no template for ``name``
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_user_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"User prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="user",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplate,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: Optional[ValidationLevel],
        return_rag_metadata: bool = False,
        cached_rag: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> RenderResult:
        """Internal method to render a prompt template synchronously.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters forwarded to the RAG config lookup

        Returns:
            RenderResult with rendered content and metadata. The builder adds
            ``prompt_name``, ``prompt_type``, ``include_rag`` and
            ``used_cached_rag`` to ``result.metadata``; ``used_cached_rag`` is
            True only when the cached results were actually injected.
        """
        # Extract template components
        template = template_dict.get("template", "")
        template_metadata = template_dict.get("metadata", {})

        # Step 1: Merge defaults with runtime params
        all_params = self._merge_params_with_defaults(template_dict, runtime_params)

        # Step 2: Execute or reuse RAG searches
        rag_metadata = None
        # Tracks whether the cache was really consulted. A cache that is
        # supplied but skipped (include_rag=False) or empty (falls through to
        # fresh searches) must not be reported as used.
        used_cached_rag = False
        if include_rag:
            if cached_rag:
                # Use cached RAG results
                rag_content = self._extract_formatted_content_from_cache(cached_rag)
                used_cached_rag = True
                if return_rag_metadata:
                    rag_metadata = cached_rag  # Pass through cached metadata
            else:
                # Execute fresh RAG searches
                rag_content, rag_metadata = self._execute_rag_searches_impl(
                    prompt_name=prompt_name,
                    prompt_type=prompt_type,
                    params=all_params,
                    capture_metadata=return_rag_metadata,
                    **kwargs
                )

            # Merge RAG content into parameters
            all_params.update(rag_content)

        # Step 3: Prepare validation config with override
        validation_config = self._prepare_validation_config(
            template_dict, validation_override
        )

        # Step 4: Render template with validation
        result = self._renderer.render(
            template=template,
            params=all_params,
            validation=validation_config,
            template_metadata=template_metadata
        )

        # Attach RAG metadata if requested
        if return_rag_metadata and rag_metadata:
            result.rag_metadata = rag_metadata

        # Add builder metadata
        result.metadata.update({
            "prompt_name": prompt_name,
            "prompt_type": prompt_type,
            "include_rag": include_rag,
            "used_cached_rag": used_cached_rag,
        })

        return result

    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ) -> tuple[Dict[str, str], Optional[Dict[str, Any]]]:
        """Execute RAG searches and format results for injection.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters passed to the library lookup

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted content
            - rag_metadata: Optional dict keyed by placeholder with full RAG
              details (only when capture_metadata=True and the prompt has RAG
              configs; otherwise None)

        Raises:
            RuntimeError: If a search fails and ``raise_on_rag_error`` was set;
                the original exception is chained as the cause
        """
        from datetime import datetime

        # Get RAG configurations for this prompt
        rag_configs = self.library.get_prompt_rag_configs(
            prompt_name=prompt_name,
            prompt_type=prompt_type,
            **kwargs
        )

        if not rag_configs:
            return {}, None

        rag_content = {}
        rag_metadata = {} if capture_metadata else None

        for rag_config in rag_configs:
            placeholder = rag_config.get("placeholder", "RAG_CONTENT")

            try:
                if capture_metadata:
                    # Execute with metadata capture
                    formatted_content, metadata = self._execute_single_rag_with_metadata(
                        rag_config, params
                    )
                    rag_content[placeholder] = formatted_content
                    if metadata:
                        rag_metadata[placeholder] = metadata
                else:
                    # Execute without metadata (skips hashing and bookkeeping)
                    rag_content[placeholder] = self._execute_single_rag_search(
                        rag_config, params
                    )

            except Exception as e:
                # Deliberately broad: any adapter/config failure is either
                # escalated or downgraded to a warning, per configuration.
                error_msg = f"RAG search failed for {prompt_name}: {e}"
                if self._raise_on_rag_error:
                    raise RuntimeError(error_msg) from e

                logger.warning(error_msg)
                # Use empty content on failure so rendering can continue
                rag_content[placeholder] = ""
                if capture_metadata:
                    rag_metadata[placeholder] = {
                        "error": str(e),
                        "timestamp": datetime.now().isoformat()
                    }

        return rag_content, rag_metadata

    def _run_single_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Resolve the adapter, run one search, and format its results.

        Shared implementation behind ``_execute_single_rag_search`` and
        ``_execute_single_rag_with_metadata`` so both paths validate the
        config and call the adapter identically.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Dictionary with keys ``adapter_name``, ``query``, ``k``,
            ``filters``, ``results`` (raw search results) and
            ``formatted_content``.

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If the named adapter is not registered
            Exception: Whatever the adapter search or formatting raises
        """
        # Get adapter
        adapter_name = rag_config.get("adapter_name")
        if not adapter_name:
            raise ValueError("RAG config missing 'adapter_name'")

        if adapter_name not in self.adapters:
            raise KeyError(
                f"Adapter '{adapter_name}' not found. "
                f"Available adapters: {list(self.adapters.keys())}"
            )

        adapter = self.adapters[adapter_name]

        # Render query template
        query = self._render_rag_query(rag_config.get("query", ""), params)

        # Execute search (synchronous)
        k = rag_config.get("k", 5)
        filters = rag_config.get("filters")
        search_results = adapter.search(query=query, k=k, filters=filters)

        # Format results
        formatted_content = self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params
        )

        return {
            "adapter_name": adapter_name,
            "query": query,
            "k": k,
            "filters": filters,
            "results": search_results,
            "formatted_content": formatted_content,
        }

    def _execute_single_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Execute a single RAG search and format results.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Formatted RAG content string

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If adapter not found
            Exception: If search fails
        """
        return self._run_single_rag_search(rag_config, params)["formatted_content"]

    def _execute_single_rag_with_metadata(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, Dict[str, Any]]:
        """Execute a single RAG search with metadata capture.

        This method executes a RAG search and captures detailed metadata
        including the query, results, and query hash for caching.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (formatted_content, metadata):
            - formatted_content: Formatted RAG content string
            - metadata: Dictionary with RAG metadata including:
                - adapter_name: Name of the adapter used
                - query: Rendered query string
                - query_hash: Value from ``_compute_rag_query_hash`` for
                  cache matching
                - k: Number of results requested
                - filters: Filters applied to search
                - timestamp: ISO format timestamp (naive local time)
                - results: Raw search results
                - formatted_content: Formatted output
                - item_template: Template used for formatting
                - header: Header text used

        Raises:
            ValueError: If the config has no ``adapter_name``
            KeyError: If adapter not found
            Exception: If search fails
        """
        from datetime import datetime

        outcome = self._run_single_rag_search(rag_config, params)
        adapter_name = outcome["adapter_name"]
        query = outcome["query"]
        formatted_content = outcome["formatted_content"]

        # Build metadata (key order kept stable for downstream consumers)
        metadata = {
            "adapter_name": adapter_name,
            "query": query,
            # Hash of adapter + rendered query, used to match cached results
            "query_hash": self._compute_rag_query_hash(adapter_name, query),
            "k": outcome["k"],
            "filters": outcome["filters"],
            "timestamp": datetime.now().isoformat(),
            "results": outcome["results"],  # Store raw results
            "formatted_content": formatted_content,
            "item_template": rag_config.get("item_template"),
            "header": rag_config.get("header"),
        }

        return formatted_content, metadata