Coverage for src / dataknobs_llm / prompts / builders / prompt_builder.py: 15%
101 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 10:28 -0700
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 10:28 -0700
1"""Synchronous prompt builder for constructing prompts with parameter resolution and RAG.
3This module provides the PromptBuilder class which coordinates between:
4- Prompt libraries (template sources)
5- Resource adapters (data sources)
6- Template renderer (rendering engine)
8The builder handles:
9- Parameter resolution from multiple sources
10- RAG content retrieval and injection
11- Validation enforcement
12- Template defaults merging
13"""
15import logging
16from typing import Any, Dict
18from ..base import (
19 AbstractPromptLibrary,
20 PromptTemplateDict,
21 RAGConfig,
22 ValidationLevel,
23 RenderResult,
24)
25from ..adapters import ResourceAdapter
26from .base_prompt_builder import BasePromptBuilder
28logger = logging.getLogger(__name__)
class PromptBuilder(BasePromptBuilder):
    """Synchronous prompt builder for constructing prompts with RAG and validation.

    This class provides a high-level API for building prompts by:
    1. Retrieving prompt templates from a library
    2. Resolving parameters from adapters and runtime values
    3. Executing RAG searches via adapters
    4. Injecting RAG content into templates
    5. Rendering final prompts with validation

    Example:
        >>> library = ConfigPromptLibrary(config)
        >>> adapters = {
        ...     'config': DictResourceAdapter(config_data),
        ...     'docs': DataknobsBackendAdapter(docs_db)
        ... }
        >>> builder = PromptBuilder(library=library, adapters=adapters)
        >>>
        >>> # Render a system prompt
        >>> result = builder.render_system_prompt(
        ...     'analyze_code',
        ...     params={'code': code_snippet, 'language': 'python'}
        ... )
    """

    def __init__(
        self,
        library: AbstractPromptLibrary,
        adapters: Dict[str, ResourceAdapter] | None = None,
        default_validation: ValidationLevel = ValidationLevel.WARN,
        raise_on_rag_error: bool = False
    ):
        """Initialize the synchronous prompt builder.

        Args:
            library: Prompt library to retrieve templates from
            adapters: Dictionary of named resource adapters for parameter
                resolution and RAG searches
            default_validation: Default validation level for templates without
                explicit validation configuration
            raise_on_rag_error: If True, raise exceptions on RAG failures;
                if False (default), log warning and continue

        Raises:
            TypeError: If any adapter is async (use AsyncPromptBuilder instead)
        """
        super().__init__(library, adapters, default_validation, raise_on_rag_error)
        # Fail fast: an async adapter cannot be driven by the blocking calls below.
        self._validate_adapters()

    def _validate_adapters(self) -> None:
        """Validate that all adapters are synchronous.

        Raises:
            TypeError: If any adapter reports ``is_async()`` as true
        """
        for name, adapter in self.adapters.items():
            if adapter.is_async():
                raise TypeError(
                    f"Adapter '{name}' is async. "
                    "Use AsyncPromptBuilder for async adapters."
                )

    def render_system_prompt(
        self,
        name: str,
        params: Dict[str, Any] | None = None,
        include_rag: bool = True,
        validation_override: ValidationLevel | None = None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a system prompt with parameters and optional RAG content.

        Args:
            name: System prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters passed to the library lookups

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If the library returns no template for ``name``.
                Validation failures are raised by the renderer (not shown
                here) and propagate unchanged.

        Example:
            >>> # Capture RAG metadata
            >>> result = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     return_rag_metadata=True
            ... )
            >>> print(result.rag_metadata)
            >>>
            >>> # Reuse cached RAG
            >>> result2 = builder.render_system_prompt(
            ...     'code_question',
            ...     params={'language': 'python'},
            ...     cached_rag=result.rag_metadata
            ... )
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_system_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"System prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="system",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def render_user_prompt(
        self,
        name: str,
        params: Dict[str, Any] | None = None,
        include_rag: bool = True,
        validation_override: ValidationLevel | None = None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Render a user prompt with parameters and optional RAG content.

        Args:
            name: User prompt identifier
            params: Runtime parameters to use in rendering
            include_rag: Whether to include RAG content (default: True)
            validation_override: Override validation level for this render
            return_rag_metadata: If True, attach RAG metadata to result
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters passed to the library lookups

        Returns:
            RenderResult with rendered content and metadata

        Raises:
            ValueError: If the library returns no template for ``name``.
                Validation failures are raised by the renderer (not shown
                here) and propagate unchanged.
        """
        params = params or {}

        # Retrieve template from library
        template_dict = self.library.get_user_prompt(name, **kwargs)
        if template_dict is None:
            raise ValueError(f"User prompt not found: {name}")

        # Render the prompt
        return self._render_prompt_impl(
            prompt_name=name,
            prompt_type="user",
            template_dict=template_dict,
            runtime_params=params,
            include_rag=include_rag,
            validation_override=validation_override,
            return_rag_metadata=return_rag_metadata,
            cached_rag=cached_rag,
            **kwargs
        )

    def _render_prompt_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        template_dict: PromptTemplateDict,
        runtime_params: Dict[str, Any],
        include_rag: bool,
        validation_override: ValidationLevel | None,
        return_rag_metadata: bool = False,
        cached_rag: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> RenderResult:
        """Internal method to render a prompt template synchronously.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            template_dict: Template dictionary from library
            runtime_params: Runtime parameters
            include_rag: Whether to include RAG content
            validation_override: Validation level override
            return_rag_metadata: If True, capture and return RAG metadata
            cached_rag: If provided (and non-empty), use these cached RAG
                results instead of executing new searches
            **kwargs: Additional parameters forwarded to the RAG config lookup

        Returns:
            RenderResult with rendered content and metadata. The result's
            ``metadata`` gains ``prompt_name``, ``prompt_type``,
            ``include_rag`` and ``used_cached_rag``; the last one is True
            only when the cached results were actually injected.
        """
        # Extract template components
        template = template_dict.get("template", "")
        template_metadata = template_dict.get("metadata", {})

        # Step 1: Merge defaults with runtime params
        all_params = self._merge_params_with_defaults(template_dict, runtime_params)

        # Step 2: Execute or reuse RAG searches
        rag_metadata = None
        # Tracks whether the cache was really consumed. ``cached_rag is not None``
        # alone is not enough: the cache is ignored when include_rag is False or
        # when the cache dict is empty (fresh searches run instead).
        used_cached_rag = False
        if include_rag:
            if cached_rag:
                # Use cached RAG results
                rag_content = self._extract_formatted_content_from_cache(cached_rag)
                used_cached_rag = True
                if return_rag_metadata:
                    rag_metadata = cached_rag  # Pass through cached metadata
            else:
                # Execute fresh RAG searches
                rag_content, rag_metadata = self._execute_rag_searches_impl(
                    prompt_name=prompt_name,
                    prompt_type=prompt_type,
                    params=all_params,
                    capture_metadata=return_rag_metadata,
                    **kwargs
                )

            # Merge RAG content into parameters (RAG placeholders win on clash)
            all_params.update(rag_content)

        # Step 3: Prepare validation config with override
        validation_config = self._prepare_validation_config(
            template_dict, validation_override
        )

        # Step 4: Render template with validation
        result = self._renderer.render(
            template=template,
            params=all_params,
            validation=validation_config,
            template_metadata=template_metadata
        )

        # Attach RAG metadata if requested (skipped when nothing was captured)
        if return_rag_metadata and rag_metadata:
            result.rag_metadata = rag_metadata

        # Add builder metadata
        result.metadata.update({
            "prompt_name": prompt_name,
            "prompt_type": prompt_type,
            "include_rag": include_rag,
            "used_cached_rag": used_cached_rag,
        })

        return result

    def _execute_rag_searches_impl(
        self,
        prompt_name: str,
        prompt_type: str,
        params: Dict[str, Any],
        capture_metadata: bool = False,
        **kwargs: Any
    ) -> tuple[Dict[str, str], Dict[str, Any] | None]:
        """Execute RAG searches and format results for injection.

        Args:
            prompt_name: Name of the prompt
            prompt_type: Type of prompt ("system" or "user")
            params: Resolved parameters for query templating
            capture_metadata: If True, capture RAG metadata
            **kwargs: Additional parameters passed to the library's
                RAG config lookup

        Returns:
            Tuple of (rag_content, rag_metadata):
            - rag_content: Dictionary mapping placeholder names to formatted
              content; a failed search contributes an empty string
            - rag_metadata: Dict keyed by placeholder when
              capture_metadata=True (failed searches record ``error`` and
              ``timestamp``); None otherwise or when the prompt has no RAG
              configurations

        Raises:
            RuntimeError: If a search fails and the builder was configured
                to raise on RAG errors; the original exception is chained.
        """
        # Local import kept at method scope; hoisted out of the per-config loop.
        from datetime import datetime

        # Get RAG configurations for this prompt
        rag_configs = self.library.get_prompt_rag_configs(
            prompt_name=prompt_name,
            prompt_type=prompt_type,
            **kwargs
        )

        if not rag_configs:
            return {}, None

        rag_content = {}
        rag_metadata = {} if capture_metadata else None

        for rag_config in rag_configs:
            placeholder = rag_config.get("placeholder", "RAG_CONTENT")

            try:
                if capture_metadata:
                    # Execute with metadata capture
                    formatted_content, metadata = self._execute_single_rag_with_metadata(
                        rag_config, params
                    )
                    rag_content[placeholder] = formatted_content
                    if metadata and rag_metadata is not None:
                        rag_metadata[placeholder] = metadata
                else:
                    # Execute without metadata (skips hashing and bookkeeping)
                    rag_content[placeholder] = self._execute_single_rag_search(
                        rag_config, params
                    )

            except Exception as e:
                # Deliberately broad: RAG is best-effort unless the caller
                # opted into strict mode via raise_on_rag_error.
                error_msg = f"RAG search failed for {prompt_name}: {e}"
                if self._raise_on_rag_error:
                    raise RuntimeError(error_msg) from e

                logger.warning(error_msg)
                # Use empty content on failure so the template still renders
                rag_content[placeholder] = ""
                if rag_metadata is not None:
                    rag_metadata[placeholder] = {
                        "error": str(e),
                        "timestamp": datetime.now().isoformat()
                    }

        return rag_content, rag_metadata

    def _prepare_sync_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, ResourceAdapter, str, Any, Any]:
        """Resolve the adapter and search arguments shared by both RAG paths.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (adapter_name, adapter, query, k, filters) where
            ``query`` is the rendered query template, ``k`` defaults to 5
            and ``filters`` is None when the config has no "filters" entry.

        Raises:
            ValueError: If the config has no (or an empty) 'adapter_name'
            KeyError: If the named adapter is not registered on this builder
        """
        adapter_name = rag_config.get("adapter_name")
        if not adapter_name:
            raise ValueError("RAG config missing 'adapter_name'")

        if adapter_name not in self.adapters:
            raise KeyError(
                f"Adapter '{adapter_name}' not found. "
                f"Available adapters: {list(self.adapters.keys())}"
            )

        adapter = self.adapters[adapter_name]

        # Render query template
        query = self._render_rag_query(rag_config.get("query", ""), params)

        k = rag_config.get("k", 5)
        filters = rag_config.get("filters")
        return adapter_name, adapter, query, k, filters

    def _execute_single_rag_search(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> str:
        """Execute a single RAG search and format results.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Formatted RAG content string

        Raises:
            ValueError: If the config is missing 'adapter_name'
            KeyError: If adapter not found
            Exception: If search fails
        """
        _, adapter, query, k, filters = self._prepare_sync_rag_search(
            rag_config, params
        )

        # Execute search (synchronous)
        search_results = adapter.search(query=query, k=k, filters=filters)

        # Format results
        return self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params
        )

    def _execute_single_rag_with_metadata(
        self,
        rag_config: RAGConfig,
        params: Dict[str, Any]
    ) -> tuple[str, Dict[str, Any]]:
        """Execute a single RAG search with metadata capture.

        This method executes a RAG search and captures detailed metadata
        including the query, results, and query hash for caching.

        Args:
            rag_config: RAG configuration
            params: Parameters for query templating

        Returns:
            Tuple of (formatted_content, metadata):
            - formatted_content: Formatted RAG content string
            - metadata: Dictionary with RAG metadata including:
                - adapter_name: Name of the adapter used
                - query: Rendered query string
                - query_hash: Hash from ``_compute_rag_query_hash`` used
                  for cache matching
                - k: Number of results requested
                - filters: Filters applied to search
                - timestamp: ISO format timestamp (naive local time)
                - results: Raw search results
                - formatted_content: Formatted output
                - item_template: Template used for formatting
                - header: Header text used

        Raises:
            ValueError: If the config is missing 'adapter_name'
            KeyError: If adapter not found
            Exception: If search fails
        """
        from datetime import datetime

        adapter_name, adapter, query, k, filters = self._prepare_sync_rag_search(
            rag_config, params
        )

        # Compute query hash for cache matching (before the search, as a
        # failure here should not cost a search round-trip)
        query_hash = self._compute_rag_query_hash(adapter_name, query)

        # Execute search (synchronous)
        search_results = adapter.search(query=query, k=k, filters=filters)

        # Format results
        formatted_content = self._format_rag_results(
            results=search_results,
            rag_config=rag_config,
            params=params
        )

        # Build metadata
        metadata = {
            "adapter_name": adapter_name,
            "query": query,
            "query_hash": query_hash,
            "k": k,
            "filters": filters,
            "timestamp": datetime.now().isoformat(),
            "results": search_results,  # Store raw results
            "formatted_content": formatted_content,
            "item_template": rag_config.get("item_template"),
            "header": rag_config.get("header"),
        }

        return formatted_content, metadata