Coverage for src/dataknobs_llm/prompts/implementations/filesystem_library.py: 87%
134 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-08 13:51 -0700
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-08 13:51 -0700
1"""Filesystem-based prompt library implementation.

This module provides a prompt library that loads prompts from a directory structure
on the filesystem. Supports YAML and JSON formats.

Directory Structure:
    prompts/
        system/
            analyze_code.yaml
            review_pr.yaml
        user/
            code_question.yaml
            followup_question.yaml
        messages/
            conversation.yaml
        rag/
            (standalone RAG configuration files, one per config name)

File Format (YAML):
    template: |
        Analyze this {{language}} code:
        {{code}}
    defaults:
        language: python
    validation:
        level: error
        required_params:
            - code
    metadata:
        author: alice
        version: "1.0"
30"""
32import json
33import logging
34import yaml
35from pathlib import Path
36from typing import Any, Dict, List, Union
38from ..base import (
39 BasePromptLibrary,
40 PromptTemplateDict,
41 RAGConfig,
42 MessageIndex,
43)
45logger = logging.getLogger(__name__)
48class FileSystemPromptLibrary(BasePromptLibrary):
49 """Prompt library that loads prompts from filesystem directory.
51 Features:
52 - Supports YAML and JSON file formats
53 - Organized directory structure (system/, user/, messages/)
54 - Caching of loaded prompts for performance
55 - Automatic file discovery and loading
56 - Validation config parsing from files
58 Example:
59 >>> library = FileSystemPromptLibrary(Path("prompts/"))
60 >>> template = library.get_system_prompt("analyze_code")
61 >>> print(template["template"])
62 """
def __init__(
    self,
    prompt_dir: Union[str, Path],
    auto_load: bool = True,
    file_extensions: List[str] | None = None
):
    """Set up a library rooted at ``prompt_dir``.

    Args:
        prompt_dir: Root directory that holds the prompt files.
        auto_load: When true (the default), call ``load_all()`` before
            returning.
        file_extensions: Suffixes of the files to load. A missing or empty
            value falls back to ``[".yaml", ".yml", ".json"]``.

    Raises:
        ValueError: If ``prompt_dir`` does not exist or is not a directory.
    """
    super().__init__()

    root = Path(prompt_dir)
    self.prompt_dir = root
    self.file_extensions = (
        file_extensions if file_extensions else [".yaml", ".yml", ".json"]
    )

    # Reject an unusable root before any loading is attempted.
    if not root.exists():
        raise ValueError(f"Prompt directory does not exist: {root}")
    if not root.is_dir():
        raise ValueError(f"Prompt path is not a directory: {root}")

    if auto_load:
        self.load_all()
def load_all(self) -> None:
    """Run every category loader against the prompt directory.

    The loaders run in a fixed order: system prompts, user prompts,
    message indexes, then standalone RAG configs.
    """
    loaders = (
        self._load_system_prompts,
        self._load_user_prompts,
        self._load_message_indexes,
        self._load_rag_configs,
    )
    for run_loader in loaders:
        run_loader()
def _load_system_prompts(self) -> None:
    """Load all system prompts from the ``system/`` directory.

    Every regular file directly inside ``system/`` whose suffix is listed in
    ``self.file_extensions`` is parsed with ``_load_prompt_template`` and
    handed to ``_cache_system_prompt`` under its file stem. A file that
    fails to load is logged and skipped so the remaining files still load.
    """
    system_dir = self.prompt_dir / "system"
    # is_dir() rather than exists(): a regular file that happens to be named
    # "system" must be treated as "no directory", not crash iterdir().
    if not system_dir.is_dir():
        logger.debug(f"System prompts directory not found: {system_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share the same stem).
    for file_path in sorted(system_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            template = self._load_prompt_template(file_path)
            self._cache_system_prompt(name, template)
            logger.debug(f"Loaded system prompt: {name}")
        except Exception as e:
            # Deliberately best-effort: report the bad file and carry on.
            logger.error(f"Error loading system prompt {name}: {e}")
def _load_user_prompts(self) -> None:
    """Load all user prompts from the ``user/`` directory.

    User prompts are loaded by name, the name being the file stem. Files
    should be named, for example:
        - question.yaml
        - followup_question.yaml

    Every regular file directly inside ``user/`` whose suffix is listed in
    ``self.file_extensions`` is parsed with ``_load_prompt_template`` and
    handed to ``_cache_user_prompt``. A file that fails to load is logged
    and skipped so the remaining files still load.
    """
    user_dir = self.prompt_dir / "user"
    # is_dir() rather than exists(): a regular file that happens to be named
    # "user" must be treated as "no directory", not crash iterdir().
    if not user_dir.is_dir():
        logger.debug(f"User prompts directory not found: {user_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share the same stem).
    for file_path in sorted(user_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            template = self._load_prompt_template(file_path)
            self._cache_user_prompt(name, template)
            logger.debug(f"Loaded user prompt: {name}")
        except Exception as e:
            # Deliberately best-effort: report the bad file and carry on.
            logger.error(f"Error loading user prompt {name}: {e}")
def _load_message_indexes(self) -> None:
    """Load all message indexes from the ``messages/`` directory.

    Every regular file directly inside ``messages/`` whose suffix is listed
    in ``self.file_extensions`` is parsed with ``_load_message_index`` and
    handed to ``_cache_message_index`` under its file stem. A file that
    fails to load is logged and skipped so the remaining files still load.
    """
    messages_dir = self.prompt_dir / "messages"
    # is_dir() rather than exists(): a regular file that happens to be named
    # "messages" must be treated as "no directory", not crash iterdir().
    if not messages_dir.is_dir():
        logger.debug(f"Message indexes directory not found: {messages_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share the same stem).
    for file_path in sorted(messages_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            message_index = self._load_message_index(file_path)
            self._cache_message_index(name, message_index)
            logger.debug(f"Loaded message index: {name}")
        except Exception as e:
            # Deliberately best-effort: report the bad file and carry on.
            logger.error(f"Error loading message index {name}: {e}")
def _load_rag_configs(self) -> None:
    """Load all standalone RAG configurations from the ``rag/`` directory.

    Every regular file directly inside ``rag/`` whose suffix is listed in
    ``self.file_extensions`` is parsed with ``_load_rag_config`` and handed
    to ``_cache_rag_config`` under its file stem. A file that fails to load
    is logged and skipped so the remaining files still load.
    """
    rag_dir = self.prompt_dir / "rag"
    # is_dir() rather than exists(): a regular file that happens to be named
    # "rag" must be treated as "no directory", not crash iterdir().
    if not rag_dir.is_dir():
        logger.debug(f"RAG configs directory not found: {rag_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share the same stem).
    for file_path in sorted(rag_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            rag_config = self._load_rag_config(file_path)
            self._cache_rag_config(name, rag_config)
            logger.debug(f"Loaded RAG config: {name}")
        except Exception as e:
            # Deliberately best-effort: report the bad file and carry on.
            logger.error(f"Error loading RAG config {name}: {e}")
def _load_prompt_template(self, file_path: Path) -> PromptTemplateDict:
    """Read one prompt file and turn it into a prompt template.

    Args:
        file_path: Location of the prompt template file.

    Returns:
        Whatever ``_parse_prompt_template`` builds from the file contents.
    """
    # Parsing is delegated to the inherited _parse_prompt_template so every
    # library parses templates the same way; that includes templates that
    # have an 'extends' field but no 'template' field.
    raw = self._load_file(file_path)
    parsed = self._parse_prompt_template(raw)
    return parsed
def _load_message_index(self, file_path: Path) -> MessageIndex:
    """Read one message-index file and assemble a ``MessageIndex``.

    Args:
        file_path: Location of the message index file.

    Returns:
        A dictionary that always has ``"messages"`` (an empty list when the
        file has none) and carries ``"rag_configs"`` and ``"metadata"`` only
        when the file defines them.
    """
    raw = self._load_file(file_path)

    index: MessageIndex = {"messages": raw.get("messages", [])}

    # Optional sections are copied over only when present in the file.
    if "rag_configs" in raw:
        index["rag_configs"] = list(map(self._parse_rag_config, raw["rag_configs"]))
    if "metadata" in raw:
        index["metadata"] = raw["metadata"]

    return index
def _load_rag_config(self, file_path: Path) -> RAGConfig:
    """Read one RAG configuration file and parse it.

    Args:
        file_path: Location of the RAG config file.

    Returns:
        Whatever ``_parse_rag_config`` builds from the file contents.
    """
    raw = self._load_file(file_path)
    parsed = self._parse_rag_config(raw)
    return parsed
def _load_file(self, file_path: Path) -> Dict[str, Any]:
    """Load and parse a YAML or JSON file into a dictionary.

    The format is chosen from the file suffix (compared case-insensitively):
    ``.yaml``/``.yml`` are parsed with ``yaml.safe_load``, ``.json`` with
    ``json.loads``. An empty document (for example a blank YAML file or a
    JSON ``null``) yields an empty dictionary for both formats.

    Args:
        file_path: Path to the file.

    Returns:
        Parsed file contents as a dictionary.

    Raises:
        ValueError: If the file cannot be read, the extension is
            unsupported, parsing fails, or the top-level value is a
            non-empty value that is not a mapping.
    """
    suffix = file_path.suffix.lower()
    try:
        with open(file_path, encoding="utf-8") as f:
            content = f.read()

        if suffix in (".yaml", ".yml"):
            data = yaml.safe_load(content)
        elif suffix == ".json":
            data = json.loads(content)
        else:
            raise ValueError(f"Unsupported file extension: {file_path.suffix}")

        # Empty documents are normalised to {} for both formats, so JSON
        # files behave the same way YAML files already did.
        if not data:
            return {}

        # Callers use dict methods such as .get() on the result; reject
        # lists and scalars here with a message that names the real problem.
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a mapping at the top level, got {type(data).__name__}"
            )
        return data

    except Exception as e:
        # Every failure is reported as ValueError carrying the file path.
        raise ValueError(f"Error loading file {file_path}: {e}") from e
# Note: _parse_prompt_template(), _parse_validation_config(), and
# _parse_rag_config() are inherited from BasePromptLibrary.
def get_system_prompt(self, name: str, **kwargs: Any) -> PromptTemplateDict | None:
    """Look up a system prompt in the cache.

    Args:
        name: System prompt name.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``_get_cached_system_prompt(name)``: the cached
        PromptTemplateDict, or None when nothing is cached under that name.
    """
    cached = self._get_cached_system_prompt(name)
    return cached
def get_user_prompt(self, name: str, **kwargs: Any) -> PromptTemplateDict | None:
    """Look up a user prompt in the cache.

    Args:
        name: User prompt name.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``_get_cached_user_prompt(name)``: the cached
        PromptTemplateDict, or None when nothing is cached under that name.
    """
    cached = self._get_cached_user_prompt(name)
    return cached
def get_message_index(self, name: str, **kwargs: Any) -> MessageIndex | None:
    """Look up a message index in the cache.

    Args:
        name: Message index name.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``_get_cached_message_index(name)``: the cached
        MessageIndex, or None when nothing is cached under that name.
    """
    cached = self._get_cached_message_index(name)
    return cached
def get_rag_config(self, name: str, **kwargs: Any) -> RAGConfig | None:
    """Look up a standalone RAG configuration in the cache.

    Args:
        name: RAG config name.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``_get_cached_rag_config(name)``: the cached
        RAGConfig, or None when nothing is cached under that name.
    """
    cached = self._get_cached_rag_config(name)
    return cached
def get_prompt_rag_configs(
    self,
    prompt_name: str,
    prompt_type: str = "user",
    **kwargs: Any
) -> List[RAGConfig]:
    """Collect the RAG configurations attached to one prompt.

    Inline configs (the template's ``"rag_configs"``) come first, followed
    by each entry of ``"rag_config_refs"`` that resolves through
    ``get_rag_config``. A reference that does not resolve is skipped and
    reported with a warning.

    Args:
        prompt_name: Prompt name.
        prompt_type: ``"system"`` selects the system prompt; any other
            value selects the user prompt.
        **kwargs: Additional arguments (unused).

    Returns:
        List of RAGConfig; empty when the prompt is unknown or defines none.
    """
    lookup = self.get_system_prompt if prompt_type == "system" else self.get_user_prompt
    template = lookup(prompt_name)
    if template is None:
        return []

    resolved = []

    # Inline configs are taken as-is.
    if "rag_configs" in template:
        resolved += template["rag_configs"]

    # Named references are looked up among the standalone configs.
    for ref_name in template["rag_config_refs"] if "rag_config_refs" in template else ():
        ref_config = self.get_rag_config(ref_name)
        if not ref_config:
            logger.warning(
                f"RAG config reference '{ref_name}' not found "
                f"for prompt '{prompt_name}'"
            )
            continue
        resolved.append(ref_config)

    return resolved
def list_system_prompts(self) -> List[str]:
    """Return the names of the system prompts currently cached.

    Returns:
        A new list holding the keys of ``self._system_prompt_cache``.
    """
    names = self._system_prompt_cache.keys()
    return [*names]
def list_user_prompts(self) -> List[str]:
    """Return the names of the user prompts currently cached.

    Returns:
        A new list holding the keys of ``self._user_prompt_cache``.
    """
    names = self._user_prompt_cache.keys()
    return [*names]
def list_message_indexes(self) -> List[str]:
    """Return the names of the message indexes currently cached.

    Returns:
        A new list holding the keys of ``self._message_index_cache``.
    """
    names = self._message_index_cache.keys()
    return [*names]