Coverage for src/dataknobs_llm/prompts/implementations/filesystem_library.py: 19%

134 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-31 15:21 -0600

1"""Filesystem-based prompt library implementation. 

2 

3This module provides a prompt library that loads prompts from a directory structure 

4on the filesystem. Supports YAML and JSON formats. 

5 

6Directory Structure: 

7 prompts/ 

8 system/ 

9 analyze_code.yaml 

10 review_pr.yaml 

11 user/ 

12 code_question.yaml 

13 followup_question.yaml 

14 messages/ 

15 conversation.yaml 

16 

17File Format (YAML): 

18 template: | 

19 Analyze this {{language}} code: 

20 {{code}} 

21 defaults: 

22 language: python 

23 validation: 

24 level: error 

25 required_params: 

26 - code 

27 metadata: 

28 author: alice 

29 version: "1.0" 

30""" 

31 

32import json 

33import logging 

34import yaml 

35from pathlib import Path 

36from typing import Any, Dict, List, Optional, Union 

37 

38from ..base import ( 

39 BasePromptLibrary, 

40 PromptTemplate, 

41 RAGConfig, 

42 MessageIndex, 

43 ValidationConfig, 

44 ValidationLevel, 

45) 

46 

47logger = logging.getLogger(__name__) 

48 

49 

50class FileSystemPromptLibrary(BasePromptLibrary): 

51 """Prompt library that loads prompts from filesystem directory. 

52 

53 Features: 

54 - Supports YAML and JSON file formats 

55 - Organized directory structure (system/, user/, messages/) 

56 - Caching of loaded prompts for performance 

57 - Automatic file discovery and loading 

58 - Validation config parsing from files 

59 

60 Example: 

61 >>> library = FileSystemPromptLibrary(Path("prompts/")) 

62 >>> template = library.get_system_prompt("analyze_code") 

63 >>> print(template["template"]) 

64 """ 

65 

def __init__(
    self,
    prompt_dir: Union[str, Path],
    auto_load: bool = True,
    file_extensions: Optional[List[str]] = None
):
    """Set up a prompt library rooted at a directory on disk.

    Args:
        prompt_dir: Root directory that holds the prompt files
        auto_load: When true (the default), call ``load_all()`` before returning
        file_extensions: File suffixes to consider; a missing or empty value
            falls back to [".yaml", ".yml", ".json"]

    Raises:
        ValueError: If ``prompt_dir`` does not exist or is not a directory
    """
    super().__init__()

    root = Path(prompt_dir)
    self.prompt_dir = root
    if file_extensions:
        self.file_extensions = file_extensions
    else:
        self.file_extensions = [".yaml", ".yml", ".json"]

    # The attributes above are assigned before validation, so they are set
    # even when one of the checks below raises.
    if not root.exists():
        raise ValueError(f"Prompt directory does not exist: {self.prompt_dir}")
    if not root.is_dir():
        raise ValueError(f"Prompt path is not a directory: {self.prompt_dir}")

    if not auto_load:
        return
    self.load_all()

94 

def load_all(self) -> None:
    """Run every loader in turn: system, user, message indexes, RAG configs."""
    loader_names = (
        "_load_system_prompts",
        "_load_user_prompts",
        "_load_message_indexes",
        "_load_rag_configs",
    )
    for loader_name in loader_names:
        # Resolve each loader only when its turn comes, then invoke it.
        getattr(self, loader_name)()

101 

def _load_system_prompts(self) -> None:
    """Load all system prompts from system/ directory.

    Only regular files whose suffix is listed in ``self.file_extensions``
    are considered; the file stem becomes the prompt name. Files are
    visited in sorted path order. A file that fails to load is logged at
    error level and skipped so the remaining files are still processed.
    """
    system_dir = self.prompt_dir / "system"
    # is_dir() instead of exists(): a regular file that happens to be named
    # "system" would otherwise reach iterdir() and raise NotADirectoryError.
    if not system_dir.is_dir():
        logger.debug(f"System prompts directory not found: {system_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share a stem, e.g.
    # a.json and a.yaml -- presumably the later one replaces the earlier
    # in the cache; confirm against _cache_system_prompt).
    for file_path in sorted(system_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            template = self._load_prompt_template(file_path)
            self._cache_system_prompt(name, template)
            logger.debug(f"Loaded system prompt: {name}")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole load.
            logger.error(f"Error loading system prompt {name}: {e}")

118 

def _load_user_prompts(self) -> None:
    """Load all user prompts from user/ directory.

    User prompts are loaded by name (the file stem). Files should be named:
    - question.yaml
    - followup_question.yaml
    - etc.

    Only regular files whose suffix is listed in ``self.file_extensions``
    are considered. Files are visited in sorted path order. A file that
    fails to load is logged at error level and skipped so the remaining
    files are still processed.
    """
    user_dir = self.prompt_dir / "user"
    # is_dir() instead of exists(): a regular file that happens to be named
    # "user" would otherwise reach iterdir() and raise NotADirectoryError.
    if not user_dir.is_dir():
        logger.debug(f"User prompts directory not found: {user_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share a stem --
    # presumably the later one replaces the earlier in the cache; confirm
    # against _cache_user_prompt).
    for file_path in sorted(user_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            template = self._load_prompt_template(file_path)
            self._cache_user_prompt(name, template)
            logger.debug(f"Loaded user prompt: {name}")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole load.
            logger.error(f"Error loading user prompt {name}: {e}")

141 

def _load_message_indexes(self) -> None:
    """Load all message indexes from messages/ directory.

    Only regular files whose suffix is listed in ``self.file_extensions``
    are considered; the file stem becomes the index name. Files are
    visited in sorted path order. A file that fails to load is logged at
    error level and skipped so the remaining files are still processed.
    """
    messages_dir = self.prompt_dir / "messages"
    # is_dir() instead of exists(): a regular file that happens to be named
    # "messages" would otherwise reach iterdir() and raise
    # NotADirectoryError.
    if not messages_dir.is_dir():
        logger.debug(f"Message indexes directory not found: {messages_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share a stem --
    # presumably the later one replaces the earlier in the cache; confirm
    # against _cache_message_index).
    for file_path in sorted(messages_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            message_index = self._load_message_index(file_path)
            self._cache_message_index(name, message_index)
            logger.debug(f"Loaded message index: {name}")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole load.
            logger.error(f"Error loading message index {name}: {e}")

158 

def _load_rag_configs(self) -> None:
    """Load all RAG configurations from rag/ directory.

    Only regular files whose suffix is listed in ``self.file_extensions``
    are considered; the file stem becomes the config name. Files are
    visited in sorted path order. A file that fails to load is logged at
    error level and skipped so the remaining files are still processed.
    """
    rag_dir = self.prompt_dir / "rag"
    # is_dir() instead of exists(): a regular file that happens to be named
    # "rag" would otherwise reach iterdir() and raise NotADirectoryError.
    if not rag_dir.is_dir():
        logger.debug(f"RAG configs directory not found: {rag_dir}")
        return

    # iterdir() yields entries in arbitrary order; sorting makes the load
    # order reproducible (relevant when two files share a stem --
    # presumably the later one replaces the earlier in the cache; confirm
    # against _cache_rag_config).
    for file_path in sorted(rag_dir.iterdir()):
        if not file_path.is_file() or file_path.suffix not in self.file_extensions:
            continue

        name = file_path.stem
        try:
            rag_config = self._load_rag_config(file_path)
            self._cache_rag_config(name, rag_config)
            logger.debug(f"Loaded RAG config: {name}")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole load.
            logger.error(f"Error loading RAG config {name}: {e}")

175 

def _load_prompt_template(self, file_path: Path) -> PromptTemplate:
    """Read a prompt file and turn its contents into a prompt template.

    Args:
        file_path: Location of the prompt template file

    Returns:
        Whatever ``_parse_prompt_template`` builds from the file contents
    """
    # Parsing is delegated to the inherited _parse_prompt_template so that
    # files are interpreted consistently; per the original note this also
    # covers templates that carry 'extends' but no 'template' field.
    return self._parse_prompt_template(self._load_file(file_path))

190 

def _load_message_index(self, file_path: Path) -> MessageIndex:
    """Read a message-index file and assemble a MessageIndex from it.

    Args:
        file_path: Location of the message index file

    Returns:
        MessageIndex dictionary. "messages" is always present (an empty
        list when the file has none); "rag_configs" and "metadata" appear
        only when the file defines them.
    """
    raw = self._load_file(file_path)

    index: MessageIndex = {"messages": raw.get("messages", [])}

    # Each inline RAG entry goes through the shared parser.
    if "rag_configs" in raw:
        parsed_configs = []
        for entry in raw["rag_configs"]:
            parsed_configs.append(self._parse_rag_config(entry))
        index["rag_configs"] = parsed_configs

    # Metadata is copied across untouched.
    if "metadata" in raw:
        index["metadata"] = raw["metadata"]

    return index

218 

def _load_rag_config(self, file_path: Path) -> RAGConfig:
    """Read a standalone RAG configuration file.

    Args:
        file_path: Location of the RAG config file

    Returns:
        Whatever ``_parse_rag_config`` builds from the file contents
    """
    return self._parse_rag_config(self._load_file(file_path))

230 

def _load_file(self, file_path: Path) -> Dict[str, Any]:
    """Load and parse a YAML or JSON file.

    The format is chosen from the file suffix: ``.yaml``/``.yml`` files are
    parsed with ``yaml.safe_load`` (a falsy result, such as an empty
    document, is returned as ``{}``) and ``.json`` files with
    ``json.loads``.

    Args:
        file_path: Path to the file

    Returns:
        Parsed file contents. A dictionary is expected, but the top-level
        type of the parsed document is not checked here.

    Raises:
        ValueError: If the file extension is unsupported, or if reading or
            parsing the file fails. For read/parse failures the underlying
            exception is chained as ``__cause__``.
    """
    suffix = file_path.suffix
    if suffix not in (".yaml", ".yml", ".json"):
        # Reject unknown formats up front rather than raising inside the
        # try block below and immediately re-wrapping our own error. The
        # message text matches what callers saw before.
        raise ValueError(
            f"Error loading file {file_path}: "
            f"Unsupported file extension: {suffix}"
        )

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if suffix == ".json":
            return json.loads(content)

        return yaml.safe_load(content) or {}

    except Exception as e:
        # Chain explicitly so the traceback keeps the root cause.
        raise ValueError(f"Error loading file {file_path}: {e}") from e

258 

259 # Note: _parse_prompt_template(), _parse_validation_config(), and 

260 # _parse_rag_config() are now inherited from BasePromptLibrary 

261 

def get_system_prompt(self, name: str, **kwargs) -> Optional[PromptTemplate]:
    """Look up a system prompt in the cache.

    Args:
        name: Name of the system prompt
        **kwargs: Accepted for interface compatibility; ignored here

    Returns:
        PromptTemplate if found, None otherwise
    """
    cached = self._get_cached_system_prompt(name)
    return cached

273 

def get_user_prompt(self, name: str, **kwargs) -> Optional[PromptTemplate]:
    """Look up a user prompt in the cache.

    Args:
        name: Name of the user prompt
        **kwargs: Accepted for interface compatibility; ignored here

    Returns:
        PromptTemplate if found, None otherwise
    """
    cached = self._get_cached_user_prompt(name)
    return cached

285 

def get_message_index(self, name: str, **kwargs) -> Optional[MessageIndex]:
    """Look up a message index in the cache.

    Args:
        name: Name of the message index
        **kwargs: Accepted for interface compatibility; ignored here

    Returns:
        MessageIndex if found, None otherwise
    """
    cached = self._get_cached_message_index(name)
    return cached

297 

def get_rag_config(self, name: str, **kwargs) -> Optional[RAGConfig]:
    """Look up a standalone RAG configuration in the cache.

    Args:
        name: Name of the RAG config
        **kwargs: Accepted for interface compatibility; ignored here

    Returns:
        RAGConfig if found, None otherwise
    """
    cached = self._get_cached_rag_config(name)
    return cached

309 

def get_prompt_rag_configs(
    self,
    prompt_name: str,
    prompt_type: str = "user",
    **kwargs
) -> List[RAGConfig]:
    """Collect the RAG configurations attached to one prompt.

    Inline configs (the template's "rag_configs") come first, followed by
    the standalone configs named in "rag_config_refs", in listed order.

    Args:
        prompt_name: Name of the prompt
        prompt_type: "system" selects a system prompt; any other value is
            treated as a user prompt
        **kwargs: Accepted for interface compatibility; ignored here

    Returns:
        A new list of RAGConfig; empty when the prompt is unknown or has
        no RAG configuration. References that resolve to a falsy value are
        skipped with a warning.
    """
    lookup = self.get_system_prompt if prompt_type == "system" else self.get_user_prompt
    template = lookup(prompt_name)
    if template is None:
        return []

    # Start from a copy of the inline configs so the template is not mutated.
    configs = list(template["rag_configs"]) if "rag_configs" in template else []

    if "rag_config_refs" not in template:
        return configs

    for ref_name in template["rag_config_refs"]:
        resolved = self.get_rag_config(ref_name)
        if not resolved:
            logger.warning(
                f"RAG config reference '{ref_name}' not found "
                f"for prompt '{prompt_name}'"
            )
            continue
        configs.append(resolved)

    return configs

356 

def list_system_prompts(self) -> List[str]:
    """Return the names of all cached system prompts.

    Returns:
        A new list holding the keys of the system prompt cache
    """
    names = self._system_prompt_cache.keys()
    return [*names]

364 

def list_user_prompts(self) -> List[str]:
    """Return the names of all cached user prompts.

    Returns:
        A new list holding the keys of the user prompt cache
    """
    names = self._user_prompt_cache.keys()
    return [*names]

372 

def list_message_indexes(self) -> List[str]:
    """Return the names of all cached message indexes.

    Returns:
        A new list holding the keys of the message index cache
    """
    names = self._message_index_cache.keys()
    return [*names]