Coverage for src/dataknobs_llm/prompts/implementations/filesystem_library.py: 87%

134 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-08 13:51 -0700

1"""Filesystem-based prompt library implementation. 

2 

This module provides a prompt library that loads prompts from a directory structure
on the filesystem. Supports YAML and JSON formats.

Directory Structure:
    prompts/
        system/
            analyze_code.yaml
            review_pr.yaml
        user/
            code_question.yaml
            followup_question.yaml
        messages/
            conversation.yaml
        rag/
            (optional standalone RAG configs, one file per config)

File Format (YAML):
    template: |
      Analyze this {{language}} code:
      {{code}}
    defaults:
      language: python
    validation:
      level: error
      required_params:
        - code
    metadata:
      author: alice
      version: "1.0"

30""" 

31 

32import json 

33import logging 

34import yaml 

35from pathlib import Path 

36from typing import Any, Dict, List, Union 

37 

38from ..base import ( 

39 BasePromptLibrary, 

40 PromptTemplateDict, 

41 RAGConfig, 

42 MessageIndex, 

43) 

44 

45logger = logging.getLogger(__name__) 

46 

47 

48class FileSystemPromptLibrary(BasePromptLibrary): 

49 """Prompt library that loads prompts from filesystem directory. 

50 

51 Features: 

52 - Supports YAML and JSON file formats 

53 - Organized directory structure (system/, user/, messages/) 

54 - Caching of loaded prompts for performance 

55 - Automatic file discovery and loading 

56 - Validation config parsing from files 

57 

58 Example: 

59 >>> library = FileSystemPromptLibrary(Path("prompts/")) 

60 >>> template = library.get_system_prompt("analyze_code") 

61 >>> print(template["template"]) 

62 """ 

63 

def __init__(
    self,
    prompt_dir: Union[str, Path],
    auto_load: bool = True,
    file_extensions: List[str] | None = None
):
    """Create a prompt library backed by a directory on disk.

    Args:
        prompt_dir: Root directory containing prompt files. Accepts a string
            or a ``Path``; it is stored as a ``Path`` on ``self.prompt_dir``.
        auto_load: When true (the default), ``load_all()`` is called before
            the constructor returns.
        file_extensions: Suffixes (including the leading dot) of files to
            load. ``None`` or an empty list selects the default
            ``[".yaml", ".yml", ".json"]``.

    Raises:
        ValueError: If ``prompt_dir`` does not exist or is not a directory.
    """
    super().__init__()

    self.prompt_dir = Path(prompt_dir)
    if file_extensions:
        self.file_extensions = file_extensions
    else:
        self.file_extensions = [".yaml", ".yml", ".json"]

    # Fail fast on a bad root: existence is checked first so the two error
    # messages stay distinct.
    root_exists = self.prompt_dir.exists()
    if not root_exists:
        raise ValueError(f"Prompt directory does not exist: {self.prompt_dir}")
    root_is_directory = self.prompt_dir.is_dir()
    if not root_is_directory:
        raise ValueError(f"Prompt path is not a directory: {self.prompt_dir}")

    if auto_load:
        self.load_all()

92 

def load_all(self) -> None:
    """Load every prompt category from the filesystem directory.

    The category loaders run in a fixed order: system prompts, user
    prompts, message indexes, then RAG configs.
    """
    category_loaders = (
        self._load_system_prompts,
        self._load_user_prompts,
        self._load_message_indexes,
        self._load_rag_configs,
    )
    for run_loader in category_loaders:
        run_loader()

99 

100 def _load_system_prompts(self) -> None: 

101 """Load all system prompts from system/ directory.""" 

102 system_dir = self.prompt_dir / "system" 

103 if not system_dir.exists(): 

104 logger.debug(f"System prompts directory not found: {system_dir}") 

105 return 

106 

107 for file_path in system_dir.iterdir(): 

108 if file_path.is_file() and file_path.suffix in self.file_extensions: 

109 name = file_path.stem 

110 try: 

111 template = self._load_prompt_template(file_path) 

112 self._cache_system_prompt(name, template) 

113 logger.debug(f"Loaded system prompt: {name}") 

114 except Exception as e: 

115 logger.error(f"Error loading system prompt {name}: {e}") 

116 

117 def _load_user_prompts(self) -> None: 

118 """Load all user prompts from user/ directory. 

119 

120 User prompts are loaded by name. Files should be named: 

121 - question.yaml 

122 - followup_question.yaml 

123 - etc. 

124 """ 

125 user_dir = self.prompt_dir / "user" 

126 if not user_dir.exists(): 

127 logger.debug(f"User prompts directory not found: {user_dir}") 

128 return 

129 

130 for file_path in user_dir.iterdir(): 

131 if file_path.is_file() and file_path.suffix in self.file_extensions: 

132 name = file_path.stem 

133 try: 

134 template = self._load_prompt_template(file_path) 

135 self._cache_user_prompt(name, template) 

136 logger.debug(f"Loaded user prompt: {name}") 

137 except Exception as e: 

138 logger.error(f"Error loading user prompt {name}: {e}") 

139 

140 def _load_message_indexes(self) -> None: 

141 """Load all message indexes from messages/ directory.""" 

142 messages_dir = self.prompt_dir / "messages" 

143 if not messages_dir.exists(): 

144 logger.debug(f"Message indexes directory not found: {messages_dir}") 

145 return 

146 

147 for file_path in messages_dir.iterdir(): 

148 if file_path.is_file() and file_path.suffix in self.file_extensions: 

149 name = file_path.stem 

150 try: 

151 message_index = self._load_message_index(file_path) 

152 self._cache_message_index(name, message_index) 

153 logger.debug(f"Loaded message index: {name}") 

154 except Exception as e: 

155 logger.error(f"Error loading message index {name}: {e}") 

156 

157 def _load_rag_configs(self) -> None: 

158 """Load all RAG configurations from rag/ directory.""" 

159 rag_dir = self.prompt_dir / "rag" 

160 if not rag_dir.exists(): 

161 logger.debug(f"RAG configs directory not found: {rag_dir}") 

162 return 

163 

164 for file_path in rag_dir.iterdir(): 

165 if file_path.is_file() and file_path.suffix in self.file_extensions: 

166 name = file_path.stem 

167 try: 

168 rag_config = self._load_rag_config(file_path) 

169 self._cache_rag_config(name, rag_config) 

170 logger.debug(f"Loaded RAG config: {name}") 

171 except Exception as e: 

172 logger.error(f"Error loading RAG config {name}: {e}") 

173 

174 def _load_prompt_template(self, file_path: Path) -> PromptTemplateDict: 

175 """Load a prompt template from a file. 

176 

177 Args: 

178 file_path: Path to the prompt template file 

179 

180 Returns: 

181 PromptTemplateDict dictionary 

182 """ 

183 data = self._load_file(file_path) 

184 

185 # Use inherited _parse_prompt_template for consistent parsing 

186 # This supports templates with 'extends' but no 'template' field 

187 return self._parse_prompt_template(data) 

188 

189 def _load_message_index(self, file_path: Path) -> MessageIndex: 

190 """Load a message index from a file. 

191 

192 Args: 

193 file_path: Path to the message index file 

194 

195 Returns: 

196 MessageIndex dictionary 

197 """ 

198 data = self._load_file(file_path) 

199 

200 # Build MessageIndex 

201 message_index: MessageIndex = { 

202 "messages": data.get("messages", []), 

203 } 

204 

205 # Add optional fields 

206 if "rag_configs" in data: 

207 message_index["rag_configs"] = [ 

208 self._parse_rag_config(rag_data) 

209 for rag_data in data["rag_configs"] 

210 ] 

211 

212 if "metadata" in data: 

213 message_index["metadata"] = data["metadata"] 

214 

215 return message_index 

216 

217 def _load_rag_config(self, file_path: Path) -> RAGConfig: 

218 """Load a RAG configuration from a file. 

219 

220 Args: 

221 file_path: Path to the RAG config file 

222 

223 Returns: 

224 RAGConfig dictionary 

225 """ 

226 data = self._load_file(file_path) 

227 return self._parse_rag_config(data) 

228 

229 def _load_file(self, file_path: Path) -> Dict[str, Any]: 

230 """Load and parse a YAML or JSON file. 

231 

232 Args: 

233 file_path: Path to the file 

234 

235 Returns: 

236 Parsed file contents as dictionary 

237 

238 Raises: 

239 ValueError: If file format is unsupported or parsing fails 

240 """ 

241 try: 

242 with open(file_path, encoding='utf-8') as f: 

243 content = f.read() 

244 

245 if file_path.suffix in [".yaml", ".yml"]: 

246 return yaml.safe_load(content) or {} 

247 

248 elif file_path.suffix == ".json": 

249 return json.loads(content) 

250 

251 else: 

252 raise ValueError(f"Unsupported file extension: {file_path.suffix}") 

253 

254 except Exception as e: 

255 raise ValueError(f"Error loading file {file_path}: {e}") from e 

256 

257 # Note: _parse_prompt_template(), _parse_validation_config(), and 

258 # _parse_rag_config() are now inherited from BasePromptLibrary 

259 

def get_system_prompt(self, name: str, **kwargs: Any) -> PromptTemplateDict | None:
    """Look up a system prompt by name.

    Args:
        name: System prompt name.
        **kwargs: Additional arguments (unused in filesystem library).

    Returns:
        Whatever ``_get_cached_system_prompt`` returns for ``name``: the
        PromptTemplateDict if found, None otherwise.
    """
    cached_template = self._get_cached_system_prompt(name)
    return cached_template

271 

def get_user_prompt(self, name: str, **kwargs: Any) -> PromptTemplateDict | None:
    """Look up a user prompt by name.

    Args:
        name: User prompt name.
        **kwargs: Additional arguments (unused in filesystem library).

    Returns:
        Whatever ``_get_cached_user_prompt`` returns for ``name``: the
        PromptTemplateDict if found, None otherwise.
    """
    cached_template = self._get_cached_user_prompt(name)
    return cached_template

283 

def get_message_index(self, name: str, **kwargs: Any) -> MessageIndex | None:
    """Look up a message index by name.

    Args:
        name: Message index name.
        **kwargs: Additional arguments (unused in filesystem library).

    Returns:
        Whatever ``_get_cached_message_index`` returns for ``name``: the
        MessageIndex if found, None otherwise.
    """
    cached_index = self._get_cached_message_index(name)
    return cached_index

295 

def get_rag_config(self, name: str, **kwargs: Any) -> RAGConfig | None:
    """Look up a standalone RAG configuration by name.

    Args:
        name: RAG config name.
        **kwargs: Additional arguments (unused in filesystem library).

    Returns:
        Whatever ``_get_cached_rag_config`` returns for ``name``: the
        RAGConfig if found, None otherwise.
    """
    cached_config = self._get_cached_rag_config(name)
    return cached_config

307 

def get_prompt_rag_configs(
    self,
    prompt_name: str,
    prompt_type: str = "user",
    **kwargs: Any
) -> List[RAGConfig]:
    """Get RAG configurations for a specific prompt.

    Resolves both inline RAG configs (the template's ``"rag_configs"``) and
    references to standalone configs (the template's ``"rag_config_refs"``,
    each resolved through ``get_rag_config``). Inline configs come first,
    followed by resolved references in the order they are listed.

    Args:
        prompt_name: Prompt name.
        prompt_type: ``"system"`` selects a system prompt; any other value
            selects a user prompt.
        **kwargs: Additional arguments (unused).

    Returns:
        List of RAGConfig (empty if the prompt is unknown or defines none).
        A reference that cannot be resolved is skipped with a warning.
    """
    if prompt_type == "system":
        template = self.get_system_prompt(prompt_name)
    else:
        template = self.get_user_prompt(prompt_name)

    if template is None:
        return []

    configs = []

    # ``or ()`` tolerates a key that is present but null, which would
    # otherwise raise TypeError when iterated.
    if "rag_configs" in template:
        configs.extend(template["rag_configs"] or ())

    if "rag_config_refs" in template:
        for ref_name in template["rag_config_refs"] or ():
            ref_config = self.get_rag_config(ref_name)
            # None is the "not found" result; a present-but-empty config is
            # still a valid hit and must not be reported as missing.
            if ref_config is not None:
                configs.append(ref_config)
            else:
                logger.warning(
                    "RAG config reference '%s' not found for prompt '%s'",
                    ref_name,
                    prompt_name,
                )

    return configs

354 

def list_system_prompts(self) -> List[str]:
    """Return the names of all system prompts currently cached.

    Returns:
        A new list holding the keys of ``self._system_prompt_cache``.
    """
    cached_names = self._system_prompt_cache.keys()
    return [*cached_names]

362 

def list_user_prompts(self) -> List[str]:
    """Return the names of all user prompts currently cached.

    Returns:
        A new list holding the keys of ``self._user_prompt_cache``.
    """
    cached_names = self._user_prompt_cache.keys()
    return [*cached_names]

370 

def list_message_indexes(self) -> List[str]:
    """Return the names of all message indexes currently cached.

    Returns:
        A new list holding the keys of ``self._message_index_cache``.
    """
    cached_names = self._message_index_cache.keys()
    return [*cached_names]