Coverage for src / dataknobs_llm / prompts / adapters / dict_adapter.py: 13%

90 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 10:28 -0700

1"""Dictionary-based resource adapters. 

2 

3This module provides adapters that wrap Python dictionaries, enabling them to be 

4used as resource providers in the prompt library system. Supports both flat and 

5nested dictionaries with dot-notation key access. 

6""" 

7 

8from typing import Any, Dict, List 

9from .resource_adapter import ResourceAdapter, AsyncResourceAdapter, BaseSearchLogic 

10 

11 

class DictResourceAdapter(ResourceAdapter):
    """Synchronous adapter that exposes a Python dictionary as a resource.

    Features:
    - Nested key access using dot notation (e.g., "user.name")
    - Simple substring search across the stringified leaf values
    - Optional case-sensitive search (case-insensitive by default)
    - Filtering and deduplication delegated to BaseSearchLogic

    Example:
        >>> data = {
        ...     "user": {"name": "Alice", "age": 30},
        ...     "settings": {"theme": "dark"}
        ... }
        >>> adapter = DictResourceAdapter(data, name="config")
        >>> adapter.get_value("user.name")
        'Alice'
        >>> hits = adapter.search("Alice")
        >>> hits[0]["key"]
        'user.name'
    """

    def __init__(
        self,
        data: Dict[str, Any],
        name: str = "dict_adapter",
        case_sensitive: bool = False
    ):
        """Initialize dictionary adapter.

        Args:
            data: Dictionary to wrap as a resource. It is stored by
                reference, not copied.
            name: Name identifier for this adapter (forwarded to the base
                class initializer).
            case_sensitive: Whether search should be case-sensitive
                (default: False)
        """
        super().__init__(name=name)
        self._data = data
        self._case_sensitive = case_sensitive

    def get_value(
        self,
        key: str,
        default: Any = None,
        context: Dict[str, Any] | None = None
    ) -> Any:
        """Retrieve a value by key from the dictionary.

        A key containing dots is first resolved as a path through nested
        dictionaries (``"a.b.c"`` -> ``data["a"]["b"]["c"]``). If that path
        cannot be followed, the key is looked up literally at the top level,
        so a flat entry such as ``{"a.b": 1}`` remains reachable under the
        same key that ``search`` reports for it.

        Args:
            key: Key to look up (supports dot notation for nested access)
            default: Value to return if key is not found
            context: Optional context (unused in dict adapter)

        Returns:
            Value at the key, or default if not found
        """
        if '.' in key:
            node = self._data
            for part in key.split('.'):
                if isinstance(node, dict) and part in node:
                    node = node[part]
                else:
                    # Path is broken; fall through to the literal lookup.
                    break
            else:
                # Every path component resolved.
                return node

        return self._data.get(key, default)

    def search(
        self,
        query: str,
        k: int = 5,
        filters: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> List[Dict[str, Any]]:
        """Perform text-based search across dictionary values.

        Every leaf value of the flattened dictionary is converted with
        ``str()`` and kept when the query occurs in it as a substring (both
        sides are lower-cased unless the adapter is case-sensitive). A value
        equal to the whole query scores 1.0; any other hit scores 0.8. Note
        that an empty query is a substring of every value.

        All matches are collected before ``filters``, ``min_score`` and
        ``deduplicate`` are applied, and only the final list is truncated to
        ``k``; stopping at the first ``k`` raw matches would let those
        post-processing steps starve the result of later, valid hits.

        Args:
            query: Search query string
            k: Maximum number of results to return; values <= 0 yield an
                empty list
            filters: Optional filters, passed to
                BaseSearchLogic.filter_results when truthy
            **kwargs: Additional search options:
                - min_score: Minimum score threshold (default: 0.0; None is
                  treated as 0.0)
                - deduplicate: Whether to deduplicate results on their
                  content (default: False)

        Returns:
            Up to ``k`` result dictionaries in dictionary traversal order.
            Each is built by BaseSearchLogic.format_search_result from the
            stringified value, its score and ``{"key": <key path>}``
            metadata, with the key path also stored under ``"key"``.
        """
        # Without this guard a negative k would slice from the end of the
        # list (results[:-1]) instead of returning nothing.
        if k <= 0:
            return []

        needle = query if self._case_sensitive else query.lower()

        results = []
        for key, value in self._flatten_dict(self._data).items():
            content = str(value)
            haystack = content if self._case_sensitive else content.lower()
            if needle not in haystack:
                continue

            # Simple scoring: exact match = 1.0, contains = 0.8
            score = 1.0 if needle == haystack else 0.8
            result = BaseSearchLogic.format_search_result(
                content,
                score=score,
                metadata={"key": key}
            )
            result["key"] = key  # Add key to top level for easier access
            results.append(result)

        if filters:
            results = BaseSearchLogic.filter_results(results, filters=filters)

        # `or 0.0` tolerates callers forwarding an explicit min_score=None.
        min_score = kwargs.get('min_score', 0.0) or 0.0
        if min_score > 0:
            results = BaseSearchLogic.filter_results(results, min_score=min_score)

        if kwargs.get('deduplicate', False):
            results = BaseSearchLogic.deduplicate_results(results, key='content')

        return results[:k]

    def _flatten_dict(
        self,
        data: Dict[str, Any],
        parent_key: str = '',
        separator: str = '.'
    ) -> Dict[str, Any]:
        """Flatten nested dictionary with dot notation keys.

        Only ``dict`` values are descended into; every other value (lists
        included) is kept as a leaf. Empty nested dictionaries contribute no
        entries. When two paths produce the same flattened key, the value
        encountered last wins.

        Args:
            data: Dictionary to flatten
            parent_key: Parent key prefix
            separator: Separator for nested keys

        Returns:
            Flattened dictionary with dot-notation keys
        """
        flat: Dict[str, Any] = {}
        for key, value in data.items():
            path = f"{parent_key}{separator}{key}" if parent_key else key
            if isinstance(value, dict):
                flat.update(self._flatten_dict(value, path, separator))
            else:
                flat[path] = value
        return flat

177 

178 

class AsyncDictResourceAdapter(AsyncResourceAdapter):
    """Asynchronous adapter that exposes a Python dictionary as a resource.

    Offers dot-notation key lookup and substring search over an in-memory
    dictionary through ``async`` methods. No awaiting happens inside the
    methods themselves; the coroutine interface exists for consistency in
    async codebases or when mixing with other async adapters.

    Example:
        >>> data = {"user": {"name": "Alice", "age": 30}}
        >>> adapter = AsyncDictResourceAdapter(data)
        >>> await adapter.get_value("user.name")
        'Alice'
    """

    def __init__(
        self,
        data: Dict[str, Any],
        name: str = "async_dict_adapter",
        case_sensitive: bool = False
    ):
        """Initialize async dictionary adapter.

        Args:
            data: Dictionary to wrap as a resource. It is stored by
                reference, not copied.
            name: Name identifier for this adapter (forwarded to the base
                class initializer).
            case_sensitive: Whether search should be case-sensitive
                (default: False)
        """
        super().__init__(name=name)
        self._data = data
        self._case_sensitive = case_sensitive

    async def get_value(
        self,
        key: str,
        default: Any = None,
        context: Dict[str, Any] | None = None
    ) -> Any:
        """Retrieve a value by key from the dictionary (async).

        A key containing dots is first resolved as a path through nested
        dictionaries (``"a.b.c"`` -> ``data["a"]["b"]["c"]``). If that path
        cannot be followed, the key is looked up literally at the top level,
        so a flat entry such as ``{"a.b": 1}`` remains reachable under the
        same key that ``search`` reports for it.

        Args:
            key: Key to look up (supports dot notation for nested access)
            default: Value to return if key is not found
            context: Optional context (unused in dict adapter)

        Returns:
            Value at the key, or default if not found
        """
        if '.' in key:
            node = self._data
            for part in key.split('.'):
                if isinstance(node, dict) and part in node:
                    node = node[part]
                else:
                    # Path is broken; fall through to the literal lookup.
                    break
            else:
                # Every path component resolved.
                return node

        return self._data.get(key, default)

    async def search(
        self,
        query: str,
        k: int = 5,
        filters: Dict[str, Any] | None = None,
        **kwargs: Any
    ) -> List[Dict[str, Any]]:
        """Perform text-based search across dictionary values (async).

        Every leaf value of the flattened dictionary is converted with
        ``str()`` and kept when the query occurs in it as a substring (both
        sides are lower-cased unless the adapter is case-sensitive). A value
        equal to the whole query scores 1.0; any other hit scores 0.8. Note
        that an empty query is a substring of every value.

        All matches are collected before ``filters``, ``min_score`` and
        ``deduplicate`` are applied, and only the final list is truncated to
        ``k``; stopping at the first ``k`` raw matches would let those
        post-processing steps starve the result of later, valid hits.

        Args:
            query: Search query string
            k: Maximum number of results to return; values <= 0 yield an
                empty list
            filters: Optional filters, passed to
                BaseSearchLogic.filter_results when truthy
            **kwargs: Additional search options:
                - min_score: Minimum score threshold (default: 0.0; None is
                  treated as 0.0)
                - deduplicate: Whether to deduplicate results on their
                  content (default: False)

        Returns:
            Up to ``k`` result dictionaries in dictionary traversal order.
            Each is built by BaseSearchLogic.format_search_result from the
            stringified value, its score and ``{"key": <key path>}``
            metadata, with the key path also stored under ``"key"``.
        """
        # Without this guard a negative k would slice from the end of the
        # list (results[:-1]) instead of returning nothing.
        if k <= 0:
            return []

        needle = query if self._case_sensitive else query.lower()

        results = []
        for key, value in self._flatten_dict(self._data).items():
            content = str(value)
            haystack = content if self._case_sensitive else content.lower()
            if needle not in haystack:
                continue

            # Simple scoring: exact match = 1.0, contains = 0.8
            score = 1.0 if needle == haystack else 0.8
            result = BaseSearchLogic.format_search_result(
                content,
                score=score,
                metadata={"key": key}
            )
            result["key"] = key  # Add key to top level for easier access
            results.append(result)

        if filters:
            results = BaseSearchLogic.filter_results(results, filters=filters)

        # `or 0.0` tolerates callers forwarding an explicit min_score=None.
        min_score = kwargs.get('min_score', 0.0) or 0.0
        if min_score > 0:
            results = BaseSearchLogic.filter_results(results, min_score=min_score)

        if kwargs.get('deduplicate', False):
            results = BaseSearchLogic.deduplicate_results(results, key='content')

        return results[:k]

    def _flatten_dict(
        self,
        data: Dict[str, Any],
        parent_key: str = '',
        separator: str = '.'
    ) -> Dict[str, Any]:
        """Flatten nested dictionary with dot notation keys.

        Only ``dict`` values are descended into; every other value (lists
        included) is kept as a leaf. Empty nested dictionaries contribute no
        entries. When two paths produce the same flattened key, the value
        encountered last wins.

        Args:
            data: Dictionary to flatten
            parent_key: Parent key prefix
            separator: Separator for nested keys

        Returns:
            Flattened dictionary with dot-notation keys
        """
        flat: Dict[str, Any] = {}
        for key, value in data.items():
            path = f"{parent_key}{separator}{key}" if parent_key else key
            if isinstance(value, dict):
                flat.update(self._flatten_dict(value, path, separator))
            else:
                flat[path] = value
        return flat