Coverage for src / dataknobs_common / testing.py: 81%

83 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-08 17:37 -0700

"""Test utilities for dataknobs packages.

This module provides pytest utilities for service availability checking,
test configuration factories, and fixture helpers.

Example:
    ```python
    import pytest
    from dataknobs_common.testing import (
        is_ollama_available,
        requires_ollama,
        get_test_bot_config,
    )

    # Skip test if Ollama not available
    @pytest.mark.skipif(not is_ollama_available(), reason="Ollama not available")
    def test_with_ollama():
        ...

    # Or use the marker
    @requires_ollama
    def test_with_ollama_marker():
        ...

    # Get test configuration
    config = get_test_bot_config(use_echo_llm=True)
    ```
"""

29 

30import importlib.util 

31import logging 

32import subprocess 

33from pathlib import Path 

34from typing import Any 

35 

# Logger for this module, keyed by the module's import name.
logger: logging.Logger = logging.getLogger(__name__)

37 

38 

# Service Availability Checks

40 

41 

def is_ollama_available() -> bool:
    """Report whether the ``ollama`` CLI can be invoked successfully.

    Runs ``ollama list`` with a 5-second timeout and treats a zero exit
    status as "available".

    Returns:
        True when the command exits with status 0; False when it exits with
        a non-zero status, times out, or cannot be launched at all.
    """
    command = ["ollama", "list"]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        # A missing binary, a launch failure, or a hung CLI all count as
        # "not available" rather than as an error for the caller.
        return False
    return completed.returncode == 0

59 

60 

def is_ollama_model_available(model_name: str = "nomic-embed-text") -> bool:
    """Check if a specific Ollama model is installed locally.

    Runs ``ollama list`` once (5-second timeout) and compares ``model_name``
    against the first whitespace-separated column of each output row. A row
    matches when that column equals ``model_name`` exactly, or when the part
    before the first ``:`` equals ``model_name`` (so ``"nomic-embed-text"``
    matches ``nomic-embed-text:latest``).

    Matching whole names instead of searching the raw output avoids false
    positives such as ``"llama3"`` matching ``llama3.1:8b`` or an empty name
    matching everything.

    Args:
        model_name: Model to look for, with or without a ``:tag`` suffix
            (default: nomic-embed-text)

    Returns:
        True if a listed model matches; False if none does, if
        ``model_name`` is empty, or if ``ollama list`` fails, times out, or
        cannot be launched.
    """
    if not model_name:
        return False

    try:
        result = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return False

    # A failed listing says nothing about installed models; do not parse it.
    if result.returncode != 0:
        return False

    for row in result.stdout.splitlines():
        fields = row.split()
        # Skip blank rows and the "NAME  ID  SIZE  MODIFIED" header row.
        if not fields or fields[:2] == ["NAME", "ID"]:
            continue
        listed_name = fields[0]
        if listed_name == model_name or listed_name.partition(":")[0] == model_name:
            return True
    return False

84 

85 

def is_faiss_available() -> bool:
    """Report whether a module named ``faiss`` can be located.

    Returns:
        True when an import spec is found for ``faiss``, False otherwise
    """
    faiss_spec = importlib.util.find_spec("faiss")
    return faiss_spec is not None

93 

94 

def is_chromadb_available() -> bool:
    """Report whether a module named ``chromadb`` can be located.

    Returns:
        True when an import spec is found for ``chromadb``, False otherwise
    """
    chromadb_spec = importlib.util.find_spec("chromadb")
    return chromadb_spec is not None

102 

103 

def is_redis_available(host: str = "localhost", port: int = 6379) -> bool:
    """Check whether something accepts TCP connections at the Redis address.

    This is a port probe only: it opens a TCP connection and closes it
    again without speaking the Redis protocol, so any listener on that
    port counts as "available".

    Args:
        host: Redis host
        port: Redis port

    Returns:
        True if a TCP connection could be established, False if the
        connection was refused, timed out, or the host could not be resolved
    """
    import socket

    try:
        # create_connection tries every address the host resolves to (IPv4
        # and IPv6), and the with-block closes the socket on every path.
        # The 1-second timeout applies to each connection attempt.
        with socket.create_connection((host, port), timeout=1):
            return True
    except OSError:
        return False

124 

125 

def is_package_available(package_name: str) -> bool:
    """Check if a Python package is available.

    Args:
        package_name: Name of the package to check. Dotted names such as
            ``"pkg.sub"`` are accepted; looking one up imports the parent
            package.

    Returns:
        True if an import spec is found for the name, False otherwise,
        including when the name is empty, when a parent package of a dotted
        name is missing, or when the lookup itself fails.
    """
    if not package_name:
        return False
    try:
        return importlib.util.find_spec(package_name) is not None
    except (ImportError, ValueError):
        # find_spec raises ModuleNotFoundError when the parent of a dotted
        # name cannot be imported, and ValueError for modules whose
        # __spec__ is unset; both mean "not usable" for this check.
        return False

136 

137 

# Pytest Markers

139 

140 

try:
    import pytest

    # The condition of each module-level marker is evaluated once, when this
    # module is imported.
    requires_ollama = pytest.mark.skipif(
        not is_ollama_available(), reason="Ollama service not available"
    )
    requires_faiss = pytest.mark.skipif(
        not is_faiss_available(), reason="FAISS not installed"
    )
    requires_chromadb = pytest.mark.skipif(
        not is_chromadb_available(), reason="ChromaDB not installed"
    )
    requires_redis = pytest.mark.skipif(
        not is_redis_available(), reason="Redis not available"
    )

    def requires_package(package_name: str) -> Any:
        """Build a skip marker for tests that need a given package.

        Args:
            package_name: Name of the required package

        Returns:
            pytest.mark.skipif marker that skips when the package is missing
        """
        package_missing = not is_package_available(package_name)
        return pytest.mark.skipif(
            package_missing,
            reason=f"{package_name} not installed",
        )

    def requires_ollama_model(model_name: str = "nomic-embed-text") -> Any:
        """Build a skip marker for tests that need a given Ollama model.

        Args:
            model_name: Name of the required model

        Returns:
            pytest.mark.skipif marker that skips when the model is missing
        """
        model_missing = not is_ollama_model_available(model_name)
        return pytest.mark.skipif(
            model_missing,
            reason=f"Ollama model {model_name} not available",
        )

except ImportError:
    # pytest is not installed: keep the names importable as placeholders.
    requires_ollama = requires_faiss = requires_chromadb = requires_redis = None  # type: ignore

    def requires_package(package_name: str) -> Any:  # type: ignore
        """Placeholder used when pytest is not installed; returns None."""
        return None

    def requires_ollama_model(model_name: str = "nomic-embed-text") -> Any:  # type: ignore
        """Placeholder used when pytest is not installed; returns None."""
        return None

204 

205 

# Test Configuration Factories

207 

208 

209def get_test_bot_config( 

210 use_echo_llm: bool = True, 

211 use_in_memory_storage: bool = True, 

212 include_memory: bool = False, 

213 system_prompt: str | None = None, 

214) -> dict[str, Any]: 

215 """Get a test bot configuration. 

216 

217 Args: 

218 use_echo_llm: Use echo LLM instead of real LLM (default: True) 

219 use_in_memory_storage: Use in-memory conversation storage (default: True) 

220 include_memory: Include buffer memory configuration (default: False) 

221 system_prompt: Optional system prompt content 

222 

223 Returns: 

224 Bot configuration dictionary suitable for DynaBot.from_config() 

225 

226 Example: 

227 ```python 

228 config = get_test_bot_config( 

229 use_echo_llm=True, 

230 system_prompt="You are a test assistant." 

231 ) 

232 bot = await DynaBot.from_config(config) 

233 ``` 

234 """ 

235 config: dict[str, Any] = { 

236 "llm": { 

237 "provider": "echo" if use_echo_llm else "openai", 

238 "model": "test" if use_echo_llm else "gpt-4o-mini", 

239 "temperature": 0.7, 

240 }, 

241 "conversation_storage": { 

242 "backend": "memory" if use_in_memory_storage else "file", 

243 }, 

244 } 

245 

246 if include_memory: 

247 config["memory"] = { 

248 "type": "buffer", 

249 "max_messages": 10, 

250 } 

251 

252 if system_prompt: 

253 config["system_prompt"] = system_prompt 

254 

255 return config 

256 

257 

def get_test_rag_config(
    use_in_memory_store: bool = True,
    embedding_provider: str = "ollama",
    embedding_model: str = "nomic-embed-text",
    *,
    dimensions: int = 768,
) -> dict[str, Any]:
    """Get a test RAG/knowledge base configuration.

    Args:
        use_in_memory_store: Use in-memory vector store (default: True);
            when False the "faiss" backend is selected
        embedding_provider: Embedding provider (default: "ollama")
        embedding_model: Embedding model name (default: "nomic-embed-text")
        dimensions: Vector size written to the vector store section
            (default: 768, the previously hard-coded value). Pass the output
            size of ``embedding_model`` when overriding the model.
            NOTE(review): 768 presumably matches the default model - confirm.

    Returns:
        Knowledge base configuration dictionary

    Example:
        ```python
        config = get_test_rag_config(use_in_memory_store=True)
        bot_config = get_test_bot_config()
        bot_config["knowledge_base"] = config
        ```
    """
    return {
        "type": "rag",
        "vector_store": {
            "backend": "memory" if use_in_memory_store else "faiss",
            "dimensions": dimensions,
            "metric": "cosine",
        },
        "embedding_provider": embedding_provider,
        "embedding_model": embedding_model,
        "chunking": {
            "max_chunk_size": 800,
            "chunk_overlap": 100,
        },
        "retrieval": {
            "top_k": 5,
            "score_threshold": 0.7,
        },
    }

298 

299 

# Test File Helpers

301 

302 

303def create_test_markdown_files(tmp_path: Path) -> list[str]: 

304 """Create test markdown files for ingestion. 

305 

306 Args: 

307 tmp_path: Temporary directory path (from pytest fixture) 

308 

309 Returns: 

310 List of created file paths as strings 

311 

312 Example: 

313 ```python 

314 def test_ingestion(tmp_path): 

315 files = create_test_markdown_files(tmp_path) 

316 # files contains paths to test markdown documents 

317 ``` 

318 """ 

319 files = [] 

320 

321 # Create test markdown file 1 

322 md1 = tmp_path / "test_doc1.md" 

323 md1.write_text( 

324 """# Test Document 1 

325 

326## Introduction 

327 

328This is a test document for validating ingestion and retrieval. 

329 

330### Key Points 

331 

3321. First important point 

3332. Second important point 

3343. Third important point 

335 

336## Details 

337 

338More detailed information about the topic goes here. 

339""" 

340 ) 

341 files.append(str(md1)) 

342 

343 # Create test markdown file 2 

344 md2 = tmp_path / "test_doc2.md" 

345 md2.write_text( 

346 """# Test Document 2 

347 

348## Overview 

349 

350Another test document with different content. 

351 

352## Content 

353 

354- Item A: Description of item A 

355- Item B: Description of item B 

356- Item C: Description of item C 

357 

358## Summary 

359 

360This concludes the second test document. 

361""" 

362 ) 

363 files.append(str(md2)) 

364 

365 return files 

366 

367 

368def create_test_json_files(tmp_path: Path) -> list[str]: 

369 """Create test JSON files. 

370 

371 Args: 

372 tmp_path: Temporary directory path (from pytest fixture) 

373 

374 Returns: 

375 List of created file paths as strings 

376 """ 

377 import json 

378 

379 files = [] 

380 

381 # Create test JSON file 1 

382 json1 = tmp_path / "test_data1.json" 

383 json1.write_text( 

384 json.dumps( 

385 { 

386 "title": "Test Data 1", 

387 "items": [ 

388 {"id": 1, "name": "Item 1", "value": 100}, 

389 {"id": 2, "name": "Item 2", "value": 200}, 

390 ], 

391 "metadata": {"version": "1.0", "created": "2024-01-01"}, 

392 }, 

393 indent=2, 

394 ) 

395 ) 

396 files.append(str(json1)) 

397 

398 # Create test JSON file 2 

399 json2 = tmp_path / "test_data2.json" 

400 json2.write_text( 

401 json.dumps( 

402 { 

403 "title": "Test Data 2", 

404 "items": [ 

405 {"id": 3, "name": "Item 3", "value": 300}, 

406 {"id": 4, "name": "Item 4", "value": 400}, 

407 ], 

408 "metadata": {"version": "1.0", "created": "2024-01-02"}, 

409 }, 

410 indent=2, 

411 ) 

412 ) 

413 files.append(str(json2)) 

414 

415 return files