Coverage for src / dataknobs_llm / llm / providers / anthropic.py: 14%

112 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 10:29 -0700

1"""Anthropic Claude LLM provider implementation. 

2 

3This module provides Anthropic Claude API integration for dataknobs-llm, supporting: 

4- Claude 3 (Opus, Sonnet, Haiku) and Claude 2 models 

5- Native tools API for function calling 

6- Vision capabilities (Claude 3+) 

7- Streaming responses 

8- Long context windows (up to 200k tokens) 

9- Advanced reasoning and coding capabilities 

10 

11The AnthropicProvider uses the official Anthropic Python SDK and supports 

12all standard Anthropic API parameters including system prompts, temperature, 

13and token limits. 

14 

15Example: 

16 ```python 

17 from dataknobs_llm.llm.providers import AnthropicProvider 

18 from dataknobs_llm.llm.base import LLMConfig 

19 

20 # Create provider 

21 config = LLMConfig( 

22 provider="anthropic", 

23 model="claude-3-sonnet-20240229", 

24 api_key="sk-ant-...", # or set ANTHROPIC_API_KEY env var 

25 temperature=0.7, 

26 max_tokens=1024 

27 ) 

28 

29 async with AnthropicProvider(config) as llm: 

30 # Simple completion 

31 response = await llm.complete("Explain quantum computing") 

32 print(response.content) 

33 

34 # Streaming for real-time output 

35 async for chunk in llm.stream_complete("Write a story"): 

36 print(chunk.delta, end="", flush=True) 

37 

38 # Tool use (Claude 3+) 

39 tools = [{ 

40 "name": "calculator", 

41 "description": "Perform arithmetic", 

42 "input_schema": { 

43 "type": "object", 

44 "properties": { 

45 "operation": {"type": "string"}, 

46 "x": {"type": "number"}, 

47 "y": {"type": "number"} 

48 } 

49 } 

50 }] 

51 

52 response = await llm.function_call(messages, tools) 

53 ``` 

54 

55See Also: 

56 - Anthropic API Documentation: https://docs.anthropic.com/ 

57 - anthropic Python package: https://github.com/anthropics/anthropic-sdk-python 

58""" 

59 

60import os 

61import json 

62from typing import TYPE_CHECKING, Any, Dict, List, Union, AsyncIterator 

63 

64from ..base import ( 

65 LLMConfig, LLMMessage, LLMResponse, LLMStreamResponse, 

66 AsyncLLMProvider, ModelCapability, 

67 normalize_llm_config 

68) 

69from dataknobs_llm.prompts import AsyncPromptBuilder 

70 

71if TYPE_CHECKING: 

72 from dataknobs_config.config import Config 

73 

74 

class AnthropicProvider(AsyncLLMProvider):
    r"""Anthropic Claude LLM provider built on the ``anthropic`` Python SDK.

    The provider talks to the Anthropic Messages API through an
    ``anthropic.AsyncAnthropic`` client that is created lazily by
    :meth:`initialize` (or on first use by the completion methods).

    Behaviour implemented by this class:
        - Text completion (:meth:`complete`) and streaming completion
          (:meth:`stream_complete`). A list of messages is flattened into a
          single user turn using the ``Human:`` / ``Assistant:`` text format
          (see :meth:`_build_prompt`).
        - Tool use (:meth:`function_call`) through the native ``tools``
          parameter, with a prompt-based fallback when the native call fails.
        - Capability reporting (:meth:`get_capabilities`): vision and function
          calling are reported for Claude 3+ style model names.
        - Embeddings are not available; :meth:`embed` raises
          ``NotImplementedError``.

    Example:
        ```python
        from dataknobs_llm.llm.providers import AnthropicProvider
        from dataknobs_llm.llm.base import LLMConfig, LLMMessage

        # Basic usage
        config = LLMConfig(
            provider="anthropic",
            model="claude-3-sonnet-20240229",
            api_key="sk-ant-...",  # or set ANTHROPIC_API_KEY env var
            temperature=0.7,
            max_tokens=1024
        )

        async with AnthropicProvider(config) as llm:
            # Simple completion
            response = await llm.complete("Explain machine learning")
            print(response.content)

            # With system prompt
            messages = [
                LLMMessage(
                    role="system",
                    content="You are an expert Python tutor"
                ),
                LLMMessage(
                    role="user",
                    content="How do I use decorators?"
                )
            ]
            response = await llm.complete(messages)

        # Long context processing
        long_config = LLMConfig(
            provider="anthropic",
            model="claude-3-opus-20240229",
            max_tokens=4096
        )

        llm = AnthropicProvider(long_config)
        await llm.initialize()

        with open("large_doc.txt") as f:
            long_text = f.read()

        response = await llm.complete(
            f"Summarize this document:\n\n{long_text}"
        )

        # Tool use / function calling.
        # The schema may be given as "input_schema" (Anthropic style) or
        # "parameters" (OpenAI style).
        tools = [
            {
                "name": "web_search",
                "description": "Search the web for information",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query"
                        },
                        "num_results": {
                            "type": "integer",
                            "description": "Number of results"
                        }
                    },
                    "required": ["query"]
                }
            }
        ]

        messages = [
            LLMMessage(
                role="user",
                content="Search for latest AI news"
            )
        ]

        response = await llm.function_call(messages, tools)
        if response.function_call:
            # "arguments" holds the tool input exactly as the API returned
            # it; this provider does not serialise it to a JSON string.
            tool_input = response.function_call["arguments"]
            print(f"Tool: {response.function_call['name']}")
            print(f"Input: {tool_input}")
        ```

    Args:
        config: LLMConfig, dataknobs Config, or dict with provider settings
        prompt_builder: Optional AsyncPromptBuilder for prompt rendering

    Attributes:
        _client: ``anthropic.AsyncAnthropic`` client, set by ``initialize()``

    See Also:
        LLMConfig: Configuration options
        AsyncLLMProvider: Base provider interface
        Anthropic API Docs: https://docs.anthropic.com/
    """

    # Completion budget used when the config leaves max_tokens unset.
    _DEFAULT_MAX_TOKENS = 1024

    # Substrings that mark a model name as Claude 3 or newer. Newer releases
    # use family-first names ("claude-sonnet-...") instead of "claude-3-...".
    _CLAUDE_3_PLUS_MARKERS = (
        'claude-3', 'claude-sonnet', 'claude-opus', 'claude-haiku',
    )

    # Substrings accepted by validate_model(): every Claude 3+ marker plus
    # the legacy model names.
    _KNOWN_MODEL_MARKERS = _CLAUDE_3_PLUS_MARKERS + (
        'claude-2.1', 'claude-2.0', 'claude-instant-1.2',
    )

    def __init__(
        self,
        config: Union[LLMConfig, "Config", Dict[str, Any]],
        prompt_builder: AsyncPromptBuilder | None = None
    ):
        """Create the provider without contacting the API.

        Args:
            config: Provider settings in any form accepted by
                ``normalize_llm_config``.
            prompt_builder: Optional prompt builder handed to the base class.
        """
        # Normalize first so the base class always receives an LLMConfig.
        llm_config = normalize_llm_config(config)
        super().__init__(llm_config, prompt_builder=prompt_builder)

    async def initialize(self) -> None:
        """Create the ``AsyncAnthropic`` client and mark the provider ready.

        The API key is taken from ``config.api_key`` and falls back to the
        ``ANTHROPIC_API_KEY`` environment variable.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
            ValueError: If no API key is configured.
        """
        # Only the import is guarded, so an ImportError raised by anything
        # else is not misreported as a missing package.
        try:
            import anthropic
        except ImportError as e:
            raise ImportError("anthropic package not installed. Install with: pip install anthropic") from e

        api_key = self.config.api_key or os.environ.get('ANTHROPIC_API_KEY')
        if not api_key:
            raise ValueError("Anthropic API key not provided")

        self._client = anthropic.AsyncAnthropic(
            api_key=api_key,
            base_url=self.config.api_base,
            timeout=self.config.timeout
        )
        self._is_initialized = True

    async def close(self) -> None:
        """Close the client (if one was created) and mark the provider uninitialized."""
        if self._client:
            await self._client.close()  # type: ignore[unreachable]
        self._is_initialized = False

    async def validate_model(self) -> bool:
        """Return True when the configured model name looks like a known Claude model.

        This is a substring check against known model-name markers; it does
        not query the API.
        """
        model = self.config.model or ''
        return any(marker in model for marker in self._KNOWN_MODEL_MARKERS)

    def get_capabilities(self) -> List[ModelCapability]:
        """Return the capabilities implied by the configured model name.

        Every model gets text generation, chat, streaming and code. Model
        names matching a Claude 3+ marker additionally get vision and
        function calling.
        """
        capabilities = [
            ModelCapability.TEXT_GENERATION,
            ModelCapability.CHAT,
            ModelCapability.STREAMING,
            ModelCapability.CODE
        ]

        model = self.config.model or ''
        if any(marker in model for marker in self._CLAUDE_3_PLUS_MARKERS):
            capabilities.extend([
                ModelCapability.VISION,
                ModelCapability.FUNCTION_CALLING
            ])

        return capabilities

    def _anthropic_request_params(self, runtime_config: LLMConfig) -> Dict[str, Any]:
        """Build the model/sampling keyword arguments shared by all API calls.

        Optional settings that are unset (``None``) are left out entirely so
        the request never carries explicit nulls for them.
        """
        params: Dict[str, Any] = {
            'model': runtime_config.model,
            'max_tokens': runtime_config.max_tokens or self._DEFAULT_MAX_TOKENS,
        }
        # "is not None" rather than truthiness: temperature=0.0 is meaningful.
        if runtime_config.temperature is not None:
            params['temperature'] = runtime_config.temperature
        if runtime_config.top_p is not None:
            params['top_p'] = runtime_config.top_p
        if runtime_config.stop_sequences:
            params['stop_sequences'] = runtime_config.stop_sequences
        return params

    @staticmethod
    def _anthropic_text(response: Any) -> str:
        """Concatenate the text of every content block that carries text.

        Blocks without a string ``text`` attribute are skipped, and an empty
        content list yields an empty string.
        """
        pieces = []
        for block in response.content:
            text = getattr(block, 'text', None)
            if isinstance(text, str):
                pieces.append(text)
        return ''.join(pieces)

    @staticmethod
    def _anthropic_usage(response: Any) -> Dict[str, int] | None:
        """Map the response's token usage to the prompt/completion/total dict.

        Returns ``None`` when the response carries no usage information.
        """
        usage = getattr(response, 'usage', None)
        if usage is None:
            return None
        return {
            'prompt_tokens': usage.input_tokens,
            'completion_tokens': usage.output_tokens,
            'total_tokens': usage.input_tokens + usage.output_tokens
        }

    async def complete(
        self,
        messages: Union[str, List[LLMMessage]],
        config_overrides: Dict[str, Any] | None = None,
        **kwargs
    ) -> LLMResponse:
        """Generate completion.

        Args:
            messages: Input messages or prompt. A list is flattened into one
                user turn by ``_build_prompt``.
            config_overrides: Optional dict to override config fields (model,
                temperature, max_tokens, top_p, stop_sequences, seed)
            **kwargs: Accepted for interface compatibility; this method does
                not forward them to the API.

        Returns:
            LLMResponse with the generated text, model name, stop reason and
            token usage.
        """
        if not self._is_initialized:
            await self.initialize()

        # Get runtime config (with overrides applied if provided)
        runtime_config = self._get_runtime_config(config_overrides)

        prompt = messages if isinstance(messages, str) else self._build_prompt(messages)

        response = await self._client.messages.create(
            messages=[{"role": "user", "content": prompt}],
            **self._anthropic_request_params(runtime_config)
        )

        return LLMResponse(
            content=self._anthropic_text(response),
            model=response.model,
            finish_reason=response.stop_reason,
            usage=self._anthropic_usage(response)
        )

    async def stream_complete(
        self,
        messages: Union[str, List[LLMMessage]],
        config_overrides: Dict[str, Any] | None = None,
        **kwargs
    ) -> AsyncIterator[LLMStreamResponse]:
        """Generate streaming completion.

        Args:
            messages: Input messages or prompt. A list is flattened into one
                user turn by ``_build_prompt``.
            config_overrides: Optional dict to override config fields (model,
                temperature, max_tokens, top_p, stop_sequences, seed)
            **kwargs: Accepted for interface compatibility; this method does
                not forward them to the API.

        Yields:
            One LLMStreamResponse per text delta, followed by a final
            response with ``is_final=True`` and the stop reason.
        """
        if not self._is_initialized:
            await self.initialize()

        # Get runtime config (with overrides applied if provided)
        runtime_config = self._get_runtime_config(config_overrides)

        prompt = messages if isinstance(messages, str) else self._build_prompt(messages)

        # Same request parameters as complete(), so top_p and stop_sequences
        # apply to streaming too.
        async with self._client.messages.stream(
            messages=[{"role": "user", "content": prompt}],
            **self._anthropic_request_params(runtime_config)
        ) as stream:
            async for chunk in stream:
                if chunk.type != 'content_block_delta':
                    continue
                # Skip deltas that carry no text attribute.
                text = getattr(chunk.delta, 'text', None)
                if text is None:
                    continue
                yield LLMStreamResponse(
                    delta=text,
                    is_final=False
                )

            # Final message: must be read while the stream is still open.
            message = await stream.get_final_message()
            yield LLMStreamResponse(
                delta='',
                is_final=True,
                finish_reason=message.stop_reason
            )

    async def embed(
        self,
        texts: Union[str, List[str]],
        **kwargs
    ) -> Union[List[float], List[List[float]]]:
        """Anthropic doesn't provide embeddings.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Anthropic doesn't provide embedding models")

    @staticmethod
    def _to_anthropic_tool(func: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one function/tool description to the Anthropic tools format.

        The JSON schema is read from ``input_schema`` (Anthropic style) or
        ``parameters`` (OpenAI style); an empty object schema is used when
        neither is present.
        """
        schema = func.get('input_schema') or func.get('parameters') or {
            'type': 'object',
            'properties': {},
            'required': []
        }
        return {
            'name': func.get('name', ''),
            'description': func.get('description', ''),
            'input_schema': schema
        }

    async def _native_tool_call(
        self,
        messages: List[LLMMessage],
        functions: List[Dict[str, Any]],
        config_overrides: Dict[str, Any] | None
    ) -> LLMResponse:
        """Run one tool-enabled request through the native tools API."""
        runtime_config = self._get_runtime_config(config_overrides)

        # Anthropic takes the system prompt as a separate parameter, so
        # system messages are merged into it instead of the message list.
        anthropic_messages = []
        system_content = self.config.system_prompt or ''
        for msg in messages:
            if msg.role == 'system':
                system_content = msg.content if not system_content else f"{system_content}\n\n{msg.content}"
            else:
                anthropic_messages.append({
                    'role': msg.role,
                    'content': msg.content
                })

        request = self._anthropic_request_params(runtime_config)
        if system_content:
            # Only sent when non-empty; never passed as an explicit None.
            request['system'] = system_content

        response = await self._client.messages.create(
            messages=anthropic_messages,
            tools=[self._to_anthropic_tool(func) for func in functions],
            **request
        )

        # Text blocks are concatenated. If several tool_use blocks are
        # returned, the last one is the one reported.
        text_parts = []
        tool_use = None
        for block in response.content:
            if block.type == 'text':
                text_parts.append(block.text)
            elif block.type == 'tool_use':
                tool_use = {
                    'name': block.name,
                    'arguments': block.input
                }

        return LLMResponse(
            content=''.join(text_parts),
            model=response.model,
            finish_reason=response.stop_reason,
            usage=self._anthropic_usage(response),
            function_call=tool_use
        )

    @staticmethod
    def _parse_prompt_function_call(text: str) -> Dict[str, Any] | None:
        """Extract the JSON object following the first ``FUNCTION_CALL:`` marker.

        Text after the JSON object is tolerated. Returns ``None`` when the
        marker is absent or is not followed by a JSON object.
        """
        _, marker, tail = text.partition('FUNCTION_CALL:')
        if not marker:
            return None
        try:
            # raw_decode parses one JSON value and ignores what follows it.
            parsed, _ = json.JSONDecoder().raw_decode(tail.lstrip())
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    async def function_call(
        self,
        messages: List[LLMMessage],
        functions: List[Dict[str, Any]],
        **kwargs
    ) -> LLMResponse:
        """Execute function calling with the native Anthropic tools API.

        If the native request fails for any reason, the call is retried as a
        plain completion whose system message describes the functions and asks
        the model to answer with a ``FUNCTION_CALL: {...}`` JSON object.

        Args:
            messages: Conversation messages. System messages are merged with
                ``config.system_prompt``.
            functions: Tool descriptions with ``name``, ``description`` and a
                JSON schema under ``input_schema`` or ``parameters``.
            **kwargs: ``config_overrides`` is honoured on the native path; all
                keyword arguments are passed to ``complete()`` on the fallback
                path.

        Returns:
            LLMResponse whose ``function_call`` is a dict with ``name`` and
            ``arguments`` when the model requested a tool.
        """
        if not self._is_initialized:
            await self.initialize()

        try:
            return await self._native_tool_call(
                messages, functions, kwargs.get('config_overrides')
            )
        except Exception as e:
            # Deliberate best-effort: any native failure falls through to the
            # prompt-based approach, which raises if it cannot succeed either.
            import logging
            logging.getLogger(__name__).warning(
                "Anthropic native tools failed, falling back to prompt-based: %s", e
            )

        function_descriptions = "\n".join(
            f"- {f.get('name', '')}: {f.get('description', '')}"
            for f in functions
        )

        system_prompt = "\n".join([
            "You have access to the following functions:",
            function_descriptions,
            "",
            "When you need to call a function, respond with:",
            "FUNCTION_CALL: {",
            '    "name": "function_name",',
            '    "arguments": {...}',
            "}",
        ])

        messages_with_system = [
            LLMMessage(role='system', content=system_prompt)
        ] + list(messages)

        response = await self.complete(messages_with_system, **kwargs)

        # Parse function call from response
        function_call = self._parse_prompt_function_call(response.content)
        if function_call is not None:
            response.function_call = function_call

        return response

    def _build_prompt(self, messages: List[LLMMessage]) -> str:
        """Build Anthropic-style prompt from messages.

        System messages are placed at the front of the prompt (a later system
        message ends up before an earlier one). User and assistant messages
        follow in order as ``Human:`` / ``Assistant:`` turns, and the prompt
        ends with an open ``Assistant:`` turn. Other roles are ignored.
        """
        system_parts = []
        turns = []
        for msg in messages:
            if msg.role == 'system':
                system_parts.append(msg.content + "\n\n")
            elif msg.role == 'user':
                turns.append(f"\n\nHuman: {msg.content}")
            elif msg.role == 'assistant':
                turns.append(f"\n\nAssistant: {msg.content}")
        # Each system message is prepended, hence the reversed order.
        return "".join(reversed(system_parts)) + "".join(turns) + "\n\nAssistant:"