Coverage for src / dataknobs_llm / llm / providers / ollama.py: 9%

219 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-15 10:29 -0700

1"""Ollama local LLM provider implementation. 

2 

3This module provides Ollama integration for dataknobs-llm, enabling local LLM 

4deployment and usage without cloud APIs. Perfect for privacy-sensitive applications, 

5offline usage, and cost reduction. 

6 

7Supports: 

8- All Ollama models (Llama, Mistral, CodeLlama, Phi, etc.) 

9- Chat with message history 

10- Streaming responses 

11- Embeddings for semantic search 

12- Tool/function calling (Ollama 0.1.17+) 

13- Vision models with image inputs 

14- Custom model parameters (temperature, top_p, seed, etc.) 

15- Docker environment auto-detection 

16- Multi-modal capabilities 

17 

18The OllamaProvider automatically detects Docker environments and adjusts 

19connection URLs accordingly. 

20 

21Example: 

22 ```python 

23 from dataknobs_llm.llm.providers import OllamaProvider 

24 from dataknobs_llm.llm.base import LLMConfig, LLMMessage 

25 

26 # Basic usage (assumes Ollama running on localhost:11434) 

27 config = LLMConfig( 

28 provider="ollama", 

29 model="llama2", 

30 temperature=0.7 

31 ) 

32 

33 async with OllamaProvider(config) as llm: 

34 # Simple completion 

35 response = await llm.complete("Explain Python generators") 

36 print(response.content) 

37 

38 # Streaming 

39 async for chunk in llm.stream_complete("Write a poem"): 

40 print(chunk.delta, end="", flush=True) 

41 

42 # Custom Ollama URL (remote or Docker) 

43 remote_config = LLMConfig( 

44 provider="ollama", 

45 model="codellama", 

46 api_base="http://my-ollama-server:11434" 

47 ) 

48 

49 # Generate embeddings 

50 embed_config = LLMConfig( 

51 provider="ollama", 

52 model="nomic-embed-text" 

53 ) 

54 

55 llm = OllamaProvider(embed_config) 

56 await llm.initialize() 

57 embeddings = await llm.embed([ 

58 "Python is great", 

59 "JavaScript is versatile" 

60 ]) 

61 

62 # Vision model with images 

63 vision_messages = [ 

64 LLMMessage( 

65 role="user", 

66 content="What's in this image?", 

67 metadata={"images": ["base64encodedimage..."]} 

68 ) 

69 ] 

70 

71 vision_config = LLMConfig(provider="ollama", model="llava") 

72 llm = OllamaProvider(vision_config) 

73 await llm.initialize() 

74 response = await llm.complete(vision_messages) 

75 ``` 

76 

77Installation: 

78 1. Install Ollama from https://ollama.ai 

79 2. Pull a model: `ollama pull llama2` 

80 3. Start server: `ollama serve` (usually auto-starts) 

81 4. Use with dataknobs-llm (no API key needed!) 

82 

83See Also: 

84 - Ollama: https://ollama.ai 

85 - Ollama Models: https://ollama.ai/library 

86 - Ollama GitHub: https://github.com/ollama/ollama 

87""" 

88 

89import os 

90import json 

91from typing import TYPE_CHECKING, Any, Dict, List, Union, AsyncIterator 

92 

93from ..base import ( 

94 LLMConfig, LLMMessage, LLMResponse, LLMStreamResponse, 

95 AsyncLLMProvider, ModelCapability, 

96 normalize_llm_config 

97) 

98from dataknobs_llm.prompts import AsyncPromptBuilder 

99 

100if TYPE_CHECKING: 

101 from dataknobs_config.config import Config 

102 

103 

104class OllamaProvider(AsyncLLMProvider): 

105 """Ollama local LLM provider for privacy-first, offline LLM usage. 

106 

107 Provides async access to locally-hosted Ollama models, enabling 

108 on-premise LLM deployment without cloud APIs. Perfect for sensitive 

109 data, air-gapped environments, and cost optimization. 

110 

111 Features: 

112 - All Ollama models (Llama 2/3, Mistral, Phi, CodeLlama, etc.) 

113 - No API key required - fully local 

114 - Chat with message history 

115 - Streaming responses for real-time output 

116 - Embeddings for RAG and semantic search 

117 - Tool/function calling (Ollama 0.1.17+) 

118 - Vision models (LLaVA, bakllava) 

119 - Docker environment auto-detection 

120 - Custom model parameters (temperature, top_p, seed) 

121 - Zero-cost inference 

122 

123 Example: 

124 ```python 

125 from dataknobs_llm.llm.providers import OllamaProvider 

126 from dataknobs_llm.llm.base import LLMConfig, LLMMessage 

127 

128 # Basic local usage 

129 config = LLMConfig( 

130 provider="ollama", 

131 model="llama2", # or llama3, mistral, phi, etc. 

132 temperature=0.7 

133 ) 

134 

135 async with OllamaProvider(config) as llm: 

136 # Simple completion 

137 response = await llm.complete("Explain decorators in Python") 

138 print(response.content) 

139 

140 # Multi-turn conversation 

141 messages = [ 

142 LLMMessage(role="system", content="You are a helpful assistant"), 

143 LLMMessage(role="user", content="What is recursion?"), 

144 LLMMessage(role="assistant", content="Recursion is..."), 

145 LLMMessage(role="user", content="Show me an example") 

146 ] 

147 response = await llm.complete(messages) 

148 

149 # Code generation with CodeLlama 

150 code_config = LLMConfig( 

151 provider="ollama", 

152 model="codellama", 

153 temperature=0.2, # Lower for more deterministic code 

154 max_tokens=500 

155 ) 

156 

157 llm = OllamaProvider(code_config) 

158 await llm.initialize() 

159 response = await llm.complete( 

160 "Write a Python function to merge two sorted lists" 

161 ) 

162 print(response.content) 

163 

164 # Remote Ollama server 

165 remote_config = LLMConfig( 

166 provider="ollama", 

167 model="llama2", 

168 api_base="http://192.168.1.100:11434" # Remote server 

169 ) 

170 

171 # Docker usage (auto-detects) 

172 # In Docker, automatically uses host.docker.internal 

173 docker_config = LLMConfig( 

174 provider="ollama", 

175 model="mistral" 

176 ) 

177 

178 # Vision model with image input 

179 from dataknobs_llm.llm.base import LLMMessage 

180 import base64 

181 

182 with open("image.jpg", "rb") as f: 

183 image_data = base64.b64encode(f.read()).decode() 

184 

185 vision_config = LLMConfig( 

186 provider="ollama", 

187 model="llava" # or bakllava 

188 ) 

189 

190 llm = OllamaProvider(vision_config) 

191 await llm.initialize() 

192 

193 messages = [ 

194 LLMMessage( 

195 role="user", 

196 content="What objects are in this image?", 

197 metadata={"images": [image_data]} 

198 ) 

199 ] 

200 

201 response = await llm.complete(messages) 

202 print(response.content) 

203 

204 # Embeddings for RAG 

205 embed_config = LLMConfig( 

206 provider="ollama", 

207 model="nomic-embed-text" # or mxbai-embed-large 

208 ) 

209 

210 llm = OllamaProvider(embed_config) 

211 await llm.initialize() 

212 

213 # Single embedding 

214 embedding = await llm.embed("Sample text") 

215 print(f"Dimensions: {len(embedding)}") 

216 

217 # Batch embeddings 

218 texts = [ 

219 "Python programming", 

220 "Machine learning basics", 

221 "Web development with Flask" 

222 ] 

223 embeddings = await llm.embed(texts) 

224 print(f"Generated {len(embeddings)} embeddings") 

225 

226 # Tool use (Ollama 0.1.17+) 

227 tools = [ 

228 { 

229 "type": "function", 

230 "function": { 

231 "name": "get_weather", 

232 "description": "Get current weather", 

233 "parameters": { 

234 "type": "object", 

235 "properties": { 

236 "location": {"type": "string"} 

237 }, 

238 "required": ["location"] 

239 } 

240 } 

241 } 

242 ] 

243 

244 response = await llm.function_call(messages, tools) 

245 ``` 

246 

247 Args: 

248 config: LLMConfig, dataknobs Config, or dict with provider settings 

249 prompt_builder: Optional AsyncPromptBuilder for prompt rendering 

250 

251 Attributes: 

252 base_url (str): Ollama API base URL (auto-detects Docker environment) 

253 _client: HTTP client for Ollama API 

254 

255 See Also: 

256 LLMConfig: Configuration options 

257 AsyncLLMProvider: Base provider interface 

258 Ollama Documentation: https://ollama.ai 

259 """ 

260 

def __init__(
    self,
    config: Union[LLMConfig, "Config", Dict[str, Any]],
    prompt_builder: AsyncPromptBuilder | None = None
):
    """Create a provider bound to an Ollama server.

    Args:
        config: LLMConfig, dataknobs Config, or plain dict. It is passed
            through ``normalize_llm_config`` before use.
        prompt_builder: Optional AsyncPromptBuilder forwarded to the
            base-class constructor.
    """
    normalized = normalize_llm_config(config)
    super().__init__(normalized, prompt_builder=prompt_builder)

    # Server URL resolution order:
    #   1. ``api_base`` on the normalized config,
    #   2. the OLLAMA_BASE_URL environment variable,
    #   3. a built-in default, which points at host.docker.internal when
    #      the /.dockerenv marker file exists and at localhost otherwise.
    inside_docker = os.path.exists('/.dockerenv')
    if inside_docker:
        fallback_url = 'http://host.docker.internal:11434'
    else:
        fallback_url = 'http://localhost:11434'

    env_or_fallback = os.environ.get('OLLAMA_BASE_URL', fallback_url)
    self.base_url = normalized.api_base or env_or_fallback

278 

279 def _build_options(self, config: LLMConfig | None = None) -> Dict[str, Any]: 

280 """Build options dict for Ollama API calls. 

281 

282 Args: 

283 config: Config to use for options. If None, uses self.config. 

284 

285 Returns: 

286 Dictionary of options for the API request. 

287 """ 

288 cfg = config or self.config 

289 options: Dict[str, Any] = {} 

290 

291 # Only add temperature if it's not the default to avoid issues 

292 if cfg.temperature != 1.0: 

293 options['temperature'] = float(cfg.temperature) 

294 

295 # Only add top_p if explicitly set and different from default 

296 if cfg.top_p != 1.0: 

297 options['top_p'] = float(cfg.top_p) 

298 

299 if cfg.seed is not None: 

300 options['seed'] = int(cfg.seed) 

301 

302 if cfg.max_tokens: 

303 # Ensure it's an integer 

304 options['num_predict'] = int(cfg.max_tokens) 

305 

306 if cfg.stop_sequences: 

307 options['stop'] = list(cfg.stop_sequences) 

308 

309 return options 

310 

311 def _messages_to_ollama(self, messages: List[LLMMessage]) -> List[Dict[str, Any]]: 

312 """Convert LLMMessage list to Ollama chat format. 

313 

314 Args: 

315 messages: List of LLM messages 

316 

317 Returns: 

318 List of message dicts in Ollama format 

319 """ 

320 ollama_messages = [] 

321 for msg in messages: 

322 message = { 

323 'role': msg.role, 

324 'content': msg.content 

325 } 

326 # Ollama supports images in messages for vision models 

327 if msg.metadata.get('images'): 

328 message['images'] = msg.metadata['images'] 

329 ollama_messages.append(message) 

330 return ollama_messages 

331 

332 def _adapt_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 

333 """Adapt tools to Ollama format. 

334 

335 Ollama uses a similar format to OpenAI for tools. 

336 

337 Args: 

338 tools: List of tool definitions 

339 

340 Returns: 

341 List of tools in Ollama format 

342 """ 

343 # Ollama format is similar to OpenAI 

344 ollama_tools = [] 

345 for tool in tools: 

346 ollama_tools.append({ 

347 'type': 'function', 

348 'function': { 

349 'name': tool.get('name'), 

350 'description': tool.get('description', ''), 

351 'parameters': tool.get('parameters', {}) 

352 } 

353 }) 

354 return ollama_tools 

355 

async def initialize(self) -> None:
    """Open the HTTP session and probe the Ollama server.

    Creates an ``aiohttp.ClientSession`` whose total timeout is
    ``self.config.timeout`` (30 seconds when unset), then queries
    ``/api/tags``. If the configured model is not listed verbatim it is
    resolved against the installed models and ``self.config.model`` is
    updated to the match. Probe failures are logged as warnings and never
    raised, so the provider is marked initialized even when the server
    cannot be reached.

    Raises:
        ImportError: If ``aiohttp`` is not installed.
    """
    import logging

    # Guard only the import, so an unrelated ImportError raised later is
    # not misreported as "aiohttp package not installed".
    try:
        import aiohttp
    except ImportError as e:
        raise ImportError("aiohttp package not installed. Install with: pip install aiohttp") from e

    logger = logging.getLogger(__name__)

    # Re-initialising must not orphan a session that is still open.
    previous = getattr(self, '_session', None)
    if previous is not None and not getattr(previous, 'closed', True):
        await previous.close()

    self._session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=self.config.timeout or 30.0)
    )

    # Test connection and verify model availability (best effort).
    try:
        async with self._session.get(f"{self.base_url}/api/tags") as response:
            if response.status != 200:
                logger.warning(f"Ollama: API returned status {response.status}")
            else:
                data = await response.json()
                models = [m['name'] for m in data.get('models', [])]
                if not models:
                    logger.warning("Ollama: No models found. Please pull a model first.")
                elif self.config.model not in models:
                    # Resolve without the tag (e.g. 'llama2' -> 'llama2:latest').
                    # Prefer models whose name before ':' is exactly the
                    # requested one, so 'llama2' does not pick
                    # 'llama2-uncensored' just because it is listed first.
                    # Plain prefix matching stays as the fallback.
                    base_model = self.config.model.split(':')[0]
                    same_name = [m for m in models if m.split(':')[0] == base_model]
                    matching_models = same_name or [m for m in models if m.startswith(base_model)]
                    if matching_models:
                        self.config.model = matching_models[0]
                        logger.info(f"Ollama: Using model {self.config.model}")
                    else:
                        logger.warning(f"Ollama: Model {self.config.model} not found. Available: {models}")
    except Exception as e:
        # Deliberately broad: an unreachable or misbehaving server must
        # not prevent construction of the provider.
        logger.warning(f"Ollama: Could not connect to {self.base_url}: {e}")

    self._is_initialized = True

397 

async def close(self) -> None:
    """Close the HTTP session, if one exists, and mark the provider uninitialized."""
    session = getattr(self, '_session', None)
    if session:
        await session.close()
    self._is_initialized = False

403 

async def validate_model(self) -> bool:
    """Report whether the configured model is listed by the server.

    Returns:
        True when ``/api/tags`` answers 200 and lists either the exact
        configured model name or a model whose name starts with the
        configured name stripped of its ``:tag``. False when the provider
        is not initialized, has no session, the status is not 200, or
        any exception occurs during the request.
    """
    if not (self._is_initialized and hasattr(self, '_session')):
        return False

    try:
        async with self._session.get(f"{self.base_url}/api/tags") as response:
            if response.status != 200:
                return False

            listing = await response.json()
            installed = [entry['name'] for entry in listing.get('models', [])]

            wanted = self.config.model
            if wanted in installed:
                return True

            # Fall back to comparing against the name without its tag.
            stem = wanted.split(':')[0]
            for candidate in installed:
                if candidate.startswith(stem):
                    return True
            return False
    except Exception:
        return False

422 

def get_capabilities(self) -> List[ModelCapability]:
    """Infer capabilities from substrings of the configured model name.

    Returns:
        Always TEXT_GENERATION, CHAT and STREAMING. FUNCTION_CALLING is
        added when the lower-cased name contains 'llama3', 'mistral',
        'mixtral' or 'qwen'; VISION when it contains 'llava'; CODE when
        it contains 'codellama' or 'codegemma'.
    """
    model_name = self.config.model.lower()

    supported = [
        ModelCapability.TEXT_GENERATION,
        ModelCapability.CHAT,
        ModelCapability.STREAMING
    ]

    # Name fragments of model families treated as tool-capable.
    tool_families = ['llama3', 'mistral', 'mixtral', 'qwen']
    for family in tool_families:
        if family in model_name:
            supported.append(ModelCapability.FUNCTION_CALLING)
            break

    if 'llava' in model_name:
        supported.append(ModelCapability.VISION)

    is_code_model = 'codellama' in model_name or 'codegemma' in model_name
    if is_code_model:
        supported.append(ModelCapability.CODE)

    return supported

443 

async def complete(
    self,
    messages: Union[str, List[LLMMessage]],
    config_overrides: Dict[str, Any] | None = None,
    **kwargs
) -> LLMResponse:
    """Generate a completion using the Ollama chat endpoint.

    A plain string is wrapped as a single user message. The configured
    system prompt is prepended unless the first message already has the
    ``system`` role. If the server answers 400 with "does not support
    tools", the request is retried once without tools.

    Args:
        messages: Input messages or prompt
        config_overrides: Optional dict to override config fields (model,
            temperature, max_tokens, top_p, stop_sequences, seed)
        **kwargs: Additional provider-specific parameters. ``tools`` may
            hold objects exposing ``name`` / ``description`` (and
            optionally ``schema``), or dicts accepted by ``_adapt_tools``.

    Returns:
        LLMResponse with content, optional tool calls, token usage (only
        when the server reports ``eval_count``) and timing metadata.
        ``finish_reason`` is ``'tool_calls'`` when tool calls are present,
        ``'length'`` when the server reports ``done_reason == 'length'``
        (or does not report ``done``), otherwise ``'stop'``.

    Raises:
        Whatever ``raise_for_status()`` of the HTTP response raises for an
        error status.
    """
    import logging

    logger = logging.getLogger(__name__)

    if not self._is_initialized:
        await self.initialize()

    # Get runtime config (with overrides applied if provided)
    runtime_config = self._get_runtime_config(config_overrides)

    # Convert to message list
    if isinstance(messages, str):
        messages = [LLMMessage(role='user', content=messages)]

    # Add system prompt if configured
    if runtime_config.system_prompt and (not messages or messages[0].role != 'system'):
        messages = [LLMMessage(role='system', content=runtime_config.system_prompt)] + list(messages)

    # Build payload for chat endpoint
    payload = {
        'model': runtime_config.model,
        'messages': self._messages_to_ollama(messages),
        'stream': False,
        'options': self._build_options(runtime_config)
    }

    # Add format if JSON mode requested
    if runtime_config.response_format == 'json':
        payload['format'] = 'json'

    # Handle tools if provided
    tools = kwargs.get('tools')
    if tools:
        tool_dicts = []
        for tool in tools:
            if isinstance(tool, dict):
                # Already a dict definition; attribute access would fail.
                tool_dicts.append(tool)
            else:
                tool_dicts.append({
                    'name': tool.name,
                    'description': tool.description,
                    'parameters': tool.schema if hasattr(tool, 'schema') else {}
                })
        payload['tools'] = self._adapt_tools(tool_dicts)

    chat_url = f"{self.base_url}/api/chat"
    async with self._session.post(chat_url, json=payload) as response:
        if response.status == 200:
            data = await response.json()
        else:
            error_text = await response.text()

            # Handle tools not supported - retry without tools
            if response.status == 400 and "does not support tools" in error_text:
                model_name = runtime_config.model
                logger.warning(
                    f"Model '{model_name}' does not support tools. "
                    f"Continuing without tool support. "
                    f"For tool support, use: llama3.1:8b, llama3.2:3b, mistral:7b, or qwen2.5:7b"
                )
                payload.pop('tools', None)
                async with self._session.post(chat_url, json=payload) as retry_response:
                    if retry_response.status != 200:
                        retry_error = await retry_response.text()
                        logger.error(f"Ollama API error on retry (status {retry_response.status}): {retry_error}")
                        retry_response.raise_for_status()
                    data = await retry_response.json()
            else:
                logger.error(f"Ollama API error (status {response.status}): {error_text}")
                # The payload holds full prompts and possibly base64 images,
                # so it is only dumped when debug logging is enabled.
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(f"Request payload: {json.dumps(payload, indent=2)}")
                response.raise_for_status()
                # Reached only for a non-200 status that is not an error;
                # parse the body so ``data`` is always bound below.
                data = await response.json()

    # Extract response and tool calls
    message = data.get('message', {})
    content = message.get('content', '')
    raw_tool_calls = message.get('tool_calls', [])

    # Convert tool calls to ToolCall objects
    from ..base import ToolCall
    tool_calls = None
    if raw_tool_calls:
        tool_calls = []
        for tc in raw_tool_calls:
            func = tc.get('function', {})
            tool_calls.append(ToolCall(
                name=func.get('name', ''),
                parameters=func.get('arguments', {}),
                id=tc.get('id')
            ))

    # A non-streaming reply always has done=True, so truncation by the
    # token limit is only visible through ``done_reason``.
    if tool_calls:
        finish_reason = 'tool_calls'
    elif data.get('done_reason') == 'length':
        finish_reason = 'length'
    else:
        finish_reason = 'stop' if data.get('done') else 'length'

    return LLMResponse(
        content=content,
        model=runtime_config.model,
        finish_reason=finish_reason,
        usage={
            'prompt_tokens': data.get('prompt_eval_count', 0),
            'completion_tokens': data.get('eval_count', 0),
            'total_tokens': data.get('prompt_eval_count', 0) + data.get('eval_count', 0)
        } if 'eval_count' in data else None,
        tool_calls=tool_calls,
        metadata={
            'eval_duration': data.get('eval_duration'),
            'total_duration': data.get('total_duration'),
            'model_info': data.get('model', '')
        }
    )

564 

async def stream_complete(
    self,
    messages: Union[str, List[LLMMessage]],
    config_overrides: Dict[str, Any] | None = None,
    **kwargs
) -> AsyncIterator[LLMStreamResponse]:
    """Generate a streaming completion via the ``/api/generate`` endpoint.

    A message list is flattened into a single prompt with
    ``_build_prompt``. As in ``complete``, the configured system prompt
    is applied unless the first message already has the ``system`` role,
    and JSON mode is requested when ``response_format == 'json'``.

    Args:
        messages: Input messages or prompt
        config_overrides: Optional dict to override config fields (model,
            temperature, max_tokens, top_p, stop_sequences, seed)
        **kwargs: Additional provider-specific parameters

    Yields:
        One LLMStreamResponse per non-blank line of the response stream.
        The final chunk has ``is_final=True`` and a ``finish_reason`` of
        ``'length'`` when the server reports ``done_reason == 'length'``,
        otherwise ``'stop'``.

    Raises:
        Whatever ``raise_for_status()`` of the HTTP response raises for an
        error status; ``json.JSONDecodeError`` for a malformed line.
    """
    if not self._is_initialized:
        await self.initialize()

    # Get runtime config (with overrides applied if provided)
    runtime_config = self._get_runtime_config(config_overrides)

    # Convert to Ollama format
    if isinstance(messages, str):
        prompt = messages
        has_system_message = False
    else:
        prompt = self._build_prompt(messages)
        has_system_message = bool(messages) and messages[0].role == 'system'

    payload = {
        'model': runtime_config.model,
        'prompt': prompt,
        'stream': True,
        'options': self._build_options(runtime_config)
    }

    # Keep streaming consistent with ``complete``: honour the configured
    # system prompt and JSON mode instead of silently dropping them.
    if runtime_config.system_prompt and not has_system_message:
        payload['system'] = runtime_config.system_prompt
    if runtime_config.response_format == 'json':
        payload['format'] = 'json'

    async with self._session.post(f"{self.base_url}/api/generate", json=payload) as response:
        response.raise_for_status()

        async for line in response.content:
            # A whitespace-only line is truthy but is not valid JSON.
            if not line.strip():
                continue
            data = json.loads(line.decode('utf-8'))
            done = data.get('done', False)
            if not data.get('done'):
                finish_reason = None
            elif data.get('done_reason') == 'length':
                finish_reason = 'length'
            else:
                finish_reason = 'stop'
            yield LLMStreamResponse(
                delta=data.get('response', ''),
                is_final=done,
                finish_reason=finish_reason
            )

610 

async def embed(
    self,
    texts: Union[str, List[str]],
    **kwargs
) -> Union[List[float], List[List[float]]]:
    """Generate embeddings with the configured model.

    One request is sent to ``/api/embeddings`` per input text, in order.

    Args:
        texts: A single string or a list of strings.
        **kwargs: Accepted but not used by this method.

    Returns:
        The ``embedding`` value of the response for a single string, or a
        list of such values (one per input) for a list.

    Raises:
        Whatever ``raise_for_status()`` of the HTTP response raises for an
        error status; ``KeyError`` if a response lacks ``embedding``.
    """
    if not self._is_initialized:
        await self.initialize()

    is_single = isinstance(texts, str)
    batch = [texts] if is_single else texts

    vectors = []
    for item in batch:
        request_body = {'model': self.config.model, 'prompt': item}
        async with self._session.post(f"{self.base_url}/api/embeddings", json=request_body) as response:
            response.raise_for_status()
            reply = await response.json()
            vectors.append(reply['embedding'])

    if is_single:
        return vectors[0]
    return vectors

639 

async def function_call(
    self,
    messages: List[LLMMessage],
    functions: List[Dict[str, Any]],
    **kwargs
) -> LLMResponse:
    """Execute function calling with native Ollama tools support.

    The native tools API of the chat endpoint is tried first. If that
    attempt raises for any reason, the function definitions are embedded
    in a system prompt and the model is asked, through ``complete``, to
    answer with a JSON object naming the function to call.

    Args:
        messages: Conversation so far. The configured system prompt is
            prepended unless the first message has the ``system`` role.
        functions: Tool definitions, converted with ``_adapt_tools``.
        **kwargs: Forwarded to ``complete`` on the fallback path only.

    Returns:
        LLMResponse. ``function_call`` is set to ``{'name', 'arguments'}``
        from the first native tool call, or from the parsed JSON object
        on the fallback path; it is left untouched when the model made no
        call or its reply is not a JSON object with a ``function`` key.
    """
    import logging

    if not self._is_initialized:
        await self.initialize()

    # Add system prompt if configured
    if self.config.system_prompt and (not messages or messages[0].role != 'system'):
        messages = [LLMMessage(role='system', content=self.config.system_prompt)] + list(messages)

    # Build payload with tools
    payload = {
        'model': self.config.model,
        'messages': self._messages_to_ollama(messages),
        'tools': self._adapt_tools(functions),
        'stream': False,
        'options': self._build_options()
    }

    try:
        async with self._session.post(f"{self.base_url}/api/chat", json=payload) as response:
            response.raise_for_status()
            data = await response.json()

            # Extract response and tool calls
            message = data.get('message', {})
            content = message.get('content', '')
            tool_calls = message.get('tool_calls', [])

            llm_response = LLMResponse(
                content=content,
                model=self.config.model,
                finish_reason='tool_calls' if tool_calls else 'stop',
                usage={
                    'prompt_tokens': data.get('prompt_eval_count', 0),
                    'completion_tokens': data.get('eval_count', 0),
                    'total_tokens': data.get('prompt_eval_count', 0) + data.get('eval_count', 0)
                } if 'eval_count' in data else None
            )

            # Only the first tool call is surfaced, although the server
            # may return several.
            if tool_calls:
                first_call = tool_calls[0].get('function', {})
                llm_response.function_call = {
                    'name': first_call.get('name', ''),
                    'arguments': first_call.get('arguments', {})
                }

            return llm_response

    except Exception as e:
        # Deliberate best effort: any failure of the native path falls
        # back to the prompt-based approach.
        logging.getLogger(__name__).warning(
            f"Ollama native tools failed, falling back to prompt-based: {e}"
        )

        function_descriptions = json.dumps(functions, indent=2)

        system_prompt = (
            "You have access to these functions:\n"
            f"{function_descriptions}\n"
            "\n"
            "To call a function, respond with JSON:\n"
            '{"function": "name", "arguments": {...}}'
        )

        messages_with_system = [
            LLMMessage(role='system', content=system_prompt)
        ] + list(messages)

        llm_response = await self.complete(messages_with_system, **kwargs)

        # Try to parse a function call. The reply may be free text, may be
        # valid JSON that is not an object (e.g. ``42`` or a list), or the
        # content may be None; none of these is a function call.
        try:
            func_data = json.loads(llm_response.content)
        except (json.JSONDecodeError, TypeError):
            func_data = None

        if isinstance(func_data, dict) and 'function' in func_data:
            llm_response.function_call = {
                'name': func_data['function'],
                'arguments': func_data.get('arguments', {})
            }

        return llm_response

737 

738 def _build_prompt(self, messages: List[LLMMessage]) -> str: 

739 """Build prompt from messages.""" 

740 prompt = "" 

741 for msg in messages: 

742 if msg.role == 'system': 

743 prompt += f"System: {msg.content}\n\n" 

744 elif msg.role == 'user': 

745 prompt += f"User: {msg.content}\n\n" 

746 elif msg.role == 'assistant': 

747 prompt += f"Assistant: {msg.content}\n\n" 

748 return prompt