Coverage for src / dataknobs_llm / llm / providers / ollama.py: 9%
219 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 10:29 -0700
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-15 10:29 -0700
1"""Ollama local LLM provider implementation.
3This module provides Ollama integration for dataknobs-llm, enabling local LLM
4deployment and usage without cloud APIs. Perfect for privacy-sensitive applications,
5offline usage, and cost reduction.
7Supports:
8- All Ollama models (Llama, Mistral, CodeLlama, Phi, etc.)
9- Chat with message history
10- Streaming responses
11- Embeddings for semantic search
12- Tool/function calling (Ollama 0.1.17+)
13- Vision models with image inputs
14- Custom model parameters (temperature, top_p, seed, etc.)
15- Docker environment auto-detection
16- Multi-modal capabilities
18The OllamaProvider automatically detects Docker environments and adjusts
19connection URLs accordingly.
21Example:
22 ```python
23 from dataknobs_llm.llm.providers import OllamaProvider
24 from dataknobs_llm.llm.base import LLMConfig, LLMMessage
26 # Basic usage (assumes Ollama running on localhost:11434)
27 config = LLMConfig(
28 provider="ollama",
29 model="llama2",
30 temperature=0.7
31 )
33 async with OllamaProvider(config) as llm:
34 # Simple completion
35 response = await llm.complete("Explain Python generators")
36 print(response.content)
38 # Streaming
39 async for chunk in llm.stream_complete("Write a poem"):
40 print(chunk.delta, end="", flush=True)
42 # Custom Ollama URL (remote or Docker)
43 remote_config = LLMConfig(
44 provider="ollama",
45 model="codellama",
46 api_base="http://my-ollama-server:11434"
47 )
49 # Generate embeddings
50 embed_config = LLMConfig(
51 provider="ollama",
52 model="nomic-embed-text"
53 )
55 llm = OllamaProvider(embed_config)
56 await llm.initialize()
57 embeddings = await llm.embed([
58 "Python is great",
59 "JavaScript is versatile"
60 ])
62 # Vision model with images
63 vision_messages = [
64 LLMMessage(
65 role="user",
66 content="What's in this image?",
67 metadata={"images": ["base64encodedimage..."]}
68 )
69 ]
71 vision_config = LLMConfig(provider="ollama", model="llava")
72 llm = OllamaProvider(vision_config)
73 await llm.initialize()
74 response = await llm.complete(vision_messages)
75 ```
77Installation:
78 1. Install Ollama from https://ollama.ai
79 2. Pull a model: `ollama pull llama2`
80 3. Start server: `ollama serve` (usually auto-starts)
81 4. Use with dataknobs-llm (no API key needed!)
83See Also:
84 - Ollama: https://ollama.ai
85 - Ollama Models: https://ollama.ai/library
86 - Ollama GitHub: https://github.com/ollama/ollama
87"""
89import os
90import json
91from typing import TYPE_CHECKING, Any, Dict, List, Union, AsyncIterator
93from ..base import (
94 LLMConfig, LLMMessage, LLMResponse, LLMStreamResponse,
95 AsyncLLMProvider, ModelCapability,
96 normalize_llm_config
97)
98from dataknobs_llm.prompts import AsyncPromptBuilder
100if TYPE_CHECKING:
101 from dataknobs_config.config import Config
104class OllamaProvider(AsyncLLMProvider):
105 """Ollama local LLM provider for privacy-first, offline LLM usage.
107 Provides async access to locally-hosted Ollama models, enabling
108 on-premise LLM deployment without cloud APIs. Perfect for sensitive
109 data, air-gapped environments, and cost optimization.
111 Features:
112 - All Ollama models (Llama 2/3, Mistral, Phi, CodeLlama, etc.)
113 - No API key required - fully local
114 - Chat with message history
115 - Streaming responses for real-time output
116 - Embeddings for RAG and semantic search
117 - Tool/function calling (Ollama 0.1.17+)
118 - Vision models (LLaVA, bakllava)
119 - Docker environment auto-detection
120 - Custom model parameters (temperature, top_p, seed)
121 - Zero-cost inference
123 Example:
124 ```python
125 from dataknobs_llm.llm.providers import OllamaProvider
126 from dataknobs_llm.llm.base import LLMConfig, LLMMessage
128 # Basic local usage
129 config = LLMConfig(
130 provider="ollama",
131 model="llama2", # or llama3, mistral, phi, etc.
132 temperature=0.7
133 )
135 async with OllamaProvider(config) as llm:
136 # Simple completion
137 response = await llm.complete("Explain decorators in Python")
138 print(response.content)
140 # Multi-turn conversation
141 messages = [
142 LLMMessage(role="system", content="You are a helpful assistant"),
143 LLMMessage(role="user", content="What is recursion?"),
144 LLMMessage(role="assistant", content="Recursion is..."),
145 LLMMessage(role="user", content="Show me an example")
146 ]
147 response = await llm.complete(messages)
149 # Code generation with CodeLlama
150 code_config = LLMConfig(
151 provider="ollama",
152 model="codellama",
153 temperature=0.2, # Lower for more deterministic code
154 max_tokens=500
155 )
157 llm = OllamaProvider(code_config)
158 await llm.initialize()
159 response = await llm.complete(
160 "Write a Python function to merge two sorted lists"
161 )
162 print(response.content)
164 # Remote Ollama server
165 remote_config = LLMConfig(
166 provider="ollama",
167 model="llama2",
168 api_base="http://192.168.1.100:11434" # Remote server
169 )
171 # Docker usage (auto-detects)
172 # In Docker, automatically uses host.docker.internal
173 docker_config = LLMConfig(
174 provider="ollama",
175 model="mistral"
176 )
178 # Vision model with image input
179 from dataknobs_llm.llm.base import LLMMessage
180 import base64
182 with open("image.jpg", "rb") as f:
183 image_data = base64.b64encode(f.read()).decode()
185 vision_config = LLMConfig(
186 provider="ollama",
187 model="llava" # or bakllava
188 )
190 llm = OllamaProvider(vision_config)
191 await llm.initialize()
193 messages = [
194 LLMMessage(
195 role="user",
196 content="What objects are in this image?",
197 metadata={"images": [image_data]}
198 )
199 ]
201 response = await llm.complete(messages)
202 print(response.content)
204 # Embeddings for RAG
205 embed_config = LLMConfig(
206 provider="ollama",
207 model="nomic-embed-text" # or mxbai-embed-large
208 )
210 llm = OllamaProvider(embed_config)
211 await llm.initialize()
213 # Single embedding
214 embedding = await llm.embed("Sample text")
215 print(f"Dimensions: {len(embedding)}")
217 # Batch embeddings
218 texts = [
219 "Python programming",
220 "Machine learning basics",
221 "Web development with Flask"
222 ]
223 embeddings = await llm.embed(texts)
224 print(f"Generated {len(embeddings)} embeddings")
226 # Tool use (Ollama 0.1.17+)
227 tools = [
228 {
229 "type": "function",
230 "function": {
231 "name": "get_weather",
232 "description": "Get current weather",
233 "parameters": {
234 "type": "object",
235 "properties": {
236 "location": {"type": "string"}
237 },
238 "required": ["location"]
239 }
240 }
241 }
242 ]
244 response = await llm.function_call(messages, tools)
245 ```
247 Args:
248 config: LLMConfig, dataknobs Config, or dict with provider settings
249 prompt_builder: Optional AsyncPromptBuilder for prompt rendering
251 Attributes:
252 base_url (str): Ollama API base URL (auto-detects Docker environment)
253 _client: HTTP client for Ollama API
255 See Also:
256 LLMConfig: Configuration options
257 AsyncLLMProvider: Base provider interface
258 Ollama Documentation: https://ollama.ai
259 """
def __init__(
    self,
    config: Union[LLMConfig, "Config", Dict[str, Any]],
    prompt_builder: AsyncPromptBuilder | None = None
):
    """Set up the provider and resolve the Ollama base URL.

    Args:
        config: LLMConfig, dataknobs Config, or plain dict. It is passed
            through ``normalize_llm_config`` before being handed to the
            base class.
        prompt_builder: Optional AsyncPromptBuilder forwarded to the
            base-class initializer.
    """
    normalized = normalize_llm_config(config)
    super().__init__(normalized, prompt_builder=prompt_builder)

    # The presence of /.dockerenv is used as the "running in Docker" signal;
    # in that case the host machine is addressed as host.docker.internal.
    in_docker = os.path.exists('/.dockerenv')
    fallback_url = (
        'http://host.docker.internal:11434'
        if in_docker
        else 'http://localhost:11434'
    )

    # Precedence: explicit api_base, then OLLAMA_BASE_URL, then the fallback.
    self.base_url = (
        normalized.api_base
        or os.environ.get('OLLAMA_BASE_URL', fallback_url)
    )
def _build_options(self, config: "LLMConfig | None" = None) -> Dict[str, Any]:
    """Build the ``options`` dict sent with Ollama API calls.

    A setting is only included when it carries information: temperature
    and top_p are skipped when unset (``None``) or equal to 1.0, seed is
    skipped when ``None``, and max_tokens / stop_sequences are skipped
    when falsy.

    Args:
        config: Config to read the values from. If None (or falsy),
            ``self.config`` is used.

    Returns:
        Dictionary of options for the API request; may be empty.
    """
    cfg = config or self.config
    options: Dict[str, Any] = {}

    # Guard against None before converting: float(None) would raise.
    if cfg.temperature is not None and cfg.temperature != 1.0:
        options['temperature'] = float(cfg.temperature)

    if cfg.top_p is not None and cfg.top_p != 1.0:
        options['top_p'] = float(cfg.top_p)

    if cfg.seed is not None:
        options['seed'] = int(cfg.seed)

    if cfg.max_tokens:
        # Ollama's name for the generation length limit is num_predict.
        options['num_predict'] = int(cfg.max_tokens)

    if cfg.stop_sequences:
        options['stop'] = list(cfg.stop_sequences)

    return options
def _messages_to_ollama(self, messages: List["LLMMessage"]) -> List[Dict[str, Any]]:
    """Translate LLMMessage objects into Ollama chat message dicts.

    Each message becomes ``{'role': ..., 'content': ...}``. When the
    message metadata holds a non-empty ``'images'`` entry, it is copied
    to an ``'images'`` key (used by vision models).

    Args:
        messages: List of LLM messages

    Returns:
        List of message dicts in Ollama format
    """
    def to_entry(msg):
        entry = {'role': msg.role, 'content': msg.content}
        images = msg.metadata.get('images')
        if images:
            entry['images'] = images
        return entry

    return [to_entry(msg) for msg in messages]
def _adapt_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Adapt tool definitions to the Ollama tools format.

    Two input shapes are accepted:

    - flat: ``{'name': ..., 'description': ..., 'parameters': ...}``
    - already wrapped (OpenAI style):
      ``{'type': 'function', 'function': {'name': ..., ...}}``

    For a wrapped definition the fields are read from the inner
    ``'function'`` dict; otherwise they are read from the tool dict
    itself. Without this, a wrapped definition would be re-wrapped with
    ``name=None``.

    Args:
        tools: List of tool definitions

    Returns:
        List of tools in Ollama format (``type`` / ``function`` wrapper)
    """
    ollama_tools = []
    for tool in tools:
        inner = tool.get('function')
        spec = inner if isinstance(inner, dict) else tool
        ollama_tools.append({
            'type': 'function',
            'function': {
                'name': spec.get('name'),
                'description': spec.get('description', ''),
                'parameters': spec.get('parameters', {})
            }
        })
    return ollama_tools
async def initialize(self) -> None:
    """Create the HTTP session and probe the Ollama server.

    The probe (``GET {base_url}/api/tags``) is best-effort: any failure
    is logged as a warning and the provider is still marked initialized.

    If the configured model is not listed exactly, ``self.config.model``
    is rewritten to an installed model: first one whose name before the
    ``:`` tag equals the configured base name, otherwise the first one
    that merely starts with it.

    Calling this again closes the previously created session before
    opening a new one.

    Raises:
        ImportError: If aiohttp is not installed.
    """
    import logging

    try:
        import aiohttp
    except ImportError as e:
        raise ImportError("aiohttp package not installed. Install with: pip install aiohttp") from e

    logger = logging.getLogger(__name__)

    # Do not leak an earlier session when initialize() is called again.
    stale = getattr(self, '_session', None)
    if stale is not None and not getattr(stale, 'closed', False):
        await stale.close()

    self._session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=self.config.timeout or 30.0)
    )

    # Test connection and verify model availability
    try:
        async with self._session.get(f"{self.base_url}/api/tags") as response:
            if response.status != 200:
                logger.warning(f"Ollama: API returned status {response.status}")
            else:
                data = await response.json()
                models = [m['name'] for m in data.get('models', [])]
                if not models:
                    logger.warning("Ollama: No models found. Please pull a model first.")
                elif self.config.model not in models:
                    # Try without tag (e.g., 'llama2' instead of 'llama2:latest')
                    base_model = self.config.model.split(':')[0]
                    # Prefer the same model name with any tag, so that
                    # 'llama2' resolves to 'llama2:latest' rather than
                    # 'llama2-uncensored:latest'; fall back to prefix match.
                    candidates = (
                        [m for m in models if m.split(':')[0] == base_model]
                        or [m for m in models if m.startswith(base_model)]
                    )
                    if candidates:
                        self.config.model = candidates[0]
                        logger.info(f"Ollama: Using model {self.config.model}")
                    else:
                        logger.warning(f"Ollama: Model {self.config.model} not found. Available: {models}")
    except Exception as e:
        # Deliberately broad: an unreachable server must not prevent setup.
        logger.warning(f"Ollama: Could not connect to {self.base_url}: {e}")

    self._is_initialized = True
async def close(self) -> None:
    """Close the HTTP session, if one exists, and mark the provider uninitialized."""
    # NOTE(review): the flag reset is assumed to sit outside the session
    # check (run unconditionally) — confirm against the original file.
    session = getattr(self, '_session', None)
    if session:
        await session.close()
    self._is_initialized = False
async def validate_model(self) -> bool:
    """Report whether the configured model is listed by the Ollama server.

    Returns:
        True when ``GET {base_url}/api/tags`` answers 200 and lists either
        the exact configured model name or a name starting with the
        configured name's part before ``:``. False otherwise, including
        when the provider is not initialized, has no session, or the
        request raises.
    """
    if not (self._is_initialized and hasattr(self, '_session')):
        return False

    try:
        async with self._session.get(f"{self.base_url}/api/tags") as response:
            if response.status != 200:
                return False
            listing = await response.json()
            available = [entry['name'] for entry in listing.get('models', [])]
            # Check exact match first, then base model (tag stripped) prefix
            if self.config.model in available:
                return True
            prefix = self.config.model.split(':')[0]
            for name in available:
                if name.startswith(prefix):
                    return True
            return False
    except Exception:
        return False
def get_capabilities(self) -> List[ModelCapability]:
    """List capabilities inferred from the configured model name.

    Text generation, chat and streaming are always reported. Further
    capabilities are added by case-insensitive substring match on
    ``self.config.model``.

    Returns:
        List of ModelCapability values.
    """
    model_name = self.config.model.lower()

    found = [
        ModelCapability.TEXT_GENERATION,
        ModelCapability.CHAT,
        ModelCapability.STREAMING
    ]

    # Model families treated as supporting function calling
    tool_families = ('llama3', 'mistral', 'mixtral', 'qwen')
    if any(family in model_name for family in tool_families):
        found.append(ModelCapability.FUNCTION_CALLING)

    if 'llava' in model_name:
        found.append(ModelCapability.VISION)

    code_families = ('codellama', 'codegemma')
    if any(family in model_name for family in code_families):
        found.append(ModelCapability.CODE)

    return found
async def complete(
    self,
    messages: Union[str, List[LLMMessage]],
    config_overrides: Dict[str, Any] | None = None,
    **kwargs
) -> LLMResponse:
    """Generate completion using Ollama chat endpoint.

    Sends a non-streaming POST to ``{base_url}/api/chat``. If the server
    answers 400 with "does not support tools" in the body, the request is
    retried once without the ``tools`` entry.

    Args:
        messages: Input messages or prompt. A plain string is wrapped in a
            single user message.
        config_overrides: Optional dict to override config fields (model,
            temperature, max_tokens, top_p, stop_sequences, seed)
        **kwargs: Additional provider-specific parameters. Only ``tools``
            is read here; each tool must expose ``name`` and
            ``description`` attributes and may expose ``schema``.

    Returns:
        LLMResponse with the message content, parsed tool calls (or None),
        token usage (only when the reply contains ``eval_count``) and
        timing metadata.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response that
        is not handled by the tools retry.
    """
    # Lazily initialize so callers may skip an explicit initialize()
    if not self._is_initialized:
        await self.initialize()

    # Get runtime config (with overrides applied if provided)
    runtime_config = self._get_runtime_config(config_overrides)

    # Convert to message list
    if isinstance(messages, str):
        messages = [LLMMessage(role='user', content=messages)]

    # Add system prompt if configured (skipped when the caller already
    # supplied a leading system message)
    if runtime_config.system_prompt and (not messages or messages[0].role != 'system'):
        messages = [LLMMessage(role='system', content=runtime_config.system_prompt)] + list(messages)

    # Convert to Ollama format
    ollama_messages = self._messages_to_ollama(messages)

    # Build payload for chat endpoint
    payload = {
        'model': runtime_config.model,
        'messages': ollama_messages,
        'stream': False,
        'options': self._build_options(runtime_config)
    }

    # Add format if JSON mode requested
    if runtime_config.response_format == 'json':
        payload['format'] = 'json'

    # Handle tools if provided
    tools = kwargs.get('tools')
    if tools:
        # Convert Tool objects to dict format for _adapt_tools
        tool_dicts = []
        for tool in tools:
            tool_dicts.append({
                'name': tool.name,
                'description': tool.description,
                'parameters': tool.schema if hasattr(tool, 'schema') else {}
            })
        ollama_tools = self._adapt_tools(tool_dicts)
        payload['tools'] = ollama_tools

    async with self._session.post(f"{self.base_url}/api/chat", json=payload) as response:
        if response.status != 200:
            error_text = await response.text()
            import logging
            logger = logging.getLogger(__name__)

            # Handle tools not supported - retry without tools
            if response.status == 400 and "does not support tools" in error_text:
                model_name = runtime_config.model
                logger.warning(
                    f"Model '{model_name}' does not support tools. "
                    f"Continuing without tool support. "
                    f"For tool support, use: llama3.1:8b, llama3.2:3b, mistral:7b, or qwen2.5:7b"
                )
                # Retry without tools
                payload.pop('tools', None)
                async with self._session.post(f"{self.base_url}/api/chat", json=payload) as retry_response:
                    if retry_response.status != 200:
                        retry_error = await retry_response.text()
                        logger.error(f"Ollama API error on retry (status {retry_response.status}): {retry_error}")
                        retry_response.raise_for_status()
                    data = await retry_response.json()
            else:
                # NOTE(review): `data` is only bound on the success and
                # retry paths; this branch relies on raise_for_status()
                # raising for every non-200 status that reaches it —
                # confirm for non-error statuses such as 204.
                logger.error(f"Ollama API error (status {response.status}): {error_text}")
                # The full request payload (including message content) is
                # written to the log here.
                logger.error(f"Request payload: {json.dumps(payload, indent=2)}")
                response.raise_for_status()
        else:
            data = await response.json()

    # Extract response and tool calls
    message = data.get('message', {})
    content = message.get('content', '')
    raw_tool_calls = message.get('tool_calls', [])

    # Convert tool calls to ToolCall objects
    from ..base import ToolCall
    tool_calls = None
    if raw_tool_calls:
        tool_calls = []
        for tc in raw_tool_calls:
            func = tc.get('function', {})
            tool_calls.append(ToolCall(
                name=func.get('name', ''),
                parameters=func.get('arguments', {}),
                id=tc.get('id')
            ))

    return LLMResponse(
        content=content,
        model=runtime_config.model,
        # 'tool_calls' wins over the done flag; a reply without done=True
        # is reported as 'length'
        finish_reason='tool_calls' if tool_calls else ('stop' if data.get('done') else 'length'),
        usage={
            'prompt_tokens': data.get('prompt_eval_count', 0),
            'completion_tokens': data.get('eval_count', 0),
            'total_tokens': data.get('prompt_eval_count', 0) + data.get('eval_count', 0)
        } if 'eval_count' in data else None,
        tool_calls=tool_calls,
        metadata={
            'eval_duration': data.get('eval_duration'),
            'total_duration': data.get('total_duration'),
            'model_info': data.get('model', '')
        }
    )
async def stream_complete(
    self,
    messages: Union[str, List[LLMMessage]],
    config_overrides: Dict[str, Any] | None = None,
    **kwargs
) -> AsyncIterator[LLMStreamResponse]:
    """Generate streaming completion.

    Streams from ``{base_url}/api/generate``. A message list is flattened
    to a single text prompt via ``_build_prompt``; a string is sent as-is.
    The configured system prompt is sent in the request's ``system``
    field unless the message list already starts with a system message,
    mirroring what ``complete`` does for chat requests.

    Args:
        messages: Input messages or prompt
        config_overrides: Optional dict to override config fields (model,
            temperature, max_tokens, top_p, stop_sequences, seed)
        **kwargs: Additional provider-specific parameters (not read here)

    Yields:
        One LLMStreamResponse per non-empty line of the response body.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    if not self._is_initialized:
        await self.initialize()

    # Get runtime config (with overrides applied if provided)
    runtime_config = self._get_runtime_config(config_overrides)

    # Convert to Ollama format
    if isinstance(messages, str):
        prompt = messages
        starts_with_system = False
    else:
        prompt = self._build_prompt(messages)
        starts_with_system = bool(messages) and messages[0].role == 'system'

    # Stream API call
    payload = {
        'model': runtime_config.model,
        'prompt': prompt,
        'stream': True,
        'options': self._build_options(runtime_config)
    }

    # Apply the configured system prompt unless the caller supplied one
    if runtime_config.system_prompt and not starts_with_system:
        payload['system'] = runtime_config.system_prompt

    async with self._session.post(f"{self.base_url}/api/generate", json=payload) as response:
        response.raise_for_status()

        async for line in response.content:
            # Each line is one JSON object; a line holding only whitespace
            # (e.g. a bare newline) would make json.loads raise, so skip it.
            text = line.decode('utf-8').strip()
            if not text:
                continue
            data = json.loads(text)
            yield LLMStreamResponse(
                delta=data.get('response', ''),
                is_final=data.get('done', False),
                finish_reason='stop' if data.get('done') else None
            )
async def embed(
    self,
    texts: Union[str, List[str]],
    **kwargs
) -> Union[List[float], List[List[float]]]:
    """Generate embeddings via ``{base_url}/api/embeddings``.

    One request is sent per text, in order, using ``self.config.model``.

    Args:
        texts: A single string or a list of strings.
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        A single embedding when ``texts`` is a string, otherwise a list
        with one embedding per input text.
    """
    if not self._is_initialized:
        await self.initialize()

    is_single = isinstance(texts, str)
    batch = [texts] if is_single else texts
    endpoint = f"{self.base_url}/api/embeddings"

    vectors = []
    for item in batch:
        request_body = {'model': self.config.model, 'prompt': item}
        async with self._session.post(endpoint, json=request_body) as response:
            response.raise_for_status()
            body = await response.json()
            vectors.append(body['embedding'])

    if is_single:
        return vectors[0]
    return vectors
async def function_call(
    self,
    messages: List[LLMMessage],
    functions: List[Dict[str, Any]],
    **kwargs
) -> LLMResponse:
    """Execute function calling with native Ollama tools support.

    First tries the native tools API (``POST {base_url}/api/chat`` with a
    ``tools`` entry). If that attempt raises for any reason, falls back
    to a prompt-based approach: the function definitions are described in
    a system message and the model's reply is parsed as JSON.

    Args:
        messages: Conversation messages.
        functions: Tool definitions, passed through ``_adapt_tools``.
        **kwargs: Forwarded to ``complete`` on the fallback path only.

    Returns:
        LLMResponse; ``function_call`` is set to a ``{'name', 'arguments'}``
        dict when a call was found (native path: the first tool call only).
    """
    if not self._is_initialized:
        await self.initialize()

    # Add system prompt if configured
    if self.config.system_prompt and (not messages or messages[0].role != 'system'):
        messages = [LLMMessage(role='system', content=self.config.system_prompt)] + list(messages)

    # Convert to Ollama format
    ollama_messages = self._messages_to_ollama(messages)

    # Adapt tools to Ollama format
    ollama_tools = self._adapt_tools(functions)

    # Build payload with tools
    payload = {
        'model': self.config.model,
        'messages': ollama_messages,
        'tools': ollama_tools,
        'stream': False,
        'options': self._build_options()
    }

    try:
        async with self._session.post(f"{self.base_url}/api/chat", json=payload) as response:
            response.raise_for_status()
            data = await response.json()

        # Extract response and tool calls
        message = data.get('message', {})
        content = message.get('content', '')
        tool_calls = message.get('tool_calls', [])

        # Build response
        llm_response = LLMResponse(
            content=content,
            model=self.config.model,
            finish_reason='tool_calls' if tool_calls else 'stop',
            usage={
                'prompt_tokens': data.get('prompt_eval_count', 0),
                'completion_tokens': data.get('eval_count', 0),
                'total_tokens': data.get('prompt_eval_count', 0) + data.get('eval_count', 0)
            } if 'eval_count' in data else None
        )

        # Add tool call information if present
        if tool_calls:
            # Use first tool call (Ollama can return multiple)
            first_call = tool_calls[0].get('function', {})
            llm_response.function_call = {
                'name': first_call.get('name', ''),
                'arguments': first_call.get('arguments', {})
            }

        return llm_response

    except Exception as e:
        # Fallback to prompt-based approach if native tools not supported.
        # Deliberately broad: any failure of the native attempt lands here.
        import logging
        logging.getLogger(__name__).warning(
            f"Ollama native tools failed, falling back to prompt-based: {e}"
        )

        function_descriptions = json.dumps(functions, indent=2)

        # Built with explicit newlines so the text does not depend on how
        # the source lines are indented.
        system_prompt = (
            "You have access to these functions:\n"
            f"{function_descriptions}\n"
            "\n"
            "To call a function, respond with JSON:\n"
            '{"function": "name", "arguments": {...}}'
        )

        messages_with_system = [
            LLMMessage(role='system', content=system_prompt)
        ] + list(messages)

        llm_response = await self.complete(messages_with_system, **kwargs)

        # Try to parse function call. The reply may be non-JSON text,
        # None, or valid JSON that is not an object (e.g. `42` or a list);
        # none of those is a function call, and none should raise.
        try:
            func_data = json.loads(llm_response.content)
        except (json.JSONDecodeError, TypeError):
            func_data = None

        if isinstance(func_data, dict) and 'function' in func_data:
            llm_response.function_call = {
                'name': func_data['function'],
                'arguments': func_data.get('arguments', {})
            }

        return llm_response
def _build_prompt(self, messages: List["LLMMessage"]) -> str:
    """Flatten messages into a single role-labelled text prompt.

    Each system, user or assistant message contributes
    ``"<Label>: <content>"`` followed by a blank line. Messages with any
    other role are left out.
    """
    labels = {'system': 'System', 'user': 'User', 'assistant': 'Assistant'}
    return "".join(
        f"{labels[msg.role]}: {msg.content}\n\n"
        for msg in messages
        if msg.role in labels
    )