"""Client utilities for interacting with the Ollama API via direct HTTP calls.""" from __future__ import annotations import json from typing import Any, Dict, Iterable, Iterator from urllib.request import Request, urlopen from urllib.error import URLError, HTTPError class OllamaClientError(RuntimeError): """Base exception raised when Ollama operations fail.""" class OllamaUnavailableError(OllamaClientError): """Raised when the Ollama server is not available.""" class OllamaClient: """HTTP client for interacting with Ollama's REST API.""" def __init__(self, host: str | None = None) -> None: self._host = host or "http://localhost:11434" self._cached_models: list[str] | None = None self._is_available = False # Check connection during initialization without raising exceptions self._check_connection() # ------------------------------------------------------------------ helpers def _check_connection(self) -> None: """Check if Ollama server is reachable and update internal flag.""" try: req = Request(f"{self._host}/api/tags", method="GET") with urlopen(req, timeout=2) as response: self._is_available = response.status == 200 except (URLError, HTTPError, TimeoutError, Exception): self._is_available = False @property def is_available(self) -> bool: """Check if Ollama server is reachable.""" return self._is_available @property def default_model(self) -> str | None: """Get the first available model.""" models = self.list_models() return models[0] if models else None def list_models(self, force_refresh: bool = False) -> list[str]: """Return the available model names, caching the result for quick reuse.""" if self._cached_models is not None and not force_refresh: return list(self._cached_models) try: req = Request(f"{self._host}/api/tags", method="GET") with urlopen(req, timeout=5) as response: data = json.loads(response.read().decode()) # Update availability flag on successful call self._is_available = True except (URLError, HTTPError, TimeoutError, Exception): # Return empty list instead of raising on connection failure self._is_available = False return [] models: list[str] = [] for item in data.get("models", []): name = item.get("name") or item.get("model") if name: models.append(name) self._cached_models = models return list(models) # ------------------------------------------------------------------ chat APIs def chat( self, *, model: str, messages: Iterable[Dict[str, str]], options: Dict[str, Any] | None = None, ) -> dict[str, str] | None: """Execute a blocking chat call against Ollama.""" # Return error message instead of raising when unavailable if not self._is_available: return { "role": "assistant", "content": "Ollama is not running. 
Start Ollama with: ollama serve", } payload = { "model": model, "messages": list(messages), "stream": False, } # Add options if provided if options: payload["options"] = options try: req = Request( f"{self._host}/api/chat", data=json.dumps(payload).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST", ) with urlopen(req, timeout=120) as response: result = json.loads(response.read().decode()) # Update availability flag on successful call self._is_available = True except (URLError, HTTPError, TimeoutError, Exception) as exc: # Update availability flag and return error message self._is_available = False return { "role": "assistant", "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve", } # Parse the response message = result.get("message") if not message: return {"role": "assistant", "content": ""} role = message.get("role", "assistant") content = message.get("content", "") return {"role": role, "content": content} def stream_chat( self, *, model: str, messages: Iterable[Dict[str, str]], options: Dict[str, Any] | None = None ) -> Iterator[dict[str, Any]]: """Execute a streaming chat call against Ollama. Yields dictionaries containing token data from the streaming response. Each yielded dict may contain 'message' with 'content' field for tokens. """ # Return error message instead of raising when unavailable if not self._is_available: yield { "role": "assistant", "content": "Ollama is not running. Start Ollama with: ollama serve", "done": True, } return payload = { "model": model, "messages": list(messages), "stream": True, "think": True, # Enable thinking output for thinking models } # Add options if provided if options: payload["options"] = options try: req = Request( f"{self._host}/api/chat", data=json.dumps(payload).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST", ) with urlopen(req, timeout=120) as response: # Update availability flag on successful connection self._is_available = True # Read streaming response line by line for line in response: if not line: continue try: chunk = json.loads(line.decode("utf-8")) yield chunk # Check if streaming is complete if chunk.get("done", False): break except json.JSONDecodeError: # Skip malformed JSON lines continue except (URLError, HTTPError, TimeoutError, Exception) as exc: # Update availability flag and yield error message self._is_available = False yield { "role": "assistant", "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve", "done": True, "error": True, } # ------------------------------------------------------------------ internals def _make_request( self, endpoint: str, method: str = "GET", data: dict | None = None ) -> dict: """Make an HTTP request to the Ollama API.""" url = f"{self._host}{endpoint}" if data: req = Request( url, data=json.dumps(data).encode("utf-8"), headers={"Content-Type": "application/json"}, method=method, ) else: req = Request(url, method=method) try: with urlopen(req, timeout=30) as response: return json.loads(response.read().decode()) except (URLError, HTTPError) as exc: raise OllamaClientError(f"Request failed: {exc}") from exc
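

# ------------------------------------------------------------------ example
# Minimal usage sketch (not part of the client itself): exercises
# list_models(), chat(), and stream_chat() against a locally running Ollama
# server. The fallback model name "llama3.2" is only an illustrative
# assumption; substitute any name reported by list_models().
if __name__ == "__main__":
    client = OllamaClient()
    if not client.is_available:
        print("Ollama is not running. Start Ollama with: ollama serve")
    else:
        print("Available models:", client.list_models())
        model = client.default_model or "llama3.2"  # assumed fallback model name

        # Blocking call: returns a single {"role", "content"} dict.
        reply = client.chat(
            model=model,
            messages=[{"role": "user", "content": "Say hello in one sentence."}],
        )
        print(reply["content"])

        # Streaming call: yields chunks; in normal responses the tokens arrive
        # under chunk["message"]["content"].
        for chunk in client.stream_chat(
            model=model,
            messages=[{"role": "user", "content": "Count to three."}],
        ):
            message = chunk.get("message") or {}
            print(message.get("content", ""), end="", flush=True)
        print()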