- Reorganize project structure and file locations
- Add ReasoningController to manage model selection and reasoning mode
- Update design and requirements for reasoning mode toggle
- Implement model switching between Qwen3-4B-Instruct and Qwen3-4B-Thinking models
- Remove deprecated files and consolidate project layout
- Add new steering and specification documentation
- Clean up and remove unnecessary files and directories
- Prepare for enhanced AI sidebar functionality with more flexible model handling
"""Client utilities for interacting with the Ollama API via direct HTTP calls."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any, Dict, Iterable, Iterator
|
|
from urllib.request import Request, urlopen
|
|
from urllib.error import URLError, HTTPError
|
|
|
|
|
|
class OllamaClientError(RuntimeError):
    """Base exception raised when Ollama operations fail."""


class OllamaUnavailableError(OllamaClientError):
    """Raised when the Ollama server is not available."""

class OllamaClient:
    """HTTP client for interacting with Ollama's REST API."""

    def __init__(self, host: str | None = None) -> None:
        self._host = host or "http://localhost:11434"
        self._cached_models: list[str] | None = None
        self._is_available = False

        # Check connection during initialization without raising exceptions
        self._check_connection()

    # ------------------------------------------------------------------ helpers
    def _check_connection(self) -> None:
        """Check whether the Ollama server is reachable and update the internal flag."""
        try:
            req = Request(f"{self._host}/api/tags", method="GET")
            with urlopen(req, timeout=2) as response:
                self._is_available = response.status == 200
        except Exception:
            # Broad catch: the availability probe must never raise
            self._is_available = False
    @property
    def is_available(self) -> bool:
        """Return True if the Ollama server was reachable on the last request."""
        return self._is_available

    @property
    def default_model(self) -> str | None:
        """Get the first available model, or None if no models are installed."""
        models = self.list_models()
        return models[0] if models else None
    def list_models(self, force_refresh: bool = False) -> list[str]:
        """Return the available model names, caching the result for quick reuse."""
        if self._cached_models is not None and not force_refresh:
            return list(self._cached_models)

        try:
            req = Request(f"{self._host}/api/tags", method="GET")
            with urlopen(req, timeout=5) as response:
                data = json.loads(response.read().decode())
            # Update availability flag on successful call
            self._is_available = True
        except Exception:
            # Return an empty list instead of raising on connection failure
            self._is_available = False
            return []

        models: list[str] = []
        for item in data.get("models", []):
            name = item.get("name") or item.get("model")
            if name:
                models.append(name)

        self._cached_models = models
        return list(models)
    # ------------------------------------------------------------------ chat APIs
    def chat(
        self,
        *,
        model: str,
        messages: Iterable[Dict[str, str]],
        options: Dict[str, Any] | None = None,
    ) -> dict[str, str] | None:
        """Execute a blocking chat call against Ollama and return the reply message."""
        # Return an error message instead of raising when unavailable
        if not self._is_available:
            return {
                "role": "assistant",
                "content": "Ollama is not running. Start Ollama with: ollama serve",
            }

        payload: Dict[str, Any] = {
            "model": model,
            "messages": list(messages),
            "stream": False,
        }

        # Forward sampling/runtime options if provided
        if options:
            payload["options"] = options

        try:
            req = Request(
                f"{self._host}/api/chat",
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urlopen(req, timeout=120) as response:
                result = json.loads(response.read().decode())
            # Update availability flag on successful call
            self._is_available = True
        except Exception as exc:
            # Update availability flag and return an error message
            self._is_available = False
            return {
                "role": "assistant",
                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
            }

        # Parse the response
        message = result.get("message")
        if not message:
            return {"role": "assistant", "content": ""}

        role = message.get("role", "assistant")
        content = message.get("content", "")

        return {"role": role, "content": content}
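    # A minimal usage sketch for the blocking API (illustrative only; the model
    # tag and option values below are assumptions, not defaults of this class):
    #
    #     client = OllamaClient()
    #     reply = client.chat(
    #         model=client.default_model or "qwen3:4b",
    #         messages=[{"role": "user", "content": "Hello"}],
    #         options={"temperature": 0.7, "num_ctx": 8192},
    #     )
    #     print(reply["content"])
    #
    # The options dict is forwarded verbatim as the "options" field of Ollama's
    # /api/chat payload, so any keys the server accepts can be passed through.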
    def stream_chat(
        self,
        *,
        model: str,
        messages: Iterable[Dict[str, str]],
        options: Dict[str, Any] | None = None,
    ) -> Iterator[dict[str, Any]]:
        """Execute a streaming chat call against Ollama.

        Yields the raw chunk dictionaries from the newline-delimited JSON
        response. Each chunk may contain a 'message' dict whose 'content'
        field holds the next token(s); the final chunk has 'done' set to True.
        """
        # Yield an error message instead of raising when unavailable
        if not self._is_available:
            yield {
                "role": "assistant",
                "content": "Ollama is not running. Start Ollama with: ollama serve",
                "done": True,
            }
            return

        payload: Dict[str, Any] = {
            "model": model,
            "messages": list(messages),
            "stream": True,
            "think": True,  # Enable thinking output for thinking models
        }

        # Forward sampling/runtime options if provided
        if options:
            payload["options"] = options

        try:
            req = Request(
                f"{self._host}/api/chat",
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )

            with urlopen(req, timeout=120) as response:
                # Update availability flag on successful connection
                self._is_available = True

                # Read the streaming response line by line (one JSON object per line)
                for line in response:
                    if not line:
                        continue

                    try:
                        chunk = json.loads(line.decode("utf-8"))
                    except json.JSONDecodeError:
                        # Skip malformed JSON lines
                        continue

                    yield chunk

                    # Stop once the server signals completion
                    if chunk.get("done", False):
                        break

        except Exception as exc:
            # Update availability flag and yield an error message
            self._is_available = False
            yield {
                "role": "assistant",
                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
                "done": True,
                "error": True,
            }
    # ------------------------------------------------------------------ internals
    def _make_request(
        self, endpoint: str, method: str = "GET", data: dict | None = None
    ) -> dict:
        """Make an HTTP request to the Ollama API."""
        url = f"{self._host}{endpoint}"

        if data:
            req = Request(
                url,
                data=json.dumps(data).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method=method,
            )
        else:
            req = Request(url, method=method)

        try:
            with urlopen(req, timeout=30) as response:
                return json.loads(response.read().decode())
        except (URLError, HTTPError) as exc:
            raise OllamaClientError(f"Request failed: {exc}") from exc
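

# A minimal end-to-end sketch of consuming the streaming API. This block is an
# illustrative example, not part of the original module: the fallback model tag
# is an assumption, and error chunks are printed like regular content.
if __name__ == "__main__":
    client = OllamaClient()

    if not client.is_available:
        print("Ollama is not running. Start it with: ollama serve")
    else:
        print("Installed models:", client.list_models())

        answer_parts: list[str] = []
        for chunk in client.stream_chat(
            model=client.default_model or "qwen3:4b",  # assumed model tag
            messages=[{"role": "user", "content": "Explain what an LLM is in one sentence."}],
        ):
            # Token chunks carry text under message.content; error chunks use
            # a top-level "content" key, so fall back to that.
            token = (chunk.get("message") or {}).get("content") or chunk.get("content", "")
            if token:
                answer_parts.append(token)
                print(token, end="", flush=True)
            if chunk.get("done"):
                break

        print("\n\nFull answer:", "".join(answer_parts))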