- Add comprehensive Ollama connection error handling strategy
- Implement OllamaClient with non-blocking initialization and connection checks
- Create OllamaAvailabilityMonitor for periodic Ollama connection tracking
- Update design and requirements to support graceful Ollama unavailability
- Add new project structure for AI sidebar module with initial implementation
- Enhance error handling to prevent application crashes when Ollama is not running
- Prepare for future improvements in AI sidebar interaction and resilience
131 lines
4.4 KiB
Python
131 lines
4.4 KiB
Python
"""Client utilities for interacting with the Ollama API via direct HTTP calls."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any, Dict, Iterable, Iterator
|
|
from urllib.request import Request, urlopen
|
|
from urllib.error import URLError, HTTPError
|
|
|
|
|
|
class OllamaClientError(RuntimeError):
    """Root of the Ollama exception hierarchy; raised when an API call fails."""
|
|
|
|
|
|
class OllamaUnavailableError(OllamaClientError):
    """Signals that no reachable Ollama server could be contacted."""
|
|
|
|
|
|
class OllamaClient:
    """HTTP client for interacting with Ollama's REST API.

    All HTTP traffic is funneled through :meth:`_make_request` so that
    connection handling and error translation live in one place.
    Connection failures are absorbed where a crash would hurt the UI:
    ``is_available`` reports ``False``, ``chat`` degrades to an
    explanatory assistant message, and ``list_models`` raises
    :class:`OllamaClientError` for the caller to handle.
    """

    def __init__(self, host: str | None = None) -> None:
        """Create a client.

        Args:
            host: Base URL of the Ollama server; defaults to the
                standard local endpoint when ``None`` or empty.
        """
        self._host = host or "http://localhost:11434"
        # Model names are cached after the first successful listing.
        self._cached_models: list[str] | None = None

    # ------------------------------------------------------------------ helpers

    @property
    def is_available(self) -> bool:
        """Return ``True`` if the Ollama server answers ``/api/tags``.

        Uses a short timeout so callers polling availability are not
        blocked for long while the server is down.
        """
        try:
            # Any successful, decodable response means the server is up;
            # non-2xx statuses surface as HTTPError inside _make_request.
            self._make_request("/api/tags", timeout=2)
            return True
        except OllamaClientError:
            return False

    @property
    def default_model(self) -> str | None:
        """Return the first available model name, or ``None`` if none exist.

        Raises:
            OllamaClientError: If the model list cannot be fetched.
        """
        models = self.list_models()
        return models[0] if models else None

    def list_models(self, force_refresh: bool = False) -> list[str]:
        """Return the available model names, caching the result for quick reuse.

        Args:
            force_refresh: When ``True``, bypass the cache and query the
                server again.

        Returns:
            A fresh copy of the model-name list, so callers cannot
            mutate the internal cache.

        Raises:
            OllamaClientError: If the server cannot be reached or
                returns an invalid response.
        """
        if self._cached_models is not None and not force_refresh:
            return list(self._cached_models)

        try:
            data = self._make_request("/api/tags", timeout=5)
        except OllamaClientError as exc:
            # Re-wrap with context; __cause__ keeps the transport error.
            raise OllamaClientError(
                f"Failed to list models: {exc.__cause__ or exc}"
            ) from exc

        models: list[str] = []
        for item in data.get("models", []):
            # The tag entry may expose the name under "name" or "model".
            name = item.get("name") or item.get("model")
            if name:
                models.append(name)

        self._cached_models = models
        return list(models)

    # ------------------------------------------------------------------ chat APIs

    def chat(
        self,
        *,
        model: str,
        messages: Iterable[Dict[str, str]],
    ) -> dict[str, str] | None:
        """Execute a blocking chat call against Ollama.

        Args:
            model: Name of the model to converse with.
            messages: Conversation history as ``{"role", "content"}`` dicts.

        Returns:
            A ``{"role", "content"}`` dict.  When the server is
            unreachable, the dict carries an explanatory message instead
            of raising, so a UI can render the failure inline.
        """
        payload = {
            "model": model,
            "messages": list(messages),
            "stream": False,
        }

        try:
            # Generation can be slow, so allow a generous timeout.
            result = self._make_request(
                "/api/chat", method="POST", data=payload, timeout=120
            )
        except OllamaClientError as exc:
            # Degrade gracefully instead of crashing the caller; report
            # the underlying transport error, not the wrapper.
            return {
                "role": "assistant",
                "content": f"Unable to reach Ollama: {exc.__cause__ or exc}",
            }

        # An absent "message" field yields an empty assistant reply.
        message = result.get("message")
        if not message:
            return {"role": "assistant", "content": ""}

        return {
            "role": message.get("role", "assistant"),
            "content": message.get("content", ""),
        }

    def stream_chat(
        self, *, model: str, messages: Iterable[Dict[str, str]]
    ) -> Iterator[dict[str, Any]]:
        """Placeholder for streaming API - not yet implemented."""
        raise NotImplementedError("Streaming chat is not yet implemented")

    # ------------------------------------------------------------------ internals

    def _make_request(
        self,
        endpoint: str,
        method: str = "GET",
        data: dict | None = None,
        timeout: float = 30,
    ) -> dict:
        """Make an HTTP request to the Ollama API and decode the JSON body.

        Args:
            endpoint: Path (beginning with ``/``) appended to the host URL.
            method: HTTP verb to use.
            data: JSON-serializable request body, or ``None`` for a
                body-less request.
            timeout: Socket timeout in seconds.

        Raises:
            OllamaClientError: On any connection, HTTP, timeout, or
                JSON-decoding error; the original exception is chained.
        """
        url = f"{self._host}{endpoint}"

        # `is not None` so an explicitly-empty payload ({}) is still sent
        # as a request body rather than silently dropped.
        if data is not None:
            req = Request(
                url,
                data=json.dumps(data).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method=method,
            )
        else:
            req = Request(url, method=method)

        try:
            with urlopen(req, timeout=timeout) as response:
                return json.loads(response.read().decode())
        except (URLError, HTTPError, TimeoutError, json.JSONDecodeError) as exc:
            # TimeoutError is caught explicitly: a read timeout surfaces
            # directly as TimeoutError, not wrapped in URLError.
            raise OllamaClientError(f"Request failed: {exc}") from exc
|