- Reorganize project structure and file locations
- Add ReasoningController to manage model selection and reasoning mode
- Update design and requirements for reasoning mode toggle
- Implement model switching between Qwen3-4B-Instruct and Qwen3-4B-Thinking models
- Remove deprecated files and consolidate project layout
- Add new steering and specification documentation
- Clean up and remove unnecessary files and directories
- Prepare for enhanced AI sidebar functionality with more flexible model handling
"""Client utilities for interacting with the Ollama API via direct HTTP calls."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any, Dict, Iterable, Iterator
|
|
from urllib.request import Request, urlopen
|
|
from urllib.error import URLError, HTTPError
|
|
|
|
|
|
class OllamaClientError(RuntimeError):
    """Base exception raised when Ollama operations fail."""


class OllamaUnavailableError(OllamaClientError):
    """Raised when the Ollama server is not available."""

class OllamaClient:
    """HTTP client for interacting with Ollama's REST API."""

    def __init__(self, host: str | None = None) -> None:
        self._host = host or "http://localhost:11434"
        self._cached_models: list[str] | None = None
        self._is_available = False

        # Check connection during initialization without raising exceptions
        self._check_connection()

    # ------------------------------------------------------------------ helpers
    def _check_connection(self) -> None:
        """Check whether the Ollama server is reachable and update the internal flag."""
        try:
            req = Request(f"{self._host}/api/tags", method="GET")
            with urlopen(req, timeout=2) as response:
                self._is_available = response.status == 200
        except Exception:
            # Broad catch: the availability probe must never raise
            self._is_available = False
    @property
    def is_available(self) -> bool:
        """Return True if the Ollama server was reachable on the last request."""
        return self._is_available

    @property
    def default_model(self) -> str | None:
        """Get the first available model, or None if no models are installed."""
        models = self.list_models()
        return models[0] if models else None
    def list_models(self, force_refresh: bool = False) -> list[str]:
        """Return the available model names, caching the result for quick reuse."""
        if self._cached_models is not None and not force_refresh:
            return list(self._cached_models)

        try:
            req = Request(f"{self._host}/api/tags", method="GET")
            with urlopen(req, timeout=5) as response:
                data = json.loads(response.read().decode())
            # Update availability flag on successful call
            self._is_available = True
        except Exception:
            # Return an empty list instead of raising on connection failure
            self._is_available = False
            return []

        models: list[str] = []
        for item in data.get("models", []):
            name = item.get("name") or item.get("model")
            if name:
                models.append(name)

        self._cached_models = models
        return list(models)
    # ------------------------------------------------------------------ chat APIs
    def chat(
        self,
        *,
        model: str,
        messages: Iterable[Dict[str, str]],
        options: Dict[str, Any] | None = None,
    ) -> dict[str, str] | None:
        """Execute a blocking chat call against Ollama and return the reply message."""
        # Return an error message instead of raising when unavailable
        if not self._is_available:
            return {
                "role": "assistant",
                "content": "Ollama is not running. Start Ollama with: ollama serve",
            }

        payload: Dict[str, Any] = {
            "model": model,
            "messages": list(messages),
            "stream": False,
        }

        # Forward sampling/runtime options if provided
        if options:
            payload["options"] = options

        try:
            req = Request(
                f"{self._host}/api/chat",
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urlopen(req, timeout=120) as response:
                result = json.loads(response.read().decode())
            # Update availability flag on successful call
            self._is_available = True
        except Exception as exc:
            # Update availability flag and return an error message
            self._is_available = False
            return {
                "role": "assistant",
                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
            }

        # Parse the response
        message = result.get("message")
        if not message:
            return {"role": "assistant", "content": ""}

        role = message.get("role", "assistant")
        content = message.get("content", "")

        return {"role": role, "content": content}
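    # A minimal usage sketch for the blocking API (illustrative only; the model
    # tag and option values below are assumptions, not defaults of this class):
    #
    #     client = OllamaClient()
    #     reply = client.chat(
    #         model=client.default_model or "qwen3:4b",
    #         messages=[{"role": "user", "content": "Hello"}],
    #         options={"temperature": 0.7, "num_ctx": 8192},
    #     )
    #     print(reply["content"])
    #
    # The options dict is forwarded verbatim as the "options" field of Ollama's
    # /api/chat payload, so any keys the server accepts can be passed through.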
    def stream_chat(
        self,
        *,
        model: str,
        messages: Iterable[Dict[str, str]],
        options: Dict[str, Any] | None = None,
    ) -> Iterator[dict[str, Any]]:
        """Execute a streaming chat call against Ollama.

        Yields the raw chunk dictionaries from the newline-delimited JSON
        response. Each chunk may contain a 'message' dict whose 'content'
        field holds the next token(s); the final chunk has 'done' set to True.
        """
        # Yield an error message instead of raising when unavailable
        if not self._is_available:
            yield {
                "role": "assistant",
                "content": "Ollama is not running. Start Ollama with: ollama serve",
                "done": True,
            }
            return

        payload: Dict[str, Any] = {
            "model": model,
            "messages": list(messages),
            "stream": True,
            "think": True,  # Enable thinking output for thinking models
        }

        # Forward sampling/runtime options if provided
        if options:
            payload["options"] = options

        try:
            req = Request(
                f"{self._host}/api/chat",
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )

            with urlopen(req, timeout=120) as response:
                # Update availability flag on successful connection
                self._is_available = True

                # Read the streaming response line by line (one JSON object per line)
                for line in response:
                    if not line:
                        continue

                    try:
                        chunk = json.loads(line.decode("utf-8"))
                    except json.JSONDecodeError:
                        # Skip malformed JSON lines
                        continue

                    yield chunk

                    # Stop once the server signals completion
                    if chunk.get("done", False):
                        break

        except Exception as exc:
            # Update availability flag and yield an error message
            self._is_available = False
            yield {
                "role": "assistant",
                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
                "done": True,
                "error": True,
            }
    # ------------------------------------------------------------------ internals
    def _make_request(
        self, endpoint: str, method: str = "GET", data: dict | None = None
    ) -> dict:
        """Make an HTTP request to the Ollama API."""
        url = f"{self._host}{endpoint}"

        if data:
            req = Request(
                url,
                data=json.dumps(data).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method=method,
            )
        else:
            req = Request(url, method=method)

        try:
            with urlopen(req, timeout=30) as response:
                return json.loads(response.read().decode())
        except (URLError, HTTPError) as exc:
            raise OllamaClientError(f"Request failed: {exc}") from exc
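

# A minimal end-to-end sketch of consuming the streaming API. This block is an
# illustrative example, not part of the original module: the fallback model tag
# is an assumption, and error chunks are printed like regular content.
if __name__ == "__main__":
    client = OllamaClient()

    if not client.is_available:
        print("Ollama is not running. Start it with: ollama serve")
    else:
        print("Installed models:", client.list_models())

        answer_parts: list[str] = []
        for chunk in client.stream_chat(
            model=client.default_model or "qwen3:4b",  # assumed model tag
            messages=[{"role": "user", "content": "Explain what an LLM is in one sentence."}],
        ):
            # Token chunks carry text under message.content; error chunks use
            # a top-level "content" key, so fall back to that.
            token = (chunk.get("message") or {}).get("content") or chunk.get("content", "")
            if token:
                answer_parts.append(token)
                print(token, end="", flush=True)
            if chunk.get("done"):
                break

        print("\n\nFull answer:", "".join(answer_parts))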