refactor(aisidebar): restructure project and implement reasoning mode toggle
- Reorganize project structure and file locations
- Add ReasoningController to manage model selection and reasoning mode (see the sketch below)
- Update design and requirements for the reasoning mode toggle
- Implement model switching between the Qwen3-4B-Instruct and Qwen3-4B-Thinking models
- Remove deprecated files and consolidate project layout
- Add new steering and specification documentation
- Clean up and remove unnecessary files and directories
- Prepare for enhanced AI sidebar functionality with more flexible model handling
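Only the ollama_client.py diff is reproduced below, so the ReasoningController itself is not shown here. As a rough sketch of how a reasoning-mode toggle could map onto model selection (the class, attribute, and model-tag names are assumptions for illustration, not code from this commit):

# Hypothetical sketch -- names and model tags are assumptions, not code from this commit.
from dataclasses import dataclass


@dataclass
class ReasoningController:
    """Chooses which model the sidebar sends to Ollama based on the reasoning toggle."""

    instruct_model: str = "Qwen3-4B-Instruct"  # assumed tag for the non-reasoning model
    thinking_model: str = "Qwen3-4B-Thinking"  # assumed tag for the reasoning model
    reasoning_enabled: bool = False

    def toggle(self) -> bool:
        """Flip reasoning mode and return the new state."""
        self.reasoning_enabled = not self.reasoning_enabled
        return self.reasoning_enabled

    @property
    def active_model(self) -> str:
        """Model name to pass to OllamaClient.chat() or stream_chat()."""
        return self.thinking_model if self.reasoning_enabled else self.instruct_model

In the actual sidebar the controller would hand active_model to the HTTP client whose rewrite follows.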
ollama_client.py
@@ -1,13 +1,11 @@
-"""Client utilities for interacting with the Ollama API."""
+"""Client utilities for interacting with the Ollama API via direct HTTP calls."""
 
 from __future__ import annotations
 
+import json
 from typing import Any, Dict, Iterable, Iterator
-
-try: # pragma: no cover - optional dependency may not be installed in CI
-    import ollama
-except ImportError: # pragma: no cover - fallback path for environments without Ollama
-    ollama = None # type: ignore[assignment]
+from urllib.request import Request, urlopen
+from urllib.error import URLError, HTTPError
 
 
 class OllamaClientError(RuntimeError):
@@ -15,88 +13,60 @@ class OllamaClientError(RuntimeError):
 
 
 class OllamaUnavailableError(OllamaClientError):
-    """Raised when the Ollama Python SDK is not available."""
+    """Raised when the Ollama server is not available."""
 
 
 class OllamaClient:
-    """Thin wrapper around the Ollama Python SDK with graceful degradation."""
+    """HTTP client for interacting with Ollama's REST API."""
 
     def __init__(self, host: str | None = None) -> None:
-        self._host = host
-        self._client = None
+        self._host = host or "http://localhost:11434"
         self._cached_models: list[str] | None = None
         self._is_available = False
 
-        if ollama is None:
-            return
-
-        # Try to initialize client and check connection
-        try:
-            if host and hasattr(ollama, "Client"):
-                self._client = ollama.Client(host=host) # type: ignore[call-arg]
-
-            # Test connection by attempting to list models
-            self._check_connection()
-        except Exception:
-            # Silently fail - availability flag remains False
-            pass
+        # Check connection during initialization without raising exceptions
+        self._check_connection()
 
     # ------------------------------------------------------------------ helpers
     def _check_connection(self) -> None:
-        """Check if Ollama is available and update internal flag."""
-        if ollama is None:
-            self._is_available = False
-            return
-
+        """Check if Ollama server is reachable and update internal flag."""
         try:
-            # Attempt a simple list call to verify connection
-            self._call_sdk("list") # type: ignore[arg-type]
-            self._is_available = True
-        except Exception:
+            req = Request(f"{self._host}/api/tags", method="GET")
+            with urlopen(req, timeout=2) as response:
+                self._is_available = response.status == 200
+        except (URLError, HTTPError, TimeoutError, Exception):
             self._is_available = False
 
     @property
     def is_available(self) -> bool:
         """Check if Ollama server is reachable."""
         return self._is_available
 
     @property
     def default_model(self) -> str | None:
         """Get the first available model."""
         models = self.list_models()
         return models[0] if models else None
 
     def list_models(self, force_refresh: bool = False) -> list[str]:
         """Return the available model names, caching the result for quick reuse."""
         if not self.is_available:
             return []
 
         if self._cached_models is not None and not force_refresh:
             return list(self._cached_models)
 
         try:
-            response = self._call_sdk("list") # type: ignore[arg-type]
-            # Update availability flag on successful call
-            self._is_available = True
-        except OllamaClientError:
-            self._is_available = False
-            return []
-        except Exception:
+            req = Request(f"{self._host}/api/tags", method="GET")
+            with urlopen(req, timeout=5) as response:
+                data = json.loads(response.read().decode())
+            # Update availability flag on successful call
+            self._is_available = True
+        except (URLError, HTTPError, TimeoutError, Exception):
+            # Return empty list instead of raising on connection failure
             self._is_available = False
             return []
 
         models: list[str] = []
-        # Handle both dict responses (old SDK) and Pydantic objects (new SDK)
-        if isinstance(response, dict):
-            model_list = response.get("models", [])
-        else:
-            # Pydantic object
-            model_list = getattr(response, "models", [])
-
-        for item in model_list:
-            if isinstance(item, dict):
-                name = item.get("name") or item.get("model")
-            else:
-                # Pydantic object
-                name = getattr(item, "name", None) or getattr(item, "model", None)
+        for item in data.get("models", []):
+            name = item.get("name") or item.get("model")
             if name:
                 models.append(name)
@@ -109,98 +79,145 @@ class OllamaClient:
         *,
         model: str,
         messages: Iterable[Dict[str, str]],
+        options: Dict[str, Any] | None = None,
     ) -> dict[str, str] | None:
         """Execute a blocking chat call against Ollama."""
-        if not self.is_available:
-            if ollama is None:
-                return {
-                    "role": "assistant",
-                    "content": "Ollama SDK is not installed; install `ollama` to enable responses.",
-                }
-            else:
-                return {
-                    "role": "assistant",
-                    "content": "Ollama is not running. Start Ollama with: ollama serve",
-                }
+        # Return error message instead of raising when unavailable
+        if not self._is_available:
+            return {
+                "role": "assistant",
+                "content": "Ollama is not running. Start Ollama with: ollama serve",
+            }
 
+        payload = {
+            "model": model,
+            "messages": list(messages),
+            "stream": False,
+        }
+
+        # Add options if provided
+        if options:
+            payload["options"] = options
+
         try:
-            result = self._call_sdk(
-                "chat",
-                model=model,
-                messages=list(messages),
-                stream=False,
+            req = Request(
+                f"{self._host}/api/chat",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
             )
-            # Update availability flag on successful call
-            self._is_available = True
-        except OllamaClientError as exc:
-            self._is_available = False
-            return {
-                "role": "assistant",
-                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
-            }
-        except Exception as exc:
+            with urlopen(req, timeout=120) as response:
+                result = json.loads(response.read().decode())
+            # Update availability flag on successful call
+            self._is_available = True
+        except (URLError, HTTPError, TimeoutError, Exception) as exc:
+            # Update availability flag and return error message
             self._is_available = False
             return {
                 "role": "assistant",
                 "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
             }
 
-        # Handle both dict responses (old SDK) and Pydantic objects (new SDK)
-        if isinstance(result, dict):
-            message = result.get("message")
-            if not message:
-                return {"role": "assistant", "content": ""}
-            role = message.get("role") or "assistant"
-            content = message.get("content") or ""
-        else:
-            # Pydantic object (ollama SDK >= 0.4.0)
-            message = getattr(result, "message", None)
-            if not message:
-                return {"role": "assistant", "content": ""}
-            role = getattr(message, "role", "assistant")
-            content = getattr(message, "content", "")
+        # Parse the response
+        message = result.get("message")
+        if not message:
+            return {"role": "assistant", "content": ""}
+
+        role = message.get("role", "assistant")
+        content = message.get("content", "")
 
         return {"role": role, "content": content}
 
     def stream_chat(
-        self, *, model: str, messages: Iterable[Dict[str, str]]
+        self, *, model: str, messages: Iterable[Dict[str, str]], options: Dict[str, Any] | None = None
     ) -> Iterator[dict[str, Any]]:
-        """Placeholder that exposes the streaming API for future UI hooks."""
-        if not self.is_available:
-            raise OllamaUnavailableError(
-                "Streaming requires the Ollama Python SDK to be installed."
-            )
+        """Execute a streaming chat call against Ollama.
+
+        Yields dictionaries containing token data from the streaming response.
+        Each yielded dict may contain 'message' with 'content' field for tokens.
+        """
+        # Return error message instead of raising when unavailable
+        if not self._is_available:
+            yield {
+                "role": "assistant",
+                "content": "Ollama is not running. Start Ollama with: ollama serve",
+                "done": True,
+            }
+            return
+
+        payload = {
+            "model": model,
+            "messages": list(messages),
+            "stream": True,
+            "think": True, # Enable thinking output for thinking models
+        }
+
+        # Add options if provided
+        if options:
+            payload["options"] = options
 
         try:
-            stream = self._call_sdk(
-                "chat",
-                model=model,
-                messages=list(messages),
-                stream=True,
+            req = Request(
+                f"{self._host}/api/chat",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
             )
-        except OllamaClientError as exc:
-            raise OllamaClientError(f"Failed to start streaming chat: {exc}") from exc
-
-        if not hasattr(stream, "__iter__"):
-            raise OllamaClientError("Ollama returned a non-iterable stream response.")
-        return iter(stream)
+
+            with urlopen(req, timeout=120) as response:
+                # Update availability flag on successful connection
+                self._is_available = True
+
+                # Read streaming response line by line
+                for line in response:
+                    if not line:
+                        continue
+
+                    try:
+                        chunk = json.loads(line.decode("utf-8"))
+                        yield chunk
+
+                        # Check if streaming is complete
+                        if chunk.get("done", False):
+                            break
+                    except json.JSONDecodeError:
+                        # Skip malformed JSON lines
+                        continue
+
+        except (URLError, HTTPError, TimeoutError, Exception) as exc:
+            # Update availability flag and yield error message
+            self._is_available = False
+            yield {
+                "role": "assistant",
+                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
+                "done": True,
+                "error": True,
+            }
 
     # ------------------------------------------------------------------ internals
-    def _call_sdk(self, method: str, *args: Any, **kwargs: Any) -> Any:
-        if not self.is_available:
-            raise OllamaUnavailableError(
-                "Ollama Python SDK is not available in the environment."
-            )
+    def _make_request(
+        self, endpoint: str, method: str = "GET", data: dict | None = None
+    ) -> dict:
+        """Make an HTTP request to the Ollama API."""
+        url = f"{self._host}{endpoint}"
 
-        target = self._client if self._client is not None else ollama
-        if target is None or not hasattr(target, method):
-            raise OllamaClientError(
-                f"Ollama SDK does not expose method '{method}'. Install or update the SDK."
+        if data:
+            req = Request(
+                url,
+                data=json.dumps(data).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method=method,
             )
-
-        func = getattr(target, method)
+        else:
+            req = Request(url, method=method)
 
         try:
-            return func(*args, **kwargs)
-        except Exception as exc: # pragma: no cover - network errors depend on runtime
-            raise OllamaClientError(str(exc)) from exc
+            with urlopen(req, timeout=30) as response:
+                return json.loads(response.read().decode())
+        except (URLError, HTTPError) as exc:
+            raise OllamaClientError(f"Request failed: {exc}") from exc
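For reference, a minimal usage sketch of the rewritten HTTP client (illustrative only, not part of this commit; it assumes the module is importable as ollama_client, and the fallback model tag is a placeholder):

# Illustrative usage sketch -- not part of this commit; the fallback model tag is a placeholder.
from ollama_client import OllamaClient

client = OllamaClient()  # defaults to http://localhost:11434

if not client.is_available:
    print("Ollama is not running. Start Ollama with: ollama serve")
else:
    models = client.list_models()
    model = models[0] if models else "qwen3:4b"  # placeholder fallback tag

    # Blocking call: POST /api/chat with stream=False
    reply = client.chat(
        model=model,
        messages=[{"role": "user", "content": "Hello"}],
        options={"temperature": 0.7},
    )
    if reply:
        print(reply["content"])

    # Streaming call: each yielded chunk is one JSON line from POST /api/chat
    for chunk in client.stream_chat(
        model=model,
        messages=[{"role": "user", "content": "Explain the reasoning mode toggle"}],
    ):
        text = chunk.get("message", {}).get("content") or chunk.get("content", "")
        print(text, end="", flush=True)
        if chunk.get("done"):
            break

Both calls go through urllib only, so the ollama SDK no longer needs to be installed for the sidebar to talk to a local server.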