refactor(aisidebar): restructure project and implement reasoning mode toggle

- Reorganize project structure and file locations
- Add ReasoningController to manage model selection and reasoning mode
- Update design and requirements for reasoning mode toggle
- Implement model switching between Qwen3-4B-Instruct and Qwen3-4B-Thinking models (see the sketch after this list)
- Remove deprecated files and consolidate project layout
- Add new steering and specification documentation
- Clean up and remove unnecessary files and directories
- Prepare for enhanced AI sidebar functionality with more flexible model handling
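
The ReasoningController itself is not part of the file shown in the diff below, so the sketch referenced above is only a guess at the toggle-to-model mapping the bullets describe; the class shape and the lowercase model tags are assumptions, not code from this commit:

# Hypothetical sketch: ReasoningController is named in the commit message,
# but its real implementation is not part of the diff shown here.
INSTRUCT_MODEL = "qwen3-4b-instruct"  # model tags are assumptions
THINKING_MODEL = "qwen3-4b-thinking"


class ReasoningController:
    """Maps the sidebar's reasoning toggle onto a concrete model choice."""

    def __init__(self) -> None:
        self._reasoning_enabled = False

    def set_reasoning(self, enabled: bool) -> None:
        """Flip the reasoning-mode toggle."""
        self._reasoning_enabled = enabled

    @property
    def active_model(self) -> str:
        # Thinking model when reasoning is on, instruct model otherwise.
        return THINKING_MODEL if self._reasoning_enabled else INSTRUCT_MODEL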
commit 239242e2fc
parent 58bd935af0
Author: Melvin Ragusa
Date:   2025-10-26 09:10:31 +01:00

73 changed files with 3094 additions and 2348 deletions

@@ -1,13 +1,11 @@
-"""Client utilities for interacting with the Ollama API."""
+"""Client utilities for interacting with the Ollama API via direct HTTP calls."""
 
 from __future__ import annotations
 
 import json
 from typing import Any, Dict, Iterable, Iterator
-try:  # pragma: no cover - optional dependency may not be installed in CI
-    import ollama
-except ImportError:  # pragma: no cover - fallback path for environments without Ollama
-    ollama = None  # type: ignore[assignment]
+from urllib.request import Request, urlopen
+from urllib.error import URLError, HTTPError
 
 
 class OllamaClientError(RuntimeError):
@@ -15,88 +13,60 @@ class OllamaClientError(RuntimeError):
 
 
 class OllamaUnavailableError(OllamaClientError):
-    """Raised when the Ollama Python SDK is not available."""
+    """Raised when the Ollama server is not available."""
 
 
 class OllamaClient:
-    """Thin wrapper around the Ollama Python SDK with graceful degradation."""
+    """HTTP client for interacting with Ollama's REST API."""
 
     def __init__(self, host: str | None = None) -> None:
-        self._host = host
-        self._client = None
+        self._host = host or "http://localhost:11434"
         self._cached_models: list[str] | None = None
         self._is_available = False
-        if ollama is None:
-            return
-        # Try to initialize client and check connection
-        try:
-            if host and hasattr(ollama, "Client"):
-                self._client = ollama.Client(host=host)  # type: ignore[call-arg]
-            # Test connection by attempting to list models
-            self._check_connection()
-        except Exception:
-            # Silently fail - availability flag remains False
-            pass
+        # Check connection during initialization without raising exceptions
+        self._check_connection()
 
     # ------------------------------------------------------------------ helpers
     def _check_connection(self) -> None:
-        """Check if Ollama is available and update internal flag."""
-        if ollama is None:
-            self._is_available = False
-            return
+        """Check if Ollama server is reachable and update internal flag."""
         try:
-            # Attempt a simple list call to verify connection
-            self._call_sdk("list")  # type: ignore[arg-type]
-            self._is_available = True
-        except Exception:
+            req = Request(f"{self._host}/api/tags", method="GET")
+            with urlopen(req, timeout=2) as response:
+                self._is_available = response.status == 200
+        except (URLError, HTTPError, TimeoutError, Exception):
            self._is_available = False
 
     @property
     def is_available(self) -> bool:
         """Check if Ollama server is reachable."""
         return self._is_available
 
     @property
     def default_model(self) -> str | None:
         """Get the first available model."""
         models = self.list_models()
         return models[0] if models else None
 
     def list_models(self, force_refresh: bool = False) -> list[str]:
         """Return the available model names, caching the result for quick reuse."""
         if not self.is_available:
             return []
         if self._cached_models is not None and not force_refresh:
             return list(self._cached_models)
         try:
-            response = self._call_sdk("list")  # type: ignore[arg-type]
-            # Update availability flag on successful call
-            self._is_available = True
-        except OllamaClientError:
-            self._is_available = False
-            return []
-        except Exception:
+            req = Request(f"{self._host}/api/tags", method="GET")
+            with urlopen(req, timeout=5) as response:
+                data = json.loads(response.read().decode())
+            # Update availability flag on successful call
+            self._is_available = True
+        except (URLError, HTTPError, TimeoutError, Exception):
+            # Return empty list instead of raising on connection failure
             self._is_available = False
             return []
         models: list[str] = []
-        # Handle both dict responses (old SDK) and Pydantic objects (new SDK)
-        if isinstance(response, dict):
-            model_list = response.get("models", [])
-        else:
-            # Pydantic object
-            model_list = getattr(response, "models", [])
-        for item in model_list:
-            if isinstance(item, dict):
-                name = item.get("name") or item.get("model")
-            else:
-                # Pydantic object
-                name = getattr(item, "name", None) or getattr(item, "model", None)
+        for item in data.get("models", []):
+            name = item.get("name") or item.get("model")
             if name:
                 models.append(name)
@@ -109,98 +79,145 @@ class OllamaClient:
         *,
         model: str,
         messages: Iterable[Dict[str, str]],
         options: Dict[str, Any] | None = None,
     ) -> dict[str, str] | None:
         """Execute a blocking chat call against Ollama."""
-        if not self.is_available:
-            if ollama is None:
-                return {
-                    "role": "assistant",
-                    "content": "Ollama SDK is not installed; install `ollama` to enable responses.",
-                }
-            else:
-                return {
-                    "role": "assistant",
-                    "content": "Ollama is not running. Start Ollama with: ollama serve",
-                }
+        # Return error message instead of raising when unavailable
+        if not self._is_available:
+            return {
+                "role": "assistant",
+                "content": "Ollama is not running. Start Ollama with: ollama serve",
+            }
+        payload = {
+            "model": model,
+            "messages": list(messages),
+            "stream": False,
+        }
+        # Add options if provided
+        if options:
+            payload["options"] = options
         try:
-            result = self._call_sdk(
-                "chat",
-                model=model,
-                messages=list(messages),
-                stream=False,
+            req = Request(
+                f"{self._host}/api/chat",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
             )
-            # Update availability flag on successful call
-            self._is_available = True
-        except OllamaClientError as exc:
-            self._is_available = False
-            return {
-                "role": "assistant",
-                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
-            }
-        except Exception as exc:
+            with urlopen(req, timeout=120) as response:
+                result = json.loads(response.read().decode())
+            # Update availability flag on successful call
+            self._is_available = True
+        except (URLError, HTTPError, TimeoutError, Exception) as exc:
+            # Update availability flag and return error message
             self._is_available = False
             return {
                 "role": "assistant",
                 "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
             }
-        # Handle both dict responses (old SDK) and Pydantic objects (new SDK)
-        if isinstance(result, dict):
-            message = result.get("message")
-            if not message:
-                return {"role": "assistant", "content": ""}
-            role = message.get("role") or "assistant"
-            content = message.get("content") or ""
-        else:
-            # Pydantic object (ollama SDK >= 0.4.0)
-            message = getattr(result, "message", None)
-            if not message:
-                return {"role": "assistant", "content": ""}
-            role = getattr(message, "role", "assistant")
-            content = getattr(message, "content", "")
+        # Parse the response
+        message = result.get("message")
+        if not message:
+            return {"role": "assistant", "content": ""}
+        role = message.get("role", "assistant")
+        content = message.get("content", "")
         return {"role": role, "content": content}
 
     def stream_chat(
-        self, *, model: str, messages: Iterable[Dict[str, str]]
+        self, *, model: str, messages: Iterable[Dict[str, str]], options: Dict[str, Any] | None = None
     ) -> Iterator[dict[str, Any]]:
-        """Placeholder that exposes the streaming API for future UI hooks."""
-        if not self.is_available:
-            raise OllamaUnavailableError(
-                "Streaming requires the Ollama Python SDK to be installed."
-            )
+        """Execute a streaming chat call against Ollama.
+
+        Yields dictionaries containing token data from the streaming response.
+        Each yielded dict may contain 'message' with 'content' field for tokens.
+        """
+        # Return error message instead of raising when unavailable
+        if not self._is_available:
+            yield {
+                "role": "assistant",
+                "content": "Ollama is not running. Start Ollama with: ollama serve",
+                "done": True,
+            }
+            return
+        payload = {
+            "model": model,
+            "messages": list(messages),
+            "stream": True,
+            "think": True,  # Enable thinking output for thinking models
+        }
+        # Add options if provided
+        if options:
+            payload["options"] = options
         try:
-            stream = self._call_sdk(
-                "chat",
-                model=model,
-                messages=list(messages),
-                stream=True,
+            req = Request(
+                f"{self._host}/api/chat",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
            )
-        except OllamaClientError as exc:
-            raise OllamaClientError(f"Failed to start streaming chat: {exc}") from exc
-        if not hasattr(stream, "__iter__"):
-            raise OllamaClientError("Ollama returned a non-iterable stream response.")
-        return iter(stream)
+            with urlopen(req, timeout=120) as response:
+                # Update availability flag on successful connection
+                self._is_available = True
+                # Read streaming response line by line
+                for line in response:
+                    if not line:
+                        continue
+                    try:
+                        chunk = json.loads(line.decode("utf-8"))
+                        yield chunk
+                        # Check if streaming is complete
+                        if chunk.get("done", False):
+                            break
+                    except json.JSONDecodeError:
+                        # Skip malformed JSON lines
+                        continue
+        except (URLError, HTTPError, TimeoutError, Exception) as exc:
+            # Update availability flag and yield error message
+            self._is_available = False
+            yield {
+                "role": "assistant",
+                "content": f"Unable to reach Ollama: {exc}\n\nStart Ollama with: ollama serve",
+                "done": True,
+                "error": True,
+            }
 
     # ------------------------------------------------------------------ internals
-    def _call_sdk(self, method: str, *args: Any, **kwargs: Any) -> Any:
-        if not self.is_available:
-            raise OllamaUnavailableError(
-                "Ollama Python SDK is not available in the environment."
-            )
-        target = self._client if self._client is not None else ollama
-        if target is None or not hasattr(target, method):
-            raise OllamaClientError(
-                f"Ollama SDK does not expose method '{method}'. Install or update the SDK."
+    def _make_request(
+        self, endpoint: str, method: str = "GET", data: dict | None = None
+    ) -> dict:
+        """Make an HTTP request to the Ollama API."""
+        url = f"{self._host}{endpoint}"
+        if data:
+            req = Request(
+                url,
+                data=json.dumps(data).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method=method,
            )
-        func = getattr(target, method)
+        else:
+            req = Request(url, method=method)
         try:
-            return func(*args, **kwargs)
-        except Exception as exc:  # pragma: no cover - network errors depend on runtime
-            raise OllamaClientError(str(exc)) from exc
+            with urlopen(req, timeout=30) as response:
+                return json.loads(response.read().decode())
+        except (URLError, HTTPError) as exc:
+            raise OllamaClientError(f"Request failed: {exc}") from exc