# Change notes:
# - Reorganize project structure and file locations
# - Add ReasoningController to manage model selection and reasoning mode
# - Update design and requirements for reasoning mode toggle
# - Implement model switching between Qwen3-4B-Instruct and Qwen3-4B-Thinking models
# - Remove deprecated files and consolidate project layout
# - Add new steering and specification documentation
# - Prepare for enhanced AI sidebar functionality with more flexible model handling
"""Streaming response handler for progressive token display."""
from __future__ import annotations

from gi.repository import GLib

from ignis import widgets


class StreamingHandler:
    """Manages streaming response display with token-by-token updates.

    Tokens are accumulated in small batches and flushed to the widgets via
    ``GLib.idle_add`` so that the GTK main loop performs all UI mutation.
    Thinking ("reasoning") content is kept in a separate buffer from the
    main response so the two can be rendered in different widgets.
    """

    def __init__(
        self,
        message_widget: widgets.Label,
        scroller: widgets.Scroll,
        thinking_widget: widgets.Label | None = None,
        thinking_box: widgets.Box | None = None,
    ) -> None:
        """Initialize streaming handler.

        Args:
            message_widget: The label widget to update with streaming content.
            scroller: The scroll widget to manage auto-scrolling.
            thinking_widget: Optional label widget for thinking content.
            thinking_box: Optional box container for the thinking section.
        """
        self._widget = message_widget
        self._thinking_widget = thinking_widget
        self._thinking_box = thinking_box
        self._scroller = scroller
        self._buffer = ""  # Main response text flushed to the UI so far
        self._thinking_buffer = ""  # Separate buffer for thinking content
        self._token_buffer: list[str] = []  # Pending main-response tokens
        self._thinking_token_buffer: list[str] = []  # Pending thinking tokens
        self._is_streaming = False
        self._buffer_size = 3  # Accumulate 3-5 tokens before UI update

    def start_stream(self) -> None:
        """Reset all buffers and show the initial streaming indicator."""
        self._buffer = ""
        self._thinking_buffer = ""
        self._token_buffer = []
        self._thinking_token_buffer = []
        self._is_streaming = True
        # Clear stale thinking content from a previous stream so it does
        # not remain visible while the new response arrives.
        if self._thinking_widget:
            self._thinking_widget.label = ""
        if self._thinking_box:
            self._thinking_box.set_visible(False)
        # Set initial text with streaming indicator
        self._widget.label = "Assistant: ..."

    def append_token(self, token: str) -> None:
        """Add token to buffer and update UI via GLib.idle_add.

        Args:
            token: The token string to append
        """
        if not self._is_streaming:
            return

        self._token_buffer.append(token)

        # Update UI only once enough tokens have accumulated, to avoid
        # flooding the main loop with per-token redraws.
        if len(self._token_buffer) >= self._buffer_size:
            self._flush_buffer()

    def append_thinking_token(self, token: str) -> None:
        """Add thinking token to its separate buffer.

        Args:
            token: The thinking token string to append
        """
        if not self._is_streaming:
            return

        self._thinking_token_buffer.append(token)

        # Same batching threshold as the main response buffer
        if len(self._thinking_token_buffer) >= self._buffer_size:
            self._flush_thinking_buffer()

    def _flush_buffer(self) -> None:
        """Move accumulated main-response tokens into the display buffer."""
        if not self._token_buffer:
            return

        self._buffer += "".join(self._token_buffer)
        self._token_buffer = []

        # Schedule UI update on the GTK main thread
        GLib.idle_add(self._update_ui, priority=GLib.PRIORITY_DEFAULT)

    def _flush_thinking_buffer(self) -> None:
        """Move accumulated thinking tokens into the thinking buffer."""
        if not self._thinking_token_buffer:
            return

        self._thinking_buffer += "".join(self._thinking_token_buffer)
        self._thinking_token_buffer = []

        # Schedule UI update on the GTK main thread
        GLib.idle_add(self._update_ui, priority=GLib.PRIORITY_DEFAULT)

    def _update_ui(self) -> bool:
        """Update the widget labels with current buffer content.

        Returns:
            False to indicate this is a one-time idle callback
        """
        # Update thinking widget if present
        if self._thinking_widget and self._thinking_buffer:
            self._thinking_widget.label = self._thinking_buffer + "..."
            if self._thinking_box:
                self._thinking_box.set_visible(True)

        # Trailing ellipsis acts as the streaming indicator
        self._widget.label = f"Assistant: {self._buffer}..."

        # Auto-scroll to bottom
        self._scroll_to_bottom()

        return False  # Don't repeat this callback

    def _scroll_to_bottom(self) -> None:
        """Scroll the container to the latest content."""
        adjustment = self._scroller.get_vadjustment()
        if adjustment:
            adjustment.set_value(adjustment.get_upper() - adjustment.get_page_size())

    def finish_stream(self) -> tuple[str, str]:
        """Finalize streaming and return complete content.

        Returns:
            Tuple of (thinking_content, main_content)
        """
        self._is_streaming = False

        # Flush any tokens still sitting below the batch threshold
        if self._token_buffer:
            self._buffer += "".join(self._token_buffer)
            self._token_buffer = []

        if self._thinking_token_buffer:
            self._thinking_buffer += "".join(self._thinking_token_buffer)
            self._thinking_token_buffer = []

        # Final UI update without streaming indicator
        final_content = self._buffer
        final_thinking = self._thinking_buffer
        GLib.idle_add(self._finalize_ui, final_thinking, final_content,
                      priority=GLib.PRIORITY_DEFAULT)

        return (final_thinking, final_content)

    def _finalize_ui(self, thinking: str, content: str) -> bool:
        """Render the final content without the streaming indicator.

        Args:
            thinking: The final thinking content
            content: The final complete content

        Returns:
            False to indicate this is a one-time idle callback
        """
        # Update thinking widget if present
        if self._thinking_widget and thinking:
            self._thinking_widget.label = thinking
            if self._thinking_box:
                self._thinking_box.set_visible(True)

        # Update main response without streaming indicator
        self._widget.label = f"Assistant: {content}"
        self._scroll_to_bottom()
        return False

    def parse_reasoning_content(self, content: str) -> tuple[str | None, str]:
        """Parse reasoning content from a response if present.

        Recognized formats, tried in order (first match wins):
        - <think>...</think> tags (shorter form)
        - <thinking>...</thinking> tags (longer form)
        - [Reasoning: ...] followed by [Answer: ...]
        - "Reasoning:" followed by "Answer:" or "Conclusion:"

        Args:
            content: The complete response content

        Returns:
            Tuple of (reasoning_content, main_content).
            If no reasoning is found, returns (None, original_content).
        """
        import re

        # Each pattern captures (reasoning, main content) in groups 1 and 2;
        # the order preserves the original try sequence exactly.
        patterns = (
            r'<think>(.*?)</think>\s*(.*)',
            r'<thinking>(.*?)</thinking>\s*(.*)',
            r'\[Reasoning:?\s*(.*?)\]\s*\[Answer:?\s*(.*?)\]',
            r'Reasoning:\s*(.*?)\s*(?:Answer|Conclusion):\s*(.*)',
        )
        for pattern in patterns:
            match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
            if match:
                return (match.group(1).strip(), match.group(2).strip())

        # No reasoning pattern found
        return (None, content)
|