Phase 5: AI provider system with local and cloud support

- Implement AIProvider base interface with chat() and is_available()
- Add LocalProvider connecting to bundled llama-server via OpenAI SDK
- Add OpenAIProvider for direct OpenAI API access
- Add AnthropicProvider for Anthropic Claude API
- Add LiteLLMProvider for multi-provider gateway
- Build AIProviderService with provider routing, auto-selection, and transcript context injection
- Add ai.chat IPC handler supporting chat, list_providers, set_provider, and configure actions
- Add ai_chat, ai_list_providers, ai_configure Tauri commands
- Build interactive AIChatPanel with message history, quick actions (Summarize, Action Items), and transcript context awareness
- Tests: 30 Python, 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:25:10 -08:00
parent 415a648a2b
commit d67625cd5a
11 changed files with 735 additions and 28 deletions
--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -135,6 +135,83 @@ def make_export_handler() -> HandlerFunc:
    return handler


+def make_ai_chat_handler() -> HandlerFunc:
+    """Create the ``ai.chat`` handler around one persistent AIProviderService.
+
+    The handler dispatches on ``payload["action"]`` (default ``"chat"``) and
+    replies with ``ai.providers``, ``ai.provider_set``, ``ai.configured`` or
+    ``ai.response``. Exceptions from the service or provider are not caught here.
+    ``configure`` raises ValueError for a provider name it cannot build, instead
+    of acknowledging a configuration that never took place.
+    """
+    from voice_to_notes.services.ai_provider import create_default_service
+
+    service = create_default_service()
+
+    def build_provider(provider_name, config):
+        """Instantiate the named provider from ``config`` (imports stay lazy)."""
+        if provider_name == "local":
+            from voice_to_notes.providers.local_provider import LocalProvider
+
+            return LocalProvider(
+                base_url=config.get("base_url", "http://localhost:8080"),
+                model=config.get("model", "local"),
+            )
+        if provider_name == "openai":
+            from voice_to_notes.providers.openai_provider import OpenAIProvider
+
+            return OpenAIProvider(
+                api_key=config.get("api_key"),
+                model=config.get("model", "gpt-4o-mini"),
+            )
+        if provider_name == "anthropic":
+            from voice_to_notes.providers.anthropic_provider import AnthropicProvider
+
+            return AnthropicProvider(
+                api_key=config.get("api_key"),
+                model=config.get("model", "claude-sonnet-4-6"),
+            )
+        if provider_name == "litellm":
+            from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+
+            return LiteLLMProvider(model=config.get("model", "gpt-4o-mini"))
+        # Fail loudly: nothing can be registered for this name.
+        raise ValueError(f"Unknown provider: {provider_name!r}")
+
+    def handler(msg: IPCMessage) -> IPCMessage:
+        payload = msg.payload
+        action = payload.get("action", "chat")
+
+        if action == "list_providers":
+            listing = {"providers": service.list_providers()}
+            return IPCMessage(id=msg.id, type="ai.providers", payload=listing)
+
+        if action == "set_provider":
+            service.set_active(payload["provider"])
+            chosen = {"provider": payload["provider"]}
+            return IPCMessage(id=msg.id, type="ai.provider_set", payload=chosen)
+
+        if action == "configure":
+            # Replace the registered provider with one built from custom settings.
+            provider_name = payload.get("provider", "")
+            # ``or {}`` also covers an explicit null config in the payload.
+            provider = build_provider(provider_name, payload.get("config") or {})
+            service.register_provider(provider_name, provider)
+            configured = {"provider": provider_name}
+            return IPCMessage(id=msg.id, type="ai.configured", payload=configured)
+
+        # Default: chat. Any other payload keys are forwarded as provider kwargs.
+        reserved = ("action", "messages", "transcript_context")
+        response = service.chat(
+            messages=payload.get("messages", []),
+            transcript_context=payload.get("transcript_context", ""),
+            **{k: v for k, v in payload.items() if k not in reserved},
+        )
+        return IPCMessage(id=msg.id, type="ai.response", payload={"response": response})
+
+    return handler
+
+
 def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
    """Detect hardware capabilities and return recommendations."""
    from voice_to_notes.hardware.detect import detect_hardware
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -8,6 +8,7 @@ import sys
 from voice_to_notes.ipc.handlers import (
    HandlerRegistry,
    hardware_detect_handler,
+    make_ai_chat_handler,
    make_diarize_handler,
    make_export_handler,
    make_pipeline_handler,
@@ -27,6 +28,7 @@ def create_registry() -> HandlerRegistry:
    registry.register("diarize.start", make_diarize_handler())
    registry.register("pipeline.start", make_pipeline_handler())
    registry.register("export.start", make_export_handler())
+    registry.register("ai.chat", make_ai_chat_handler())
    return registry


--- a/python/voice_to_notes/providers/anthropic_provider.py
+++ b/python/voice_to_notes/providers/anthropic_provider.py
@@ -2,4 +2,68 @@

 from __future__ import annotations

-# TODO: Implement Anthropic provider
+import os
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class AnthropicProvider(AIProvider):
+    """AIProvider backed by the Anthropic Messages API (client built lazily)."""
+
+    def __init__(self, api_key: str | None = None, model: str = "claude-sonnet-4-6") -> None:
+        # An explicit key wins; otherwise fall back to the environment.
+        self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
+        self._model = model
+        self._client: Any = None
+
+    def _ensure_client(self) -> Any:
+        """Return the cached client; RuntimeError if the key or the SDK is missing."""
+        if self._client is not None:
+            return self._client
+
+        if not self._api_key:
+            raise RuntimeError(
+                "Anthropic API key not configured. Set ANTHROPIC_API_KEY or provide it in settings."
+            )
+
+        try:
+            import anthropic
+        except ImportError as exc:
+            message = "anthropic package is required. Install with: pip install anthropic"
+            raise RuntimeError(message) from exc
+
+        self._client = anthropic.Anthropic(api_key=self._api_key)
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Send ``messages`` to Anthropic; honours ``model``/``max_tokens`` kwargs."""
+        client = self._ensure_client()
+
+        # Anthropic takes the system prompt as a separate argument. Join every
+        # system message instead of keeping only the last one, so an earlier
+        # system message (e.g. injected transcript context) is not discarded.
+        system_parts = [m.get("content", "") for m in messages if m.get("role") == "system"]
+        system_msg = "\n\n".join(part for part in system_parts if part)
+        chat_messages = [m for m in messages if m.get("role") != "system"]
+
+        create_kwargs: dict[str, Any] = {
+            "model": kwargs.get("model", self._model),
+            "messages": chat_messages,
+            "max_tokens": kwargs.get("max_tokens", 2048),
+        }
+        if system_msg:
+            create_kwargs["system"] = system_msg
+
+        response = client.messages.create(**create_kwargs)
+        # Content arrives as blocks; only those exposing ``text`` are joined.
+        return "".join(block.text for block in response.content if hasattr(block, "text"))
+
+    def is_available(self) -> bool:
+        """Report whether an API key is configured; no request is made."""
+        return bool(self._api_key)
+
+    @property
+    def name(self) -> str:
+        """Provider display name."""
+        return "Anthropic"
--- a/python/voice_to_notes/providers/base.py
+++ b/python/voice_to_notes/providers/base.py
@@ -3,7 +3,6 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator
 from typing import Any


@@ -11,13 +10,17 @@ class AIProvider(ABC):
    """Base interface for all AI providers."""

    @abstractmethod
-    async def chat(self, messages: list[dict[str, Any]], config: dict[str, Any]) -> str:
-        """Send a chat completion request and return the response."""
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Send a chat completion request and return the full response text."""
        ...

    @abstractmethod
-    async def stream(
-        self, messages: list[dict[str, Any]], config: dict[str, Any]
-    ) -> AsyncIterator[str]:
-        """Send a streaming chat request, yielding tokens as they arrive."""
+    def is_available(self) -> bool:
+        """Check if this provider is configured and available."""
+        ...
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Provider display name."""
        ...
--- a/python/voice_to_notes/providers/litellm_provider.py
+++ b/python/voice_to_notes/providers/litellm_provider.py
@@ -2,4 +2,41 @@

 from __future__ import annotations

-# TODO: Implement LiteLLM provider
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class LiteLLMProvider(AIProvider):
+    """Gateway provider that hands chat completions to LiteLLM."""
+
+    def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
+        self._extra_kwargs = kwargs
+        self._model = model
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Run one completion; per-call kwargs override the constructor's kwargs."""
+        try:
+            import litellm
+        except ImportError:
+            raise RuntimeError("litellm package is required. Install with: pip install litellm")
+
+        options = dict(self._extra_kwargs, **kwargs)
+        completion = litellm.completion(
+            model=options.get("model", self._model),
+            messages=messages,
+            temperature=options.get("temperature", 0.7),
+            max_tokens=options.get("max_tokens", 2048),
+        )
+        return completion.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        try:
+            import litellm  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    @property
+    def name(self) -> str:
+        return "LiteLLM"
--- a/python/voice_to_notes/providers/local_provider.py
+++ b/python/voice_to_notes/providers/local_provider.py
@@ -2,8 +2,57 @@

 from __future__ import annotations

+import sys
+from typing import Any

-# TODO: Implement local provider
-# - Connect to llama-server on localhost:{port}
-# - Use openai SDK with custom base_url
-# - Support chat and streaming
+from voice_to_notes.providers.base import AIProvider
+
+
+class LocalProvider(AIProvider):
+    """AIProvider that talks to llama-server through its OpenAI-compatible API."""
+
+    def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
+        self._base_url = base_url.rstrip("/")  # so "/v1" and "/health" append cleanly
+        self._model = model
+        self._client: Any = None
+
+    def _ensure_client(self) -> Any:
+        """Return the cached OpenAI SDK client; RuntimeError if ``openai`` is missing."""
+        if self._client is not None:
+            return self._client
+
+        try:
+            from openai import OpenAI
+        except ImportError as exc:
+            message = "openai package is required for local AI. Install with: pip install openai"
+            raise RuntimeError(message) from exc
+        self._client = OpenAI(
+            base_url=f"{self._base_url}/v1",
+            api_key="not-needed",  # llama-server doesn't require an API key
+        )
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Return the reply text; ``model``, ``temperature``, ``max_tokens`` kwargs apply."""
+        client = self._ensure_client()
+        response = client.chat.completions.create(
+            model=kwargs.get("model", self._model),  # per-call override wins
+            messages=messages,
+            temperature=kwargs.get("temperature", 0.7),
+            max_tokens=kwargs.get("max_tokens", 2048),
+        )
+        return response.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        """Probe ``{base_url}/health`` with a 2 s timeout; any failure means unavailable."""
+        try:
+            import urllib.request
+            req = urllib.request.Request(f"{self._base_url}/health", method="GET")
+            with urllib.request.urlopen(req, timeout=2) as resp:
+                return resp.status == 200
+        except Exception:
+            return False
+
+    @property
+    def name(self) -> str:
+        return "Local (llama-server)"
--- a/python/voice_to_notes/providers/openai_provider.py
+++ b/python/voice_to_notes/providers/openai_provider.py
@@ -2,4 +2,52 @@

 from __future__ import annotations

-# TODO: Implement OpenAI provider
+import os
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class OpenAIProvider(AIProvider):
+    """AIProvider implementation that calls the OpenAI chat completions API."""
+
+    def __init__(self, api_key: str | None = None, model: str = "gpt-4o-mini") -> None:
+        self._client: Any = None
+        self._model = model
+        # An explicit key takes precedence over the OPENAI_API_KEY environment variable.
+        self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
+
+    def _ensure_client(self) -> Any:
+        """Return the cached OpenAI client, constructing it on first use."""
+        if self._client is None:
+            if not self._api_key:
+                raise RuntimeError(
+                    "OpenAI API key not configured. Set OPENAI_API_KEY or provide it in settings."
+                )
+            try:
+                from openai import OpenAI
+
+                self._client = OpenAI(api_key=self._api_key)
+            except ImportError:
+                raise RuntimeError("openai package is required. Install with: pip install openai")
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Request one completion and return its text ("" when the content is empty)."""
+        request = {
+            "model": kwargs.get("model", self._model),
+            "messages": messages,
+            "temperature": kwargs.get("temperature", 0.7),
+            "max_tokens": kwargs.get("max_tokens", 2048),
+        }
+        completion = self._ensure_client().chat.completions.create(**request)
+        return completion.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        """True when an API key is configured; no network request is made."""
+        return bool(self._api_key)
+
+    @property
+    def name(self) -> str:
+        """Provider display name."""
+        return "OpenAI"
--- a/python/voice_to_notes/services/ai_provider.py
+++ b/python/voice_to_notes/services/ai_provider.py
@@ -2,12 +2,103 @@

 from __future__ import annotations

+import sys
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+

 class AIProviderService:
-    """Manages AI provider selection and routes chat/summarize requests."""
+    """Manages AI provider selection and routes chat requests."""

-    # TODO: Implement provider routing
-    # - Select provider based on config (local, openai, anthropic, litellm)
-    # - Forward chat messages
-    # - Handle streaming responses
-    pass
+    def __init__(self) -> None:
+        self._active_provider: str | None = None  # key of the active provider, if any
+        self._providers: dict[str, AIProvider] = {}  # registry keyed by provider name
+
+    def register_provider(self, name: str, provider: AIProvider) -> None:
+        """Store ``provider`` under ``name``, replacing any entry already using it."""
+        self._providers.update({name: provider})
+
+    def set_active(self, name: str) -> None:
+        """Make ``name`` the active provider; raise ValueError if it is not registered."""
+        if name not in self._providers:
+            raise ValueError(f"Unknown provider: {name}. Available: {list(self._providers)}")
+        self._active_provider = name
+
+    def get_active(self) -> AIProvider | None:
+        """Return the active provider, auto-selecting the first available one if unset."""
+        if not self._active_provider:
+            # Nothing chosen yet: adopt the first registered provider that is available.
+            for candidate, provider in self._providers.items():
+                if provider.is_available():
+                    self._active_provider = candidate
+                    return provider
+            return None
+        return self._providers.get(self._active_provider)
+
+    def list_providers(self) -> list[dict[str, Any]]:
+        """Describe each registered provider: key, display name, availability, active flag."""
+        summary = []
+        for key, provider in self._providers.items():
+            summary.append({
+                "name": key,
+                "display_name": provider.name,
+                "available": provider.is_available(),
+                "active": key == self._active_provider,
+            })
+        return summary
+
+    def chat(
+        self,
+        messages: list[dict[str, str]],
+        transcript_context: str = "",
+        **kwargs: Any,
+    ) -> str:
+        """Forward a chat request to the active provider and return its reply.
+
+        A non-empty ``transcript_context`` is wrapped in a system message placed
+        ahead of ``messages``; extra kwargs are handed to the provider unchanged.
+
+        Raises:
+            RuntimeError: get_active() found no provider to use.
+        """
+        provider = self.get_active()
+        if provider is None:
+            raise RuntimeError(
+                "No AI provider available. Configure a provider in settings or start the local AI server."
+            )
+
+        # Build the outgoing list without mutating the caller's ``messages``.
+        full_messages: list[dict[str, str]]
+        if transcript_context:
+            system_prompt = (
+                "You are a helpful assistant analyzing a transcript. "
+                "Here is the transcript for context:\n\n"
+                f"{transcript_context}\n\n"
+                "Answer the user's questions about this transcript. "
+                "Be concise and helpful."
+            )
+            full_messages = [{"role": "system", "content": system_prompt}, *messages]
+        else:
+            full_messages = list(messages)
+
+        # Log which provider handles the request, and the message count, to stderr.
+        log_line = f"[sidecar] AI chat via {provider.name}, {len(full_messages)} messages"
+        print(log_line, file=sys.stderr, flush=True)
+
+        return provider.chat(full_messages, **kwargs)
+
+
+def create_default_service() -> AIProviderService:
+    """Build a service with default local, OpenAI, Anthropic and LiteLLM providers."""
+    from voice_to_notes.providers.anthropic_provider import AnthropicProvider
+    from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+    from voice_to_notes.providers.local_provider import LocalProvider
+    from voice_to_notes.providers.openai_provider import OpenAIProvider
+
+    service = AIProviderService()
+    ordered = (("local", LocalProvider), ("openai", OpenAIProvider))
+    ordered += (("anthropic", AnthropicProvider), ("litellm", LiteLLMProvider))
+    for key, provider_cls in ordered:
+        service.register_provider(key, provider_cls())
+    return service