diff --git a/python/voice_to_notes/ipc/handlers.py b/python/voice_to_notes/ipc/handlers.py index 9863b91..5d27734 100644 --- a/python/voice_to_notes/ipc/handlers.py +++ b/python/voice_to_notes/ipc/handlers.py @@ -135,6 +135,83 @@ def make_export_handler() -> HandlerFunc: return handler +def make_ai_chat_handler() -> HandlerFunc: + """Create an AI chat handler with persistent AIProviderService.""" + from voice_to_notes.services.ai_provider import create_default_service + + service = create_default_service() + + def handler(msg: IPCMessage) -> IPCMessage: + payload = msg.payload + action = payload.get("action", "chat") + + if action == "list_providers": + return IPCMessage( + id=msg.id, + type="ai.providers", + payload={"providers": service.list_providers()}, + ) + + if action == "set_provider": + service.set_active(payload["provider"]) + return IPCMessage( + id=msg.id, + type="ai.provider_set", + payload={"provider": payload["provider"]}, + ) + + if action == "configure": + # Re-create a provider with custom settings + provider_name = payload.get("provider", "") + config = payload.get("config", {}) + if provider_name == "local": + from voice_to_notes.providers.local_provider import LocalProvider + + service.register_provider("local", LocalProvider( + base_url=config.get("base_url", "http://localhost:8080"), + model=config.get("model", "local"), + )) + elif provider_name == "openai": + from voice_to_notes.providers.openai_provider import OpenAIProvider + + service.register_provider("openai", OpenAIProvider( + api_key=config.get("api_key"), + model=config.get("model", "gpt-4o-mini"), + )) + elif provider_name == "anthropic": + from voice_to_notes.providers.anthropic_provider import AnthropicProvider + + service.register_provider("anthropic", AnthropicProvider( + api_key=config.get("api_key"), + model=config.get("model", "claude-sonnet-4-6"), + )) + elif provider_name == "litellm": + from voice_to_notes.providers.litellm_provider import LiteLLMProvider + + service.register_provider("litellm", LiteLLMProvider( + model=config.get("model", "gpt-4o-mini"), + )) + return IPCMessage( + id=msg.id, + type="ai.configured", + payload={"provider": provider_name}, + ) + + # Default: chat + response = service.chat( + messages=payload.get("messages", []), + transcript_context=payload.get("transcript_context", ""), + **{k: v for k, v in payload.items() if k not in ("action", "messages", "transcript_context")}, + ) + return IPCMessage( + id=msg.id, + type="ai.response", + payload={"response": response}, + ) + + return handler + + def hardware_detect_handler(msg: IPCMessage) -> IPCMessage: """Detect hardware capabilities and return recommendations.""" from voice_to_notes.hardware.detect import detect_hardware diff --git a/python/voice_to_notes/main.py b/python/voice_to_notes/main.py index af4e1cf..fedff95 100644 --- a/python/voice_to_notes/main.py +++ b/python/voice_to_notes/main.py @@ -8,6 +8,7 @@ import sys from voice_to_notes.ipc.handlers import ( HandlerRegistry, hardware_detect_handler, + make_ai_chat_handler, make_diarize_handler, make_export_handler, make_pipeline_handler, @@ -27,6 +28,7 @@ def create_registry() -> HandlerRegistry: registry.register("diarize.start", make_diarize_handler()) registry.register("pipeline.start", make_pipeline_handler()) registry.register("export.start", make_export_handler()) + registry.register("ai.chat", make_ai_chat_handler()) return registry diff --git a/python/voice_to_notes/providers/anthropic_provider.py b/python/voice_to_notes/providers/anthropic_provider.py index d93c856..23f5c54 100644 --- a/python/voice_to_notes/providers/anthropic_provider.py +++ b/python/voice_to_notes/providers/anthropic_provider.py @@ -2,4 +2,68 @@ from __future__ import annotations -# TODO: Implement Anthropic provider +import os +from typing import Any + +from voice_to_notes.providers.base import AIProvider + + +class AnthropicProvider(AIProvider): + """Connects to the Anthropic API.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "claude-sonnet-4-6", + ) -> None: + self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "") + self._model = model + self._client: Any = None + + def _ensure_client(self) -> Any: + if self._client is not None: + return self._client + + if not self._api_key: + raise RuntimeError( + "Anthropic API key not configured. Set ANTHROPIC_API_KEY or provide it in settings." + ) + + try: + import anthropic + + self._client = anthropic.Anthropic(api_key=self._api_key) + except ImportError: + raise RuntimeError("anthropic package is required. Install with: pip install anthropic") + return self._client + + def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str: + client = self._ensure_client() + + # Anthropic expects a system message separately + system_msg = "" + chat_messages = [] + for msg in messages: + if msg.get("role") == "system": + system_msg = msg.get("content", "") + else: + chat_messages.append(msg) + + create_kwargs: dict[str, Any] = { + "model": kwargs.get("model", self._model), + "messages": chat_messages, + "max_tokens": kwargs.get("max_tokens", 2048), + } + if system_msg: + create_kwargs["system"] = system_msg + + response = client.messages.create(**create_kwargs) + # Anthropic returns content blocks + return "".join(block.text for block in response.content if hasattr(block, "text")) + + def is_available(self) -> bool: + return bool(self._api_key) + + @property + def name(self) -> str: + return "Anthropic" diff --git a/python/voice_to_notes/providers/base.py b/python/voice_to_notes/providers/base.py index eb70b15..6cd9337 100644 --- a/python/voice_to_notes/providers/base.py +++ b/python/voice_to_notes/providers/base.py @@ -3,7 +3,6 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import AsyncIterator from typing import Any @@ -11,13 +10,17 @@ class AIProvider(ABC): """Base interface for all AI providers.""" @abstractmethod - async def chat(self, messages: list[dict[str, Any]], config: dict[str, Any]) -> str: - """Send a chat completion request and return the response.""" + def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str: + """Send a chat completion request and return the full response text.""" ... @abstractmethod - async def stream( - self, messages: list[dict[str, Any]], config: dict[str, Any] - ) -> AsyncIterator[str]: - """Send a streaming chat request, yielding tokens as they arrive.""" + def is_available(self) -> bool: + """Check if this provider is configured and available.""" + ... + + @property + @abstractmethod + def name(self) -> str: + """Provider display name.""" ... diff --git a/python/voice_to_notes/providers/litellm_provider.py b/python/voice_to_notes/providers/litellm_provider.py index 0d2d610..fd48a0a 100644 --- a/python/voice_to_notes/providers/litellm_provider.py +++ b/python/voice_to_notes/providers/litellm_provider.py @@ -2,4 +2,41 @@ from __future__ import annotations -# TODO: Implement LiteLLM provider +from typing import Any + +from voice_to_notes.providers.base import AIProvider + + +class LiteLLMProvider(AIProvider): + """Routes through LiteLLM for access to 100+ LLM providers.""" + + def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None: + self._model = model + self._extra_kwargs = kwargs + + def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str: + try: + import litellm + except ImportError: + raise RuntimeError("litellm package is required. Install with: pip install litellm") + + merged_kwargs = {**self._extra_kwargs, **kwargs} + response = litellm.completion( + model=merged_kwargs.get("model", self._model), + messages=messages, + temperature=merged_kwargs.get("temperature", 0.7), + max_tokens=merged_kwargs.get("max_tokens", 2048), + ) + return response.choices[0].message.content or "" + + def is_available(self) -> bool: + try: + import litellm # noqa: F401 + + return True + except ImportError: + return False + + @property + def name(self) -> str: + return "LiteLLM" diff --git a/python/voice_to_notes/providers/local_provider.py b/python/voice_to_notes/providers/local_provider.py index 7873891..ac904aa 100644 --- a/python/voice_to_notes/providers/local_provider.py +++ b/python/voice_to_notes/providers/local_provider.py @@ -2,8 +2,57 @@ from __future__ import annotations +import sys +from typing import Any -# TODO: Implement local provider -# - Connect to llama-server on localhost:{port} -# - Use openai SDK with custom base_url -# - Support chat and streaming +from voice_to_notes.providers.base import AIProvider + + +class LocalProvider(AIProvider): + """Connects to bundled llama-server via its OpenAI-compatible API.""" + + def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None: + self._base_url = base_url.rstrip("/") + self._model = model + self._client: Any = None + + def _ensure_client(self) -> Any: + if self._client is not None: + return self._client + + try: + from openai import OpenAI + + self._client = OpenAI( + base_url=f"{self._base_url}/v1", + api_key="not-needed", # llama-server doesn't require an API key + ) + except ImportError: + raise RuntimeError( + "openai package is required for local AI. Install with: pip install openai" + ) + return self._client + + def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str: + client = self._ensure_client() + response = client.chat.completions.create( + model=self._model, + messages=messages, + temperature=kwargs.get("temperature", 0.7), + max_tokens=kwargs.get("max_tokens", 2048), + ) + return response.choices[0].message.content or "" + + def is_available(self) -> bool: + try: + import urllib.request + + req = urllib.request.Request(f"{self._base_url}/health", method="GET") + with urllib.request.urlopen(req, timeout=2) as resp: + return resp.status == 200 + except Exception: + return False + + @property + def name(self) -> str: + return "Local (llama-server)" diff --git a/python/voice_to_notes/providers/openai_provider.py b/python/voice_to_notes/providers/openai_provider.py index 93c95a1..a7f74f6 100644 --- a/python/voice_to_notes/providers/openai_provider.py +++ b/python/voice_to_notes/providers/openai_provider.py @@ -2,4 +2,52 @@ from __future__ import annotations -# TODO: Implement OpenAI provider +import os +from typing import Any + +from voice_to_notes.providers.base import AIProvider + + +class OpenAIProvider(AIProvider): + """Connects to the OpenAI API.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "gpt-4o-mini", + ) -> None: + self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self._model = model + self._client: Any = None + + def _ensure_client(self) -> Any: + if self._client is not None: + return self._client + + if not self._api_key: + raise RuntimeError("OpenAI API key not configured. Set OPENAI_API_KEY or provide it in settings.") + + try: + from openai import OpenAI + + self._client = OpenAI(api_key=self._api_key) + except ImportError: + raise RuntimeError("openai package is required. Install with: pip install openai") + return self._client + + def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str: + client = self._ensure_client() + response = client.chat.completions.create( + model=kwargs.get("model", self._model), + messages=messages, + temperature=kwargs.get("temperature", 0.7), + max_tokens=kwargs.get("max_tokens", 2048), + ) + return response.choices[0].message.content or "" + + def is_available(self) -> bool: + return bool(self._api_key) + + @property + def name(self) -> str: + return "OpenAI" diff --git a/python/voice_to_notes/services/ai_provider.py b/python/voice_to_notes/services/ai_provider.py index 1d5bfb6..6fa4f51 100644 --- a/python/voice_to_notes/services/ai_provider.py +++ b/python/voice_to_notes/services/ai_provider.py @@ -2,12 +2,103 @@ from __future__ import annotations +import sys +from typing import Any + +from voice_to_notes.providers.base import AIProvider + class AIProviderService: - """Manages AI provider selection and routes chat/summarize requests.""" + """Manages AI provider selection and routes chat requests.""" - # TODO: Implement provider routing - # - Select provider based on config (local, openai, anthropic, litellm) - # - Forward chat messages - # - Handle streaming responses - pass + def __init__(self) -> None: + self._providers: dict[str, AIProvider] = {} + self._active_provider: str | None = None + + def register_provider(self, name: str, provider: AIProvider) -> None: + """Register an AI provider.""" + self._providers[name] = provider + + def set_active(self, name: str) -> None: + """Set the active provider by name.""" + if name not in self._providers: + raise ValueError(f"Unknown provider: {name}. Available: {list(self._providers.keys())}") + self._active_provider = name + + def get_active(self) -> AIProvider | None: + """Get the currently active provider.""" + if self._active_provider: + return self._providers.get(self._active_provider) + # Auto-select first available provider + for name, provider in self._providers.items(): + if provider.is_available(): + self._active_provider = name + return provider + return None + + def list_providers(self) -> list[dict[str, Any]]: + """List all registered providers with their status.""" + return [ + { + "name": name, + "display_name": provider.name, + "available": provider.is_available(), + "active": name == self._active_provider, + } + for name, provider in self._providers.items() + ] + + def chat( + self, + messages: list[dict[str, str]], + transcript_context: str = "", + **kwargs: Any, + ) -> str: + """Send a chat request to the active provider. + + Automatically prepends transcript context as a system message if provided. + """ + provider = self.get_active() + if provider is None: + raise RuntimeError( + "No AI provider available. Configure a provider in settings or start the local AI server." + ) + + # Build messages with transcript context + full_messages: list[dict[str, str]] = [] + if transcript_context: + full_messages.append({ + "role": "system", + "content": ( + "You are a helpful assistant analyzing a transcript. " + "Here is the transcript for context:\n\n" + f"{transcript_context}\n\n" + "Answer the user's questions about this transcript. " + "Be concise and helpful." + ), + }) + + full_messages.extend(messages) + + print( + f"[sidecar] AI chat via {provider.name}, {len(full_messages)} messages", + file=sys.stderr, + flush=True, + ) + + return provider.chat(full_messages, **kwargs) + + +def create_default_service() -> AIProviderService: + """Create an AIProviderService with all supported providers registered.""" + from voice_to_notes.providers.anthropic_provider import AnthropicProvider + from voice_to_notes.providers.litellm_provider import LiteLLMProvider + from voice_to_notes.providers.local_provider import LocalProvider + from voice_to_notes.providers.openai_provider import OpenAIProvider + + service = AIProviderService() + service.register_provider("local", LocalProvider()) + service.register_provider("openai", OpenAIProvider()) + service.register_provider("anthropic", AnthropicProvider()) + service.register_provider("litellm", LiteLLMProvider()) + return service diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 8b271fa..fa2c283 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -1,2 +1,91 @@ -// AI provider commands — chat, summarize via Python sidecar -// TODO: Implement when AI provider service is built +use serde_json::{json, Value}; + +use crate::sidecar::messages::IPCMessage; +use crate::sidecar::SidecarManager; + +fn get_sidecar() -> Result { + let python_path = std::env::current_dir() + .map_err(|e| e.to_string())? + .join("../python") + .canonicalize() + .map_err(|e| format!("Cannot find python directory: {e}"))?; + + let manager = SidecarManager::new(); + manager.start(&python_path.to_string_lossy())?; + Ok(manager) +} + +/// Send a chat message to the AI provider via the Python sidecar. +#[tauri::command] +pub fn ai_chat( + messages: Value, + transcript_context: Option, + provider: Option, +) -> Result { + let manager = get_sidecar()?; + + let request_id = uuid::Uuid::new_v4().to_string(); + let mut payload = json!({ + "action": "chat", + "messages": messages, + "transcript_context": transcript_context.unwrap_or_default(), + }); + + // If a specific provider is requested, set it first + if let Some(p) = provider { + let set_msg = IPCMessage::new( + &uuid::Uuid::new_v4().to_string(), + "ai.chat", + json!({ "action": "set_provider", "provider": p }), + ); + let _ = manager.send_and_receive(&set_msg)?; + } + + let msg = IPCMessage::new(&request_id, "ai.chat", payload); + let response = manager.send_and_receive(&msg)?; + + if response.msg_type == "error" { + return Err(format!( + "AI error: {}", + response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown") + )); + } + + Ok(response.payload) +} + +/// List available AI providers. +#[tauri::command] +pub fn ai_list_providers() -> Result { + let manager = get_sidecar()?; + + let request_id = uuid::Uuid::new_v4().to_string(); + let msg = IPCMessage::new( + &request_id, + "ai.chat", + json!({ "action": "list_providers" }), + ); + + let response = manager.send_and_receive(&msg)?; + Ok(response.payload) +} + +/// Configure an AI provider with API key/settings. +#[tauri::command] +pub fn ai_configure(provider: String, config: Value) -> Result { + let manager = get_sidecar()?; + + let request_id = uuid::Uuid::new_v4().to_string(); + let msg = IPCMessage::new( + &request_id, + "ai.chat", + json!({ + "action": "configure", + "provider": provider, + "config": config, + }), + ); + + let response = manager.send_and_receive(&msg)?; + Ok(response.payload) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 9edeff7..cd5b641 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -3,6 +3,7 @@ pub mod db; pub mod sidecar; pub mod state; +use commands::ai::{ai_chat, ai_configure, ai_list_providers}; use commands::export::export_transcript; use commands::project::{create_project, get_project, list_projects}; use commands::transcribe::{run_pipeline, transcribe_file}; @@ -19,6 +20,9 @@ pub fn run() { transcribe_file, run_pipeline, export_transcript, + ai_chat, + ai_list_providers, + ai_configure, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src/lib/components/AIChatPanel.svelte b/src/lib/components/AIChatPanel.svelte index a8b3ce1..81a328f 100644 --- a/src/lib/components/AIChatPanel.svelte +++ b/src/lib/components/AIChatPanel.svelte @@ -1,18 +1,261 @@ + +
-

AI Chat

-

Ask questions about the transcript, generate summaries

+
+

AI Chat

+ {#if messages.length > 0} + + {/if} +
+ +
+ {#if messages.length === 0} +
+

Ask questions about the transcript

+ {#if $segments.length > 0} +
+ + +
+ {/if} +
+ {:else} + {#each messages as msg} +
+
{msg.content}
+
+ {/each} + {#if isLoading} +
+
Thinking...
+
+ {/if} + {/if} +
+ +
+ + +