Phase 5: AI provider system with local and cloud support

- Implement AIProvider base interface with chat() and is_available()
- Add LocalProvider connecting to bundled llama-server via OpenAI SDK
- Add OpenAIProvider for direct OpenAI API access
- Add AnthropicProvider for Anthropic Claude API
- Add LiteLLMProvider for multi-provider gateway
- Build AIProviderService with provider routing, auto-selection,
  and transcript context injection
- Add ai.chat IPC handler supporting chat, list_providers, set_provider,
  and configure actions (request payload shapes are sketched below)
- Add ai_chat, ai_list_providers, ai_configure Tauri commands
- Build interactive AIChatPanel with message history, quick actions
  (Summarize, Action Items), and transcript context awareness
- Tests: 30 Python, 6 Rust passing; 0 Svelte check errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:25:10 -08:00
parent 415a648a2b
commit d67625cd5a
11 changed files with 735 additions and 28 deletions
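For reference, the four payload shapes the ai.chat handler accepts, a sketch derived from the handler diff below; every value here is illustrative:

chat_request = {
    "action": "chat",
    "messages": [{"role": "user", "content": "Summarize this transcript."}],
    "transcript_context": "[Alice]: Ship on Friday.\n[Bob]: Agreed.",
}
list_request = {"action": "list_providers"}
set_request = {"action": "set_provider", "provider": "openai"}
configure_request = {
    "action": "configure",
    "provider": "openai",
    "config": {"api_key": "sk-...", "model": "gpt-4o-mini"},
}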

View File: voice_to_notes/ipc/handlers.py

@@ -135,6 +135,83 @@ def make_export_handler() -> HandlerFunc:
     return handler
 
 
+def make_ai_chat_handler() -> HandlerFunc:
+    """Create an AI chat handler with persistent AIProviderService."""
+    from voice_to_notes.services.ai_provider import create_default_service
+
+    service = create_default_service()
+
+    def handler(msg: IPCMessage) -> IPCMessage:
+        payload = msg.payload
+        action = payload.get("action", "chat")
+        if action == "list_providers":
+            return IPCMessage(
+                id=msg.id,
+                type="ai.providers",
+                payload={"providers": service.list_providers()},
+            )
+        if action == "set_provider":
+            service.set_active(payload["provider"])
+            return IPCMessage(
+                id=msg.id,
+                type="ai.provider_set",
+                payload={"provider": payload["provider"]},
+            )
+        if action == "configure":
+            # Re-create a provider with custom settings
+            provider_name = payload.get("provider", "")
+            config = payload.get("config", {})
+            if provider_name == "local":
+                from voice_to_notes.providers.local_provider import LocalProvider
+
+                service.register_provider("local", LocalProvider(
+                    base_url=config.get("base_url", "http://localhost:8080"),
+                    model=config.get("model", "local"),
+                ))
+            elif provider_name == "openai":
+                from voice_to_notes.providers.openai_provider import OpenAIProvider
+
+                service.register_provider("openai", OpenAIProvider(
+                    api_key=config.get("api_key"),
+                    model=config.get("model", "gpt-4o-mini"),
+                ))
+            elif provider_name == "anthropic":
+                from voice_to_notes.providers.anthropic_provider import AnthropicProvider
+
+                service.register_provider("anthropic", AnthropicProvider(
+                    api_key=config.get("api_key"),
+                    model=config.get("model", "claude-sonnet-4-6"),
+                ))
+            elif provider_name == "litellm":
+                from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+
+                service.register_provider("litellm", LiteLLMProvider(
+                    model=config.get("model", "gpt-4o-mini"),
+                ))
+            return IPCMessage(
+                id=msg.id,
+                type="ai.configured",
+                payload={"provider": provider_name},
+            )
+        # Default: chat
+        response = service.chat(
+            messages=payload.get("messages", []),
+            transcript_context=payload.get("transcript_context", ""),
+            **{k: v for k, v in payload.items() if k not in ("action", "messages", "transcript_context")},
+        )
+        return IPCMessage(
+            id=msg.id,
+            type="ai.response",
+            payload={"response": response},
+        )
+
+    return handler
+
+
 def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
     """Detect hardware capabilities and return recommendations."""
     from voice_to_notes.hardware.detect import detect_hardware
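A minimal round trip through this handler inside the repo, assuming IPCMessage accepts id/type/payload keywords as the diff above shows:

handler = make_ai_chat_handler()
reply = handler(IPCMessage(id="req-1", type="ai.chat", payload={"action": "list_providers"}))
assert reply.type == "ai.providers"
print(reply.payload["providers"])  # one status dict per registered provider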

View File

@@ -8,6 +8,7 @@ import sys
 from voice_to_notes.ipc.handlers import (
     HandlerRegistry,
     hardware_detect_handler,
+    make_ai_chat_handler,
     make_diarize_handler,
     make_export_handler,
     make_pipeline_handler,
@@ -27,6 +28,7 @@ def create_registry() -> HandlerRegistry:
     registry.register("diarize.start", make_diarize_handler())
     registry.register("pipeline.start", make_pipeline_handler())
     registry.register("export.start", make_export_handler())
+    registry.register("ai.chat", make_ai_chat_handler())
     return registry

View File: voice_to_notes/providers/anthropic_provider.py

@@ -2,4 +2,68 @@
 from __future__ import annotations
 
-# TODO: Implement Anthropic provider
+import os
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class AnthropicProvider(AIProvider):
+    """Connects to the Anthropic API."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = "claude-sonnet-4-6",
+    ) -> None:
+        self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
+        self._model = model
+        self._client: Any = None
+
+    def _ensure_client(self) -> Any:
+        if self._client is not None:
+            return self._client
+        if not self._api_key:
+            raise RuntimeError(
+                "Anthropic API key not configured. Set ANTHROPIC_API_KEY or provide it in settings."
+            )
+        try:
+            import anthropic
+
+            self._client = anthropic.Anthropic(api_key=self._api_key)
+        except ImportError:
+            raise RuntimeError("anthropic package is required. Install with: pip install anthropic")
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        client = self._ensure_client()
+        # Anthropic expects a system message separately
+        system_msg = ""
+        chat_messages = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                system_msg = msg.get("content", "")
+            else:
+                chat_messages.append(msg)
+        create_kwargs: dict[str, Any] = {
+            "model": kwargs.get("model", self._model),
+            "messages": chat_messages,
+            "max_tokens": kwargs.get("max_tokens", 2048),
+        }
+        if system_msg:
+            create_kwargs["system"] = system_msg
+        response = client.messages.create(**create_kwargs)
+        # Anthropic returns content blocks
+        return "".join(block.text for block in response.content if hasattr(block, "text"))
+
+    def is_available(self) -> bool:
+        return bool(self._api_key)
+
+    @property
+    def name(self) -> str:
+        return "Anthropic"

View File: voice_to_notes/providers/base.py

@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator
 from typing import Any
 
@@ -11,13 +10,17 @@ class AIProvider(ABC):
     """Base interface for all AI providers."""
 
     @abstractmethod
-    async def chat(self, messages: list[dict[str, Any]], config: dict[str, Any]) -> str:
-        """Send a chat completion request and return the response."""
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        """Send a chat completion request and return the full response text."""
         ...
 
     @abstractmethod
-    async def stream(
-        self, messages: list[dict[str, Any]], config: dict[str, Any]
-    ) -> AsyncIterator[str]:
-        """Send a streaming chat request, yielding tokens as they arrive."""
+    def is_available(self) -> bool:
+        """Check if this provider is configured and available."""
+        ...
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Provider display name."""
         ...
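Any new backend only has to satisfy these three members. A hypothetical minimal implementation, handy as a test stub; EchoProvider is not part of this commit:

from typing import Any

from voice_to_notes.providers.base import AIProvider


class EchoProvider(AIProvider):
    """Echoes the last user message back; always available."""

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        return messages[-1]["content"] if messages else ""

    def is_available(self) -> bool:
        return True

    @property
    def name(self) -> str:
        return "Echo"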

View File: voice_to_notes/providers/litellm_provider.py

@@ -2,4 +2,41 @@
 from __future__ import annotations
 
-# TODO: Implement LiteLLM provider
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class LiteLLMProvider(AIProvider):
+    """Routes through LiteLLM for access to 100+ LLM providers."""
+
+    def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
+        self._model = model
+        self._extra_kwargs = kwargs
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        try:
+            import litellm
+        except ImportError:
+            raise RuntimeError("litellm package is required. Install with: pip install litellm")
+        merged_kwargs = {**self._extra_kwargs, **kwargs}
+        response = litellm.completion(
+            model=merged_kwargs.get("model", self._model),
+            messages=messages,
+            temperature=merged_kwargs.get("temperature", 0.7),
+            max_tokens=merged_kwargs.get("max_tokens", 2048),
+        )
+        return response.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        try:
+            import litellm  # noqa: F401
+
+            return True
+        except ImportError:
+            return False
+
+    @property
+    def name(self) -> str:
+        return "LiteLLM"

View File: voice_to_notes/providers/local_provider.py

@@ -2,8 +2,57 @@
 from __future__ import annotations
 
-# TODO: Implement local provider
-# - Connect to llama-server on localhost:{port}
-# - Use openai SDK with custom base_url
-# - Support chat and streaming
+import sys
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class LocalProvider(AIProvider):
+    """Connects to bundled llama-server via its OpenAI-compatible API."""
+
+    def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
+        self._base_url = base_url.rstrip("/")
+        self._model = model
+        self._client: Any = None
+
+    def _ensure_client(self) -> Any:
+        if self._client is not None:
+            return self._client
+        try:
+            from openai import OpenAI
+
+            self._client = OpenAI(
+                base_url=f"{self._base_url}/v1",
+                api_key="not-needed",  # llama-server doesn't require an API key
+            )
+        except ImportError:
+            raise RuntimeError(
+                "openai package is required for local AI. Install with: pip install openai"
+            )
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        client = self._ensure_client()
+        response = client.chat.completions.create(
+            model=self._model,
+            messages=messages,
+            temperature=kwargs.get("temperature", 0.7),
+            max_tokens=kwargs.get("max_tokens", 2048),
+        )
+        return response.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        try:
+            import urllib.request
+
+            req = urllib.request.Request(f"{self._base_url}/health", method="GET")
+            with urllib.request.urlopen(req, timeout=2) as resp:
+                return resp.status == 200
+        except Exception:
+            return False
+
+    @property
+    def name(self) -> str:
+        return "Local (llama-server)"

View File: voice_to_notes/providers/openai_provider.py

@@ -2,4 +2,52 @@
 from __future__ import annotations
 
-# TODO: Implement OpenAI provider
+import os
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
+class OpenAIProvider(AIProvider):
+    """Connects to the OpenAI API."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = "gpt-4o-mini",
+    ) -> None:
+        self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
+        self._model = model
+        self._client: Any = None
+
+    def _ensure_client(self) -> Any:
+        if self._client is not None:
+            return self._client
+        if not self._api_key:
+            raise RuntimeError("OpenAI API key not configured. Set OPENAI_API_KEY or provide it in settings.")
+        try:
+            from openai import OpenAI
+
+            self._client = OpenAI(api_key=self._api_key)
+        except ImportError:
+            raise RuntimeError("openai package is required. Install with: pip install openai")
+        return self._client
+
+    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
+        client = self._ensure_client()
+        response = client.chat.completions.create(
+            model=kwargs.get("model", self._model),
+            messages=messages,
+            temperature=kwargs.get("temperature", 0.7),
+            max_tokens=kwargs.get("max_tokens", 2048),
+        )
+        return response.choices[0].message.content or ""
+
+    def is_available(self) -> bool:
+        return bool(self._api_key)
+
+    @property
+    def name(self) -> str:
+        return "OpenAI"

View File: voice_to_notes/services/ai_provider.py

@@ -2,12 +2,103 @@
 from __future__ import annotations
 
+import sys
+from typing import Any
+
+from voice_to_notes.providers.base import AIProvider
+
+
 class AIProviderService:
-    """Manages AI provider selection and routes chat/summarize requests."""
+    """Manages AI provider selection and routes chat requests."""
 
-    # TODO: Implement provider routing
-    # - Select provider based on config (local, openai, anthropic, litellm)
-    # - Forward chat messages
-    # - Handle streaming responses
-    pass
+    def __init__(self) -> None:
+        self._providers: dict[str, AIProvider] = {}
+        self._active_provider: str | None = None
+
+    def register_provider(self, name: str, provider: AIProvider) -> None:
+        """Register an AI provider."""
+        self._providers[name] = provider
+
+    def set_active(self, name: str) -> None:
+        """Set the active provider by name."""
+        if name not in self._providers:
+            raise ValueError(f"Unknown provider: {name}. Available: {list(self._providers.keys())}")
+        self._active_provider = name
+
+    def get_active(self) -> AIProvider | None:
+        """Get the currently active provider."""
+        if self._active_provider:
+            return self._providers.get(self._active_provider)
+        # Auto-select first available provider
+        for name, provider in self._providers.items():
+            if provider.is_available():
+                self._active_provider = name
+                return provider
+        return None
+
+    def list_providers(self) -> list[dict[str, Any]]:
+        """List all registered providers with their status."""
+        return [
+            {
+                "name": name,
+                "display_name": provider.name,
+                "available": provider.is_available(),
+                "active": name == self._active_provider,
+            }
+            for name, provider in self._providers.items()
+        ]
+
+    def chat(
+        self,
+        messages: list[dict[str, str]],
+        transcript_context: str = "",
+        **kwargs: Any,
+    ) -> str:
+        """Send a chat request to the active provider.
+
+        Automatically prepends transcript context as a system message if provided.
+        """
+        provider = self.get_active()
+        if provider is None:
+            raise RuntimeError(
+                "No AI provider available. Configure a provider in settings or start the local AI server."
+            )
+        # Build messages with transcript context
+        full_messages: list[dict[str, str]] = []
+        if transcript_context:
+            full_messages.append({
+                "role": "system",
+                "content": (
+                    "You are a helpful assistant analyzing a transcript. "
+                    "Here is the transcript for context:\n\n"
+                    f"{transcript_context}\n\n"
+                    "Answer the user's questions about this transcript. "
+                    "Be concise and helpful."
+                ),
+            })
+        full_messages.extend(messages)
+        print(
+            f"[sidecar] AI chat via {provider.name}, {len(full_messages)} messages",
+            file=sys.stderr,
+            flush=True,
+        )
+        return provider.chat(full_messages, **kwargs)
+
+
+def create_default_service() -> AIProviderService:
+    """Create an AIProviderService with all supported providers registered."""
+    from voice_to_notes.providers.anthropic_provider import AnthropicProvider
+    from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+    from voice_to_notes.providers.local_provider import LocalProvider
+    from voice_to_notes.providers.openai_provider import OpenAIProvider
+
+    service = AIProviderService()
+    service.register_provider("local", LocalProvider())
+    service.register_provider("openai", OpenAIProvider())
+    service.register_provider("anthropic", AnthropicProvider())
+    service.register_provider("litellm", LiteLLMProvider())
+    return service
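Putting the service together, a sketch of the expected call pattern; chat raises RuntimeError when no provider is available:

service = create_default_service()
for info in service.list_providers():
    print(info["name"], "available:", info["available"])

answer = service.chat(
    messages=[{"role": "user", "content": "What was decided?"}],
    transcript_context="[Alice]: Ship Friday.\n[Bob]: Agreed.",
)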

View File: src-tauri/src/commands/ai.rs

@@ -1,2 +1,91 @@
-// AI provider commands — chat, summarize via Python sidecar
-// TODO: Implement when AI provider service is built
+use serde_json::{json, Value};
+
+use crate::sidecar::messages::IPCMessage;
+use crate::sidecar::SidecarManager;
+
+fn get_sidecar() -> Result<SidecarManager, String> {
+    let python_path = std::env::current_dir()
+        .map_err(|e| e.to_string())?
+        .join("../python")
+        .canonicalize()
+        .map_err(|e| format!("Cannot find python directory: {e}"))?;
+    let manager = SidecarManager::new();
+    manager.start(&python_path.to_string_lossy())?;
+    Ok(manager)
+}
+
+/// Send a chat message to the AI provider via the Python sidecar.
+#[tauri::command]
+pub fn ai_chat(
+    messages: Value,
+    transcript_context: Option<String>,
+    provider: Option<String>,
+) -> Result<Value, String> {
+    let manager = get_sidecar()?;
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let payload = json!({
+        "action": "chat",
+        "messages": messages,
+        "transcript_context": transcript_context.unwrap_or_default(),
+    });
+    // If a specific provider is requested, set it first
+    if let Some(p) = provider {
+        let set_msg = IPCMessage::new(
+            &uuid::Uuid::new_v4().to_string(),
+            "ai.chat",
+            json!({ "action": "set_provider", "provider": p }),
+        );
+        let _ = manager.send_and_receive(&set_msg)?;
+    }
+    let msg = IPCMessage::new(&request_id, "ai.chat", payload);
+    let response = manager.send_and_receive(&msg)?;
+    if response.msg_type == "error" {
+        return Err(format!(
+            "AI error: {}",
+            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+        ));
+    }
+    Ok(response.payload)
+}
+
+/// List available AI providers.
+#[tauri::command]
+pub fn ai_list_providers() -> Result<Value, String> {
+    let manager = get_sidecar()?;
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let msg = IPCMessage::new(
+        &request_id,
+        "ai.chat",
+        json!({ "action": "list_providers" }),
+    );
+    let response = manager.send_and_receive(&msg)?;
+    Ok(response.payload)
+}
+
+/// Configure an AI provider with API key/settings.
+#[tauri::command]
+pub fn ai_configure(provider: String, config: Value) -> Result<Value, String> {
+    let manager = get_sidecar()?;
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let msg = IPCMessage::new(
+        &request_id,
+        "ai.chat",
+        json!({
+            "action": "configure",
+            "provider": provider,
+            "config": config,
+        }),
+    );
+    let response = manager.send_and_receive(&msg)?;
+    Ok(response.payload)
+}
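For reference, the wire format these commands assume: both sides treat an IPC message as a JSON object with id, type, and payload fields, one object per line. A Python sketch of one chat exchange, with illustrative values:

import json

request_line = json.dumps({
    "id": "req-1",  # uuid::Uuid::new_v4() on the Rust side
    "type": "ai.chat",
    "payload": {
        "action": "chat",
        "messages": [{"role": "user", "content": "Summarize the call."}],
        "transcript_context": "",
    },
})
response_line = json.dumps({
    "id": "req-1",
    "type": "ai.response",
    "payload": {"response": "Model reply text."},
})
print(request_line)
print(response_line)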

View File: src-tauri/src/lib.rs

@@ -3,6 +3,7 @@ pub mod db;
 pub mod sidecar;
 pub mod state;
 
+use commands::ai::{ai_chat, ai_configure, ai_list_providers};
 use commands::export::export_transcript;
 use commands::project::{create_project, get_project, list_projects};
 use commands::transcribe::{run_pipeline, transcribe_file};
@@ -19,6 +20,9 @@ pub fn run() {
             transcribe_file,
             run_pipeline,
             export_transcript,
+            ai_chat,
+            ai_list_providers,
+            ai_configure,
         ])
         .run(tauri::generate_context!())
         .expect("error while running tauri application");

View File: AIChatPanel.svelte

@@ -1,18 +1,261 @@
+<script lang="ts">
+  import { invoke } from '@tauri-apps/api/core';
+  import { segments, speakers } from '$lib/stores/transcript';
+
+  interface ChatMessage {
+    role: 'user' | 'assistant';
+    content: string;
+  }
+
+  let messages = $state<ChatMessage[]>([]);
+  let inputText = $state('');
+  let isLoading = $state(false);
+  let chatContainer: HTMLDivElement;
+
+  function getTranscriptContext(): string {
+    const segs = $segments;
+    const spks = $speakers;
+    if (segs.length === 0) return '';
+    return segs.map(seg => {
+      const speaker = spks.find(s => s.id === seg.speaker_id);
+      const name = speaker?.display_name || speaker?.label || 'Unknown';
+      return `[${name}]: ${seg.text}`;
+    }).join('\n');
+  }
+
+  async function sendMessage() {
+    const text = inputText.trim();
+    if (!text || isLoading) return;
+    messages = [...messages, { role: 'user', content: text }];
+    inputText = '';
+    isLoading = true;
+    // Auto-scroll to bottom
+    requestAnimationFrame(() => {
+      if (chatContainer) chatContainer.scrollTop = chatContainer.scrollHeight;
+    });
+    try {
+      const chatMessages = messages.map(m => ({
+        role: m.role,
+        content: m.content,
+      }));
+      const result = await invoke<{ response: string }>('ai_chat', {
+        messages: chatMessages,
+        transcriptContext: getTranscriptContext(),
+      });
+      messages = [...messages, { role: 'assistant', content: result.response }];
+    } catch (err) {
+      messages = [...messages, {
+        role: 'assistant',
+        content: `Error: ${err}`,
+      }];
+    } finally {
+      isLoading = false;
+      requestAnimationFrame(() => {
+        if (chatContainer) chatContainer.scrollTop = chatContainer.scrollHeight;
+      });
+    }
+  }
+
+  function handleKeydown(e: KeyboardEvent) {
+    if (e.key === 'Enter' && !e.shiftKey) {
+      e.preventDefault();
+      sendMessage();
+    }
+  }
+
+  function clearChat() {
+    messages = [];
+  }
+
+  // Quick action buttons
+  async function summarize() {
+    inputText = 'Please summarize this transcript in bullet points.';
+    await sendMessage();
+  }
+
+  async function extractActions() {
+    inputText = 'What action items or follow-ups were discussed?';
+    await sendMessage();
+  }
+</script>
+
 <div class="ai-chat-panel">
+  <div class="panel-header">
     <h3>AI Chat</h3>
-  <p class="placeholder">Ask questions about the transcript, generate summaries</p>
+    {#if messages.length > 0}
+      <button class="clear-btn" onclick={clearChat} title="Clear chat">Clear</button>
+    {/if}
+  </div>
+
+  <div class="chat-messages" bind:this={chatContainer}>
+    {#if messages.length === 0}
+      <div class="empty-state">
+        <p>Ask questions about the transcript</p>
+        {#if $segments.length > 0}
+          <div class="quick-actions">
+            <button class="quick-btn" onclick={summarize}>Summarize</button>
+            <button class="quick-btn" onclick={extractActions}>Action Items</button>
+          </div>
+        {/if}
+      </div>
+    {:else}
+      {#each messages as msg}
+        <div class="message {msg.role}">
+          <div class="message-content">{msg.content}</div>
+        </div>
+      {/each}
+      {#if isLoading}
+        <div class="message assistant loading">
+          <div class="message-content">Thinking...</div>
+        </div>
+      {/if}
+    {/if}
+  </div>
+
+  <div class="chat-input">
+    <textarea
+      class="input-textarea"
+      placeholder="Ask about the transcript..."
+      bind:value={inputText}
+      onkeydown={handleKeydown}
+      disabled={isLoading}
+    ></textarea>
+    <button
+      class="send-btn"
+      onclick={sendMessage}
+      disabled={isLoading || !inputText.trim()}
+    >
+      Send
+    </button>
+  </div>
 </div>
 
 <style>
   .ai-chat-panel {
-    padding: 1rem;
+    flex: 1;
+    display: flex;
+    flex-direction: column;
     background: #16213e;
     border-radius: 8px;
     color: #e0e0e0;
+    min-height: 0;
   }
-  h3 { margin: 0 0 0.5rem; }
-  .placeholder {
+  .panel-header {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 0.75rem 1rem 0.5rem;
+  }
+  .panel-header h3 {
+    margin: 0;
+    font-size: 0.95rem;
+  }
+  .clear-btn {
+    background: none;
+    border: 1px solid #4a5568;
+    color: #999;
+    padding: 0.15rem 0.5rem;
+    border-radius: 3px;
+    cursor: pointer;
+    font-size: 0.7rem;
+  }
+  .clear-btn:hover {
+    color: #e0e0e0;
+    border-color: #e94560;
+  }
+  .chat-messages {
+    flex: 1;
+    overflow-y: auto;
+    padding: 0 0.75rem;
+    min-height: 0;
+  }
+  .empty-state {
+    text-align: center;
     color: #666;
-    font-size: 0.875rem;
+    font-size: 0.8rem;
+    padding: 1rem 0;
+  }
+  .quick-actions {
+    display: flex;
+    gap: 0.5rem;
+    justify-content: center;
+    margin-top: 0.5rem;
+  }
+  .quick-btn {
+    background: rgba(233, 69, 96, 0.15);
+    border: 1px solid rgba(233, 69, 96, 0.3);
+    color: #e94560;
+    padding: 0.3rem 0.6rem;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 0.75rem;
+  }
+  .quick-btn:hover {
+    background: rgba(233, 69, 96, 0.25);
+  }
+  .message {
+    margin-bottom: 0.5rem;
+    padding: 0.5rem 0.75rem;
+    border-radius: 6px;
+    font-size: 0.8rem;
+    line-height: 1.4;
+  }
+  .message.user {
+    background: rgba(233, 69, 96, 0.15);
+    margin-left: 1rem;
+  }
+  .message.assistant {
+    background: rgba(255, 255, 255, 0.05);
+    margin-right: 1rem;
+  }
+  .message.loading {
+    opacity: 0.6;
+    font-style: italic;
+  }
+  .chat-input {
+    display: flex;
+    gap: 0.5rem;
+    padding: 0.5rem 0.75rem 0.75rem;
+  }
+  .input-textarea {
+    flex: 1;
+    background: #1a1a2e;
+    color: #e0e0e0;
+    border: 1px solid #4a5568;
+    border-radius: 4px;
+    padding: 0.4rem 0.5rem;
+    font-family: inherit;
+    font-size: 0.8rem;
+    resize: none;
+    min-height: 2rem;
+    max-height: 4rem;
+  }
+  .input-textarea:focus {
+    outline: none;
+    border-color: #e94560;
+  }
+  .send-btn {
+    background: #e94560;
+    border: none;
+    color: white;
+    padding: 0.4rem 0.75rem;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 0.8rem;
+    font-weight: 500;
+    align-self: flex-end;
+  }
+  .send-btn:hover:not(:disabled) {
+    background: #d63851;
+  }
+  .send-btn:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
   }
 </style>
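The panel's Send and quick-action buttons ultimately exercise the same Python path shown earlier; note that the panel passes transcriptContext in camelCase, which Tauri's default argument mapping converts to the snake_case transcript_context parameter. A test-style sketch of that path, verifying the transcript is injected as a system message; CaptureProvider is hypothetical, not part of the commit:

from typing import Any

from voice_to_notes.providers.base import AIProvider
from voice_to_notes.services.ai_provider import AIProviderService


class CaptureProvider(AIProvider):
    """Stand-in provider that records what it was asked."""
    last_messages: list[dict[str, str]] | None = None

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        CaptureProvider.last_messages = messages
        return "ok"

    def is_available(self) -> bool:
        return True

    @property
    def name(self) -> str:
        return "Capture"


service = AIProviderService()
service.register_provider("capture", CaptureProvider())
service.chat([{"role": "user", "content": "hi"}], transcript_context="[A]: hello")
assert CaptureProvider.last_messages[0]["role"] == "system"
assert "[A]: hello" in CaptureProvider.last_messages[0]["content"]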