Phase 1 foundation: Tauri shell, Python sidecar, SQLite database

Tauri v2 + Svelte + TypeScript frontend:
- App shell with workspace layout (waveform, transcript, speakers, AI chat)
- Placeholder components for all major UI areas
- Typed stores (project, transcript, playback, AI)
- TypeScript interfaces matching the database schema
- Tauri bridge service with typed invoke wrappers
- svelte-check passes with 0 errors

Rust backend:
- Tauri v2 app entry point with command registration
- SQLite database layer (rusqlite with bundled SQLite)
  - Full schema: projects, media_files, speakers, segments, words,
    ai_outputs, annotations (with indexes)
  - Model structs with serde serialization
  - CRUD queries for projects, speakers, segments, words
  - Segment text editing preserves original text
  - Schema versioning for future migrations
  - 6 tests passing
- Command stubs for project, transcribe, export, AI, settings, system
- App state management

Python sidecar:
- JSON-line IPC protocol (stdin/stdout)
- Message types: IPCMessage, progress, error, ready
- Handler registry with routing and error handling
- Ping/pong handler for connectivity testing
- Service stubs: transcribe, diarize, pipeline, AI, export
- Provider stubs: local (llama-server), OpenAI, Anthropic, LiteLLM
- Hardware detection stubs
- 14 tests passing, ruff clean

Also adds:
- Testing strategy document (docs/TESTING.md)
- Validation script (scripts/validate.sh)
- Updated .gitignore for Svelte, Rust, Python artifacts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 15:16:06 -08:00
parent c450ef3c0c
commit 503cc6c0cf
95 changed files with 9607 additions and 0 deletions

33
python/pyproject.toml Normal file
View File

@@ -0,0 +1,33 @@
[build-system]
# The [project] table declares `license = "MIT"` as a plain SPDX expression (PEP 639).
# setuptools only accepts that string form from the 77.x series on (77.0.3 is the floor
# recommended by the Python Packaging User Guide); older releases fail metadata validation.
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"
# Package metadata for the Python sidecar.
[project]
name = "voice-to-notes"
version = "0.1.0"
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
# Minimum interpreter version; matches the ruff target-version ("py311") below.
requires-python = ">=3.11"
license = "MIT"
# No runtime dependencies are declared yet.
dependencies = []
[project.optional-dependencies]
# Development extras: linter/formatter (ruff) and test tooling (pytest, pytest-asyncio).
dev = [
"ruff>=0.8.0",
"pytest>=8.0.0",
"pytest-asyncio>=0.24.0",
]
[tool.ruff]
target-version = "py311"
line-length = 100
[tool.ruff.lint]
# E/W: pycodestyle, F: pyflakes, I: isort, B: flake8-bugbear, UP: pyupgrade, RUF: ruff-specific.
select = ["E", "W", "F", "I", "B", "UP", "RUF"]
[tool.ruff.format]
quote-style = "double"
[tool.pytest.ini_options]
testpaths = ["tests"]
# "auto" lets pytest-asyncio run async test functions without an explicit marker.
asyncio_mode = "auto"

0
python/tests/__init__.py Normal file
View File

View File

@@ -0,0 +1,43 @@
"""Tests for message handler routing."""
from voice_to_notes.ipc.handlers import HandlerRegistry, ping_handler
from voice_to_notes.ipc.messages import IPCMessage
def test_ping_handler():
    """ping_handler replies with a pong that keeps the request id and echoes the payload."""
    request = IPCMessage(id="req-1", type="ping", payload={"hello": "world"})
    reply = ping_handler(request)
    assert (reply.type, reply.id) == ("pong", "req-1")
    assert reply.payload["echo"] == {"hello": "world"}
def test_registry_routes_to_handler():
    """A message whose type has a registered handler is answered by that handler."""
    router = HandlerRegistry()
    router.register("ping", ping_handler)
    reply = router.handle(IPCMessage(id="req-1", type="ping", payload={}))
    assert reply is not None
    assert reply.type == "pong"
def test_registry_unknown_type():
    """A message type with no registered handler produces an unknown_type error reply."""
    router = HandlerRegistry()
    reply = router.handle(IPCMessage(id="req-1", type="nonexistent", payload={}))
    assert reply is not None
    assert reply.type == "error"
    assert reply.payload["code"] == "unknown_type"
def test_registry_handler_exception():
    """An exception raised inside a handler is converted into a handler_error reply."""

    def exploding_handler(msg: IPCMessage) -> IPCMessage:
        raise ValueError("something broke")

    router = HandlerRegistry()
    router.register("bad", exploding_handler)
    reply = router.handle(IPCMessage(id="req-1", type="bad", payload={}))
    assert reply is not None
    assert reply.type == "error"
    assert reply.payload["code"] == "handler_error"

View File

@@ -0,0 +1,50 @@
"""Tests for IPC message types."""
from voice_to_notes.ipc.messages import (
IPCMessage,
error_message,
progress_message,
ready_message,
)
def test_ipc_message_to_dict():
    """to_dict() returns the id, type and payload fields as a plain dict."""
    expected = {"id": "req-1", "type": "ping", "payload": {"key": "value"}}
    assert IPCMessage(id="req-1", type="ping", payload={"key": "value"}).to_dict() == expected
def test_ipc_message_from_dict():
    """from_dict() copies id, type and payload out of the source dict."""
    parsed = IPCMessage.from_dict({"id": "req-1", "type": "ping", "payload": {"key": "value"}})
    assert (parsed.id, parsed.type) == ("req-1", "ping")
    assert parsed.payload == {"key": "value"}
def test_ipc_message_from_dict_missing_fields():
    """from_dict() falls back to empty strings and an empty payload for absent keys."""
    parsed = IPCMessage.from_dict({})
    assert (parsed.id, parsed.type) == ("", "")
    assert parsed.payload == {}
def test_progress_message():
    """progress_message() builds a progress message carrying percent and stage."""
    update = progress_message("req-1", 50, "transcribing", "Processing...")
    assert update.type == "progress"
    body = update.payload
    assert body["percent"] == 50
    assert body["stage"] == "transcribing"
def test_error_message():
    """error_message() builds an error message carrying the code and human-readable text."""
    failure = error_message("req-1", "not_found", "File not found")
    assert failure.type == "error"
    body = failure.payload
    assert body["code"] == "not_found"
    assert body["message"] == "File not found"
def test_ready_message():
    """ready_message() is a system-level ready message that includes a version entry."""
    announcement = ready_message()
    assert (announcement.type, announcement.id) == ("ready", "system")
    assert "version" in announcement.payload

View File

@@ -0,0 +1,38 @@
"""Tests for IPC protocol JSON-line encoding/decoding."""
import io
import json
from voice_to_notes.ipc.messages import IPCMessage
from voice_to_notes.ipc.protocol import read_message, write_message
def test_write_message(capsys):
    """write_message() prints the message to stdout as JSON with all three fields intact."""
    write_message(IPCMessage(id="req-1", type="pong", payload={"ok": True}))
    decoded = json.loads(capsys.readouterr().out.strip())
    assert decoded["id"] == "req-1"
    assert decoded["type"] == "pong"
    assert decoded["payload"]["ok"] is True
def test_read_message(monkeypatch):
    """A well-formed JSON line on stdin is decoded into a message."""
    wire = json.dumps({"id": "req-1", "type": "ping", "payload": {}})
    monkeypatch.setattr("sys.stdin", io.StringIO(wire + "\n"))
    received = read_message()
    assert received is not None
    assert (received.id, received.type) == ("req-1", "ping")
def test_read_message_eof(monkeypatch):
    """An exhausted stdin makes read_message() return None."""
    empty_stdin = io.StringIO("")
    monkeypatch.setattr("sys.stdin", empty_stdin)
    assert read_message() is None
def test_read_message_invalid_json(monkeypatch):
    """Input that is not valid JSON makes read_message() return None."""
    garbage_stdin = io.StringIO("not json\n")
    monkeypatch.setattr("sys.stdin", garbage_stdin)
    assert read_message() is None

View File

@@ -0,0 +1,3 @@
"""Voice to Notes — Python sidecar for transcription, diarization, and AI services."""
# Package version string.
# NOTE(review): "0.1.0" is also written literally in pyproject.toml and in the "ready"
# IPC message payload — presumably all three should stay in sync; confirm.
__version__ = "0.1.0"

View File

@@ -0,0 +1 @@
"""Hardware detection and model selection."""

View File

@@ -0,0 +1,9 @@
"""GPU/CPU detection and VRAM estimation."""
from __future__ import annotations
# TODO: Implement hardware detection
# - Check torch.cuda.is_available()
# - Detect VRAM size
# - Detect CPU cores and available RAM
# - Return recommended model configuration

View File

@@ -0,0 +1,7 @@
"""Model selection logic based on available hardware."""
from __future__ import annotations
# TODO: Implement model selection
# - Map hardware capabilities to recommended models
# - Support user overrides from settings

View File

@@ -0,0 +1 @@
"""IPC protocol layer for JSON-line communication with the Rust backend."""

View File

@@ -0,0 +1,39 @@
"""Message handler registry and routing."""
from __future__ import annotations
import sys
from collections.abc import Callable
from voice_to_notes.ipc.messages import IPCMessage, error_message
# Handler function type: takes a request message and returns the response message,
# or None when the handler produces no response.
HandlerFunc = Callable[[IPCMessage], IPCMessage | None]
class HandlerRegistry:
    """Dispatch table from IPC message type names to handler callables."""

    def __init__(self) -> None:
        # Message type -> handler; starts out empty.
        self._handlers: dict[str, HandlerFunc] = {}

    def register(self, message_type: str, handler: HandlerFunc) -> None:
        """Bind ``handler`` to ``message_type``, replacing any earlier binding."""
        self._handlers[message_type] = handler

    def handle(self, msg: IPCMessage) -> IPCMessage | None:
        """Dispatch ``msg`` to the handler registered for ``msg.type``.

        Returns whatever the handler returns. When no handler is registered an
        ``unknown_type`` error message is returned, and when the handler raises a
        ``handler_error`` error message is returned; both cases are logged to stderr.
        """
        target = self._handlers.get(msg.type)
        if target is not None:
            try:
                return target(msg)
            except Exception as exc:
                print(f"[sidecar] Handler error for {msg.type}: {exc}", file=sys.stderr, flush=True)
                return error_message(msg.id, "handler_error", str(exc))

        print(f"[sidecar] Unknown message type: {msg.type}", file=sys.stderr, flush=True)
        return error_message(msg.id, "unknown_type", f"Unknown message type: {msg.type}")
def ping_handler(msg: IPCMessage) -> IPCMessage:
    """Answer a ping with a ``pong`` that reuses the request id and echoes its payload."""
    echoed = {"echo": msg.payload}
    return IPCMessage(id=msg.id, type="pong", payload=echoed)

View File

@@ -0,0 +1,46 @@
"""IPC message type definitions."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass
class IPCMessage:
    """A message exchanged between Rust and Python via JSON-line protocol."""

    # Message identifier, copied from the decoded JSON object.
    id: str
    # Message type name, e.g. "ping", "pong", "progress", "error", "ready".
    type: str
    # Type-specific body; a fresh empty dict when not supplied.
    payload: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the message as a plain dict with ``id``, ``type`` and ``payload`` keys."""
        return {"id": self.id, "type": self.type, "payload": self.payload}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> IPCMessage:
        """Build a message from a decoded JSON object.

        A missing ``id`` or ``type`` defaults to ``""``. A missing payload, or one
        that is explicitly ``null``, becomes an empty dict.
        """
        payload = data.get("payload")
        return cls(
            id=data.get("id", ""),
            type=data.get("type", ""),
            # An explicit JSON null would otherwise put None into a field typed as a
            # dict and break any handler that indexes into the payload.
            payload={} if payload is None else payload,
        )
def progress_message(request_id: str, percent: int, stage: str, message: str) -> IPCMessage:
    """Build a ``progress`` message for ``request_id`` with percent, stage and message text."""
    body = {"percent": percent, "stage": stage, "message": message}
    return IPCMessage(id=request_id, type="progress", payload=body)
def error_message(request_id: str, code: str, message: str) -> IPCMessage:
    """Build an ``error`` message for ``request_id`` with an error code and description."""
    body = {"code": code, "message": message}
    return IPCMessage(id=request_id, type="error", payload=body)
def ready_message() -> IPCMessage:
    """Build the ``ready`` message (id ``system``) whose payload carries the version string."""
    body = {"version": "0.1.0"}
    return IPCMessage(id="system", type="ready", payload=body)

View File

@@ -0,0 +1,47 @@
"""JSON-line protocol reader/writer over stdin/stdout."""
from __future__ import annotations
import json
import sys
from typing import Any
from voice_to_notes.ipc.messages import IPCMessage
def read_message() -> IPCMessage | None:
    """Read the next valid JSON-line message from stdin.

    Blank lines, lines that are not valid JSON, and JSON values that are not objects
    are logged (where applicable) and skipped. ``None`` is therefore returned only on
    EOF or when stdin itself cannot be read, so a caller that treats ``None`` as end
    of input is not shut down by a single malformed line.
    """
    while True:
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # The stream itself is failing; retrying could spin forever, so give up.
            _log(f"Read error: {e}")
            return None

        if not line:
            return None  # EOF

        line = line.strip()
        if not line:
            continue  # blank line between messages, not end of input

        try:
            data = json.loads(line)
        except (ValueError, RecursionError) as e:
            # json.JSONDecodeError is a ValueError; RecursionError covers absurd nesting.
            _log(f"Invalid JSON: {e}")
            continue

        if not isinstance(data, dict):
            # Valid JSON, but from_dict() needs an object to read fields from.
            _log(f"Invalid message: expected a JSON object, got {type(data).__name__}")
            continue

        return IPCMessage.from_dict(data)
def write_message(msg: IPCMessage) -> None:
    """Serialize ``msg`` as compact JSON, emit it as one line on stdout, and flush."""
    encoded = json.dumps(msg.to_dict(), separators=(",", ":"))
    out = sys.stdout
    out.write(f"{encoded}\n")
    out.flush()
def write_dict(data: dict[str, Any]) -> None:
    """Serialize a raw dict as compact JSON, emit it as one line on stdout, and flush."""
    encoded = json.dumps(data, separators=(",", ":"))
    out = sys.stdout
    out.write(f"{encoded}\n")
    out.flush()
def _log(message: str) -> None:
"""Log to stderr (stdout is reserved for IPC)."""
print(f"[sidecar] {message}", file=sys.stderr, flush=True)

View File

@@ -0,0 +1,52 @@
"""Main entry point for the Voice to Notes Python sidecar."""
from __future__ import annotations
import signal
import sys
from voice_to_notes.ipc.handlers import HandlerRegistry, ping_handler
from voice_to_notes.ipc.messages import ready_message
from voice_to_notes.ipc.protocol import read_message, write_message
def create_registry() -> HandlerRegistry:
    """Build a registry populated with every handler the sidecar currently provides."""
    handlers = {"ping": ping_handler}
    # TODO: Register transcribe, diarize, pipeline, ai, export handlers

    registry = HandlerRegistry()
    for message_type, handler in handlers.items():
        registry.register(message_type, handler)
    return registry
def main() -> None:
    """Run the sidecar: announce readiness, then answer stdin messages until input ends.

    SIGTERM and SIGINT both log a shutdown notice to stderr and exit with status 0.
    Handler responses are written to stdout; a handler returning None sends nothing.
    """

    def shutdown(signum: int, frame: object) -> None:
        # Signal handler: log and terminate the process cleanly.
        print("[sidecar] Shutting down...", file=sys.stderr, flush=True)
        sys.exit(0)

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, shutdown)

    registry = create_registry()

    # Tell the Rust side we are up before blocking on stdin.
    write_message(ready_message())
    print("[sidecar] Ready and waiting for messages", file=sys.stderr, flush=True)

    # read_message() returning None is treated as end of input.
    while (msg := read_message()) is not None:
        response = registry.handle(msg)
        if response is None:
            continue
        write_message(response)

    print("[sidecar] EOF on stdin, exiting", file=sys.stderr, flush=True)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1 @@
"""AI provider adapters — local (llama-server), LiteLLM, OpenAI, Anthropic."""

View File

@@ -0,0 +1,5 @@
"""Anthropic provider — direct Anthropic SDK integration."""
from __future__ import annotations
# TODO: Implement Anthropic provider

View File

@@ -0,0 +1,23 @@
"""Abstract base class for AI providers."""
from __future__ import annotations
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
from typing import Any
class AIProvider(ABC):
    """Base interface for all AI providers.

    Subclasses must implement both :meth:`chat` and :meth:`stream`.
    """

    @abstractmethod
    async def chat(self, messages: list[dict[str, Any]], config: dict[str, Any]) -> str:
        """Send a chat completion request and return the response."""
        ...

    @abstractmethod
    async def stream(
        self, messages: list[dict[str, Any]], config: dict[str, Any]
    ) -> AsyncIterator[str]:
        """Send a streaming chat request, yielding tokens as they arrive.

        This is declared as an async generator, so ``provider.stream(...)`` is consumed
        directly with ``async for`` and overrides written with ``yield`` match it.
        """
        raise NotImplementedError
        # Unreachable on purpose: the presence of ``yield`` is what makes this an async
        # generator function instead of a coroutine that returns an iterator.
        yield ""

View File

@@ -0,0 +1,5 @@
"""LiteLLM provider — multi-provider gateway."""
from __future__ import annotations
# TODO: Implement LiteLLM provider

View File

@@ -0,0 +1,9 @@
"""Local AI provider — bundled llama-server (OpenAI-compatible API)."""
from __future__ import annotations
# TODO: Implement local provider
# - Connect to llama-server on localhost:{port}
# - Use openai SDK with custom base_url
# - Support chat and streaming

View File

@@ -0,0 +1,5 @@
"""OpenAI provider — direct OpenAI SDK integration."""
from __future__ import annotations
# TODO: Implement OpenAI provider

View File

@@ -0,0 +1 @@
"""Service layer — transcription, diarization, AI, and export."""

View File

@@ -0,0 +1,13 @@
"""AI provider service — routes requests to configured provider."""
from __future__ import annotations
class AIProviderService:
    """Placeholder for the service that selects an AI provider and routes requests to it.

    Nothing is implemented yet; see the TODO list below.
    """

    # TODO: Implement provider routing
    # - Select provider based on config (local, openai, anthropic, litellm)
    # - Forward chat messages
    # - Handle streaming responses

View File

@@ -0,0 +1,13 @@
"""Diarization service — pyannote.audio speaker identification."""
from __future__ import annotations
class DiarizeService:
    """Placeholder for the speaker diarization service (planned on pyannote.audio).

    Nothing is implemented yet; see the TODO list below.
    """

    # TODO: Implement pyannote.audio integration
    # - Load community-1 model
    # - Run diarization on audio
    # - Return speaker segments with timestamps

View File

@@ -0,0 +1,14 @@
"""Export service — caption and text document generation."""
from __future__ import annotations
class ExportService:
    """Placeholder for export to SRT, WebVTT, ASS, plain text, and Markdown.

    Nothing is implemented yet; see the TODO list below.
    """

    # TODO: Implement pysubs2 integration
    # - SRT with [Speaker]: prefix
    # - WebVTT with <v Speaker> voice tags
    # - ASS with named styles per speaker
    # - Plain text and Markdown with speaker labels

View File

@@ -0,0 +1,14 @@
"""Combined transcription + diarization pipeline."""
from __future__ import annotations
class PipelineService:
    """Placeholder for the combined pipeline: transcribe -> align -> diarize -> merge.

    Nothing is implemented yet; see the TODO list below.
    """

    # TODO: Implement combined pipeline
    # 1. faster-whisper transcription
    # 2. wav2vec2 word-level alignment
    # 3. pyannote diarization
    # 4. Merge words with speaker segments

View File

@@ -0,0 +1,13 @@
"""Transcription service — faster-whisper + wav2vec2 pipeline."""
from __future__ import annotations
class TranscribeService:
    """Placeholder for the audio transcription service (planned on faster-whisper).

    Nothing is implemented yet; see the TODO list below.
    """

    # TODO: Implement faster-whisper integration
    # - Load model based on hardware detection
    # - Transcribe audio with word-level timestamps
    # - Report progress via IPC

View File

@@ -0,0 +1 @@
"""Utility modules."""