Phase 1 foundation: Tauri shell, Python sidecar, SQLite database
Tauri v2 + Svelte + TypeScript frontend:
- App shell with workspace layout (waveform, transcript, speakers, AI chat)
- Placeholder components for all major UI areas
- Typed stores (project, transcript, playback, AI)
- TypeScript interfaces matching the database schema
- Tauri bridge service with typed invoke wrappers
- svelte-check passes with 0 errors
Rust backend:
- Tauri v2 app entry point with command registration
- SQLite database layer (rusqlite with bundled SQLite)
- Full schema: projects, media_files, speakers, segments, words,
ai_outputs, annotations (with indexes)
- Model structs with serde serialization
- CRUD queries for projects, speakers, segments, words
- Segment text editing preserves original text
- Schema versioning for future migrations
- 6 tests passing
- Command stubs for project, transcribe, export, AI, settings, system
- App state management
Python sidecar:
- JSON-line IPC protocol (stdin/stdout)
- Message types: IPCMessage, progress, error, ready
- Handler registry with routing and error handling
- Ping/pong handler for connectivity testing
- Service stubs: transcribe, diarize, pipeline, AI, export
- Provider stubs: local (llama-server), OpenAI, Anthropic, LiteLLM
- Hardware detection stubs
- 14 tests passing, ruff clean
Also adds:
- Testing strategy document (docs/TESTING.md)
- Validation script (scripts/validate.sh)
- Updated .gitignore for Svelte, Rust, Python artifacts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
3
python/voice_to_notes/__init__.py
Normal file
3
python/voice_to_notes/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Voice to Notes — Python sidecar for transcription, diarization, and AI services."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
1
python/voice_to_notes/hardware/__init__.py
Normal file
1
python/voice_to_notes/hardware/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Hardware detection and model selection."""
|
||||
9
python/voice_to_notes/hardware/detect.py
Normal file
9
python/voice_to_notes/hardware/detect.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""GPU/CPU detection and VRAM estimation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO: Implement hardware detection
|
||||
# - Check torch.cuda.is_available()
|
||||
# - Detect VRAM size
|
||||
# - Detect CPU cores and available RAM
|
||||
# - Return recommended model configuration
|
||||
7
python/voice_to_notes/hardware/models.py
Normal file
7
python/voice_to_notes/hardware/models.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Model selection logic based on available hardware."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO: Implement model selection
|
||||
# - Map hardware capabilities to recommended models
|
||||
# - Support user overrides from settings
|
||||
1
python/voice_to_notes/ipc/__init__.py
Normal file
1
python/voice_to_notes/ipc/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""IPC protocol layer for JSON-line communication with the Rust backend."""
|
||||
39
python/voice_to_notes/ipc/handlers.py
Normal file
39
python/voice_to_notes/ipc/handlers.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Message handler registry and routing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Callable
|
||||
|
||||
from voice_to_notes.ipc.messages import IPCMessage, error_message
|
||||
|
||||
# Handler function type: takes a request message and returns either a response
# message or None when the handler has nothing to send back.
HandlerFunc = Callable[[IPCMessage], IPCMessage | None]
|
||||
|
||||
|
||||
class HandlerRegistry:
    """Dispatch table from IPC message type to the callable that serves it."""

    def __init__(self) -> None:
        # Keyed by message type string; one handler per type.
        self._handlers: dict[str, HandlerFunc] = {}

    def register(self, message_type: str, handler: HandlerFunc) -> None:
        """Install *handler* for *message_type*, replacing any previous one."""
        self._handlers[message_type] = handler

    def handle(self, msg: IPCMessage) -> IPCMessage | None:
        """Dispatch *msg* to the handler registered for its type.

        Returns whatever the handler returns. An "unknown_type" error message
        is returned when no handler is registered, and a "handler_error"
        error message when the handler raises; both cases are also logged to
        stderr.
        """
        callback = self._handlers.get(msg.type)
        if callback is not None:
            try:
                return callback(msg)
            except Exception as exc:
                # Keep one failing request from propagating out of dispatch.
                print(
                    f"[sidecar] Handler error for {msg.type}: {exc}",
                    file=sys.stderr,
                    flush=True,
                )
                return error_message(msg.id, "handler_error", str(exc))

        print(f"[sidecar] Unknown message type: {msg.type}", file=sys.stderr, flush=True)
        return error_message(msg.id, "unknown_type", f"Unknown message type: {msg.type}")
|
||||
|
||||
|
||||
def ping_handler(msg: IPCMessage) -> IPCMessage:
    """Answer a ping with a "pong" carrying the request payload back under "echo"."""
    echoed = {"echo": msg.payload}
    return IPCMessage(id=msg.id, type="pong", payload=echoed)
|
||||
46
python/voice_to_notes/ipc/messages.py
Normal file
46
python/voice_to_notes/ipc/messages.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""IPC message type definitions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class IPCMessage:
    """A message exchanged between Rust and Python via JSON-line protocol."""

    # Identifier of the message; responses reuse the id of their request.
    id: str
    # Message kind, e.g. "ping", "pong", "progress", "error", "ready".
    type: str
    # Type-specific data; always a dict (empty when the message has none).
    payload: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the message as a plain dict with keys id, type and payload."""
        return {"id": self.id, "type": self.type, "payload": self.payload}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> IPCMessage:
        """Build a message from a decoded JSON object.

        Missing "id" / "type" default to the empty string. A missing or
        explicit ``null`` "payload" becomes an empty dict so that handlers
        can always treat ``payload`` as a mapping.
        """
        payload = data.get("payload")
        return cls(
            id=data.get("id", ""),
            type=data.get("type", ""),
            # JSON null decodes to None; normalise it to the declared dict type.
            payload={} if payload is None else payload,
        )
|
||||
|
||||
|
||||
def progress_message(request_id: str, percent: int, stage: str, message: str) -> IPCMessage:
    """Build a "progress" message for *request_id* with percent, stage and message."""
    body = {"percent": percent, "stage": stage, "message": message}
    return IPCMessage(id=request_id, type="progress", payload=body)
|
||||
|
||||
|
||||
def error_message(request_id: str, code: str, message: str) -> IPCMessage:
    """Build an "error" message for *request_id* carrying an error code and text."""
    body = {"code": code, "message": message}
    return IPCMessage(id=request_id, type="error", payload=body)
|
||||
|
||||
|
||||
def ready_message() -> IPCMessage:
    """Build the "ready" message (id "system") that reports the sidecar version."""
    body = {"version": "0.1.0"}
    return IPCMessage(id="system", type="ready", payload=body)
|
||||
47
python/voice_to_notes/ipc/protocol.py
Normal file
47
python/voice_to_notes/ipc/protocol.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""JSON-line protocol reader/writer over stdin/stdout."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from voice_to_notes.ipc.messages import IPCMessage
|
||||
|
||||
|
||||
def read_message() -> IPCMessage | None:
    """Read the next valid JSON-line message from stdin.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped (the latter two are logged), so that ``None`` is
    returned only when no further input can be read. Previously any such
    line also produced ``None``, which callers interpret as EOF.

    Returns:
        The next decoded message, or None on EOF or when reading stdin fails.
    """
    while True:
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # stdin itself is unusable; retrying could spin forever.
            _log(f"Read error: {e}")
            return None

        if not line:
            return None  # EOF

        line = line.strip()
        if not line:
            continue  # blank line: nothing to decode, keep reading

        try:
            data = json.loads(line)
        except json.JSONDecodeError as e:
            _log(f"Invalid JSON: {e}")
            continue

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a list or number).
            _log(f"Invalid message: expected a JSON object, got {type(data).__name__}")
            continue

        return IPCMessage.from_dict(data)
|
||||
|
||||
|
||||
def write_message(msg: IPCMessage) -> None:
    """Serialize *msg* as compact JSON and emit it as one flushed line on stdout."""
    encoded = json.dumps(msg.to_dict(), separators=(",", ":"))
    out = sys.stdout
    out.write(f"{encoded}\n")
    # Flush immediately rather than leaving the line in the buffer.
    out.flush()
|
||||
|
||||
|
||||
def write_dict(data: dict[str, Any]) -> None:
    """Emit *data* as one compact JSON line on stdout and flush."""
    encoded = json.dumps(data, separators=(",", ":"))
    out = sys.stdout
    out.write(f"{encoded}\n")
    out.flush()
|
||||
|
||||
|
||||
def _log(message: str) -> None:
|
||||
"""Log to stderr (stdout is reserved for IPC)."""
|
||||
print(f"[sidecar] {message}", file=sys.stderr, flush=True)
|
||||
52
python/voice_to_notes/main.py
Normal file
52
python/voice_to_notes/main.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Main entry point for the Voice to Notes Python sidecar."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from voice_to_notes.ipc.handlers import HandlerRegistry, ping_handler
|
||||
from voice_to_notes.ipc.messages import ready_message
|
||||
from voice_to_notes.ipc.protocol import read_message, write_message
|
||||
|
||||
|
||||
def create_registry() -> HandlerRegistry:
    """Build a HandlerRegistry with the currently implemented handlers installed."""
    registry = HandlerRegistry()
    implemented = {"ping": ping_handler}
    for message_type, handler in implemented.items():
        registry.register(message_type, handler)
    # TODO: Register transcribe, diarize, pipeline, ai, export handlers
    return registry
|
||||
|
||||
|
||||
def main() -> None:
    """Run the sidecar: announce readiness, then serve requests until input ends.

    Requests are read from stdin, dispatched through the handler registry,
    and any non-None response is written to stdout. Diagnostics go to stderr.
    """

    def _exit_on_signal(signum: int, frame: object) -> None:
        # Termination was requested: log it and exit with status 0.
        print("[sidecar] Shutting down...", file=sys.stderr, flush=True)
        sys.exit(0)

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _exit_on_signal)

    dispatcher = create_registry()

    # Tell the Rust side we are up before blocking on input.
    write_message(ready_message())
    print("[sidecar] Ready and waiting for messages", file=sys.stderr, flush=True)

    # A None from read_message is treated as "parent closed stdin".
    while (incoming := read_message()) is not None:
        reply = dispatcher.handle(incoming)
        if reply is None:
            continue
        write_message(reply)

    print("[sidecar] EOF on stdin, exiting", file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
# Start the message loop only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
1
python/voice_to_notes/providers/__init__.py
Normal file
1
python/voice_to_notes/providers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""AI provider adapters — local (llama-server), LiteLLM, OpenAI, Anthropic."""
|
||||
5
python/voice_to_notes/providers/anthropic_provider.py
Normal file
5
python/voice_to_notes/providers/anthropic_provider.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Anthropic provider — direct Anthropic SDK integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO: Implement Anthropic provider
|
||||
23
python/voice_to_notes/providers/base.py
Normal file
23
python/voice_to_notes/providers/base.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""Abstract base class for AI providers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
|
||||
class AIProvider(ABC):
    """Base interface for all AI providers.

    Concrete providers must implement both ``chat`` and ``stream``.
    """

    @abstractmethod
    async def chat(self, messages: list[dict[str, Any]], config: dict[str, Any]) -> str:
        """Send a chat completion request and return the response.

        Args:
            messages: Chat messages to send (schema is provider-defined here;
                TODO confirm the expected keys).
            config: Provider/request options (TODO confirm supported keys).

        Returns:
            The complete response text.
        """
        ...

    @abstractmethod
    def stream(
        self, messages: list[dict[str, Any]], config: dict[str, Any]
    ) -> AsyncIterator[str]:
        """Send a streaming chat request, yielding tokens as they arrive.

        Deliberately declared without ``async``: an ``async def`` with no
        ``yield`` is a coroutine that *returns* an iterator, which does not
        match async-generator implementations (``async def`` + ``yield``) or
        ``async for token in provider.stream(...)`` at call sites.
        Implementations may be async generators or may return any
        ``AsyncIterator[str]``.

        Args:
            messages: Chat messages to send (TODO confirm schema).
            config: Provider/request options (TODO confirm supported keys).

        Returns:
            An async iterator over response tokens.
        """
        ...
|
||||
5
python/voice_to_notes/providers/litellm_provider.py
Normal file
5
python/voice_to_notes/providers/litellm_provider.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""LiteLLM provider — multi-provider gateway."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO: Implement LiteLLM provider
|
||||
9
python/voice_to_notes/providers/local_provider.py
Normal file
9
python/voice_to_notes/providers/local_provider.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Local AI provider — bundled llama-server (OpenAI-compatible API)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
# TODO: Implement local provider
|
||||
# - Connect to llama-server on localhost:{port}
|
||||
# - Use openai SDK with custom base_url
|
||||
# - Support chat and streaming
|
||||
5
python/voice_to_notes/providers/openai_provider.py
Normal file
5
python/voice_to_notes/providers/openai_provider.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""OpenAI provider — direct OpenAI SDK integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO: Implement OpenAI provider
|
||||
1
python/voice_to_notes/services/__init__.py
Normal file
1
python/voice_to_notes/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Service layer — transcription, diarization, AI, and export."""
|
||||
13
python/voice_to_notes/services/ai_provider.py
Normal file
13
python/voice_to_notes/services/ai_provider.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""AI provider service — routes requests to configured provider."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class AIProviderService:
    """Placeholder for the service that picks an AI provider and routes chat/summarize requests."""

    # TODO: Implement provider routing
    # - Select provider based on config (local, openai, anthropic, litellm)
    # - Forward chat messages
    # - Handle streaming responses
|
||||
13
python/voice_to_notes/services/diarize.py
Normal file
13
python/voice_to_notes/services/diarize.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Diarization service — pyannote.audio speaker identification."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class DiarizeService:
    """Placeholder for speaker diarization built on pyannote.audio."""

    # TODO: Implement pyannote.audio integration
    # - Load community-1 model
    # - Run diarization on audio
    # - Return speaker segments with timestamps
|
||||
14
python/voice_to_notes/services/export.py
Normal file
14
python/voice_to_notes/services/export.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Export service — caption and text document generation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class ExportService:
    """Placeholder for exporting to SRT, WebVTT, ASS, plain text, and Markdown."""

    # TODO: Implement pysubs2 integration
    # - SRT with [Speaker]: prefix
    # - WebVTT with <v Speaker> voice tags
    # - ASS with named styles per speaker
    # - Plain text and Markdown with speaker labels
|
||||
14
python/voice_to_notes/services/pipeline.py
Normal file
14
python/voice_to_notes/services/pipeline.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Combined transcription + diarization pipeline."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class PipelineService:
    """Placeholder for the WhisperX-style pipeline: transcribe -> align -> diarize -> merge."""

    # TODO: Implement combined pipeline
    # 1. faster-whisper transcription
    # 2. wav2vec2 word-level alignment
    # 3. pyannote diarization
    # 4. Merge words with speaker segments
|
||||
13
python/voice_to_notes/services/transcribe.py
Normal file
13
python/voice_to_notes/services/transcribe.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Transcription service — faster-whisper + wav2vec2 pipeline."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class TranscribeService:
    """Placeholder for audio transcription built on faster-whisper."""

    # TODO: Implement faster-whisper integration
    # - Load model based on hardware detection
    # - Transcribe audio with word-level timestamps
    # - Report progress via IPC
|
||||
1
python/voice_to_notes/utils/__init__.py
Normal file
1
python/voice_to_notes/utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Utility modules."""
|
||||
Reference in New Issue
Block a user