From 415a648a2b4cd60dc3a01143032d0e2399cd6ab0 Mon Sep 17 00:00:00 2001
From: Josh Knapp
Date: Thu, 26 Feb 2026 16:18:54 -0800
Subject: [PATCH] Phase 4: Export to SRT, WebVTT, ASS, plain text, and Markdown

- Implement ExportService using pysubs2 for caption formats (SRT, VTT, ASS)
  and custom formatters for plain text and Markdown
- SRT exports with [Speaker]: prefix, WebVTT with voice tags, ASS with
  color-coded speaker styles
- Plain text groups by speaker with labels, Markdown adds timestamps
- Add export.start IPC handler and export_transcript Tauri command
- Add export dropdown menu in header (appears after transcription)
- Uses native save dialog for output file selection
- Add pysubs2 dependency
- Tests: 30 Python (6 export tests), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6
---
 python/pyproject.toml                    |   1 +
 python/tests/test_export.py              | 133 ++++++++++++++
 python/voice_to_notes/ipc/handlers.py    |  18 ++
 python/voice_to_notes/main.py            |   2 +
 python/voice_to_notes/services/export.py | 223 ++++++++++++++++++++++-
 src-tauri/src/commands/export.rs         |  51 +++++-
 src-tauri/src/lib.rs                     |   2 +
 src/lib/services/tauri-bridge.ts         |  21 +++
 src/routes/+page.svelte                  | 115 +++++++++++-
 9 files changed, 557 insertions(+), 9 deletions(-)
 create mode 100644 python/tests/test_export.py

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1a06066..62f118a 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -12,6 +12,7 @@ license = "MIT"
 dependencies = [
     "faster-whisper>=1.1.0",
     "pyannote.audio>=3.1.0",
+    "pysubs2>=1.7.0",
 ]
 
 [project.optional-dependencies]
diff --git a/python/tests/test_export.py b/python/tests/test_export.py
new file mode 100644
index 0000000..01b5e02
--- /dev/null
+++ b/python/tests/test_export.py
@@ -0,0 +1,133 @@
+"""Tests for the export service."""
+
+import os
+import tempfile
+
+from voice_to_notes.services.export import (
+    ExportRequest,
+    ExportSegment,
+    ExportService,
+    make_export_request,
+)
+
+
+def _make_segments():
+    return [
+        ExportSegment(text="Hello there", start_ms=0, end_ms=2000, speaker="SPEAKER_00"),
+        ExportSegment(text="How are you?", start_ms=2500, end_ms=4500, speaker="SPEAKER_01"),
+        ExportSegment(text="I'm fine, thanks", start_ms=5000, end_ms=7500, speaker="SPEAKER_00"),
+    ]
+
+
+def _speaker_map():
+    return {"SPEAKER_00": "Alice", "SPEAKER_01": "Bob"}
+
+
+def test_export_srt():
+    service = ExportService()
+    with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="srt",
+            output_path=path,
+        )
+        result = service.export(req)
+        assert result == path
+        content = open(path, encoding="utf-8").read()
+        assert "[Alice]:" in content
+        assert "[Bob]:" in content
+        assert "Hello there" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_vtt():
+    service = ExportService()
+    with tempfile.NamedTemporaryFile(suffix=".vtt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="vtt",
+            output_path=path,
+        )
+        assert service.export(req) == path
+        content = open(path, encoding="utf-8").read()
+        assert "<v Alice>" in content  # voice tag carrying the mapped display name
+        assert "<v Bob>" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_txt():
+    service = ExportService()
+    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="txt",
+            output_path=path,
+            title="Test Transcript",
+        )
+        assert service.export(req) == path
+        content = open(path, encoding="utf-8").read()
+        assert "Test Transcript" in content
+        assert "Alice:" in content
+        assert "Bob:" in content
+        assert "Hello there" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_md():
+    service = ExportService()
+    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="md",
+            output_path=path,
+            title="Test Transcript",
+        )
+        assert service.export(req) == path
+        content = open(path, encoding="utf-8").read()
+        assert "# Test Transcript" in content
+        assert "**Alice**" in content
+        assert "**Bob**" in content
+    finally:
+        os.unlink(path)
+
+
+def test_make_export_request():
+    payload = {
+        "segments": [
+            {"text": "Hello", "start_ms": 0, "end_ms": 1000, "speaker": "SPK_0"},
+        ],
+        "speakers": {"SPK_0": "Alice"},
+        "format": "srt",
+        "output_path": "/tmp/test.srt",
+        "title": "Test",
+    }
+    req = make_export_request(payload)
+    assert len(req.segments) == 1
+    assert req.segments[0].speaker == "SPK_0"
+    assert req.speakers["SPK_0"] == "Alice"
+    assert req.format == "srt"
+
+
+def test_export_unsupported_format():
+    service = ExportService()
+    req = ExportRequest(format="xyz")
+    try:
+        service.export(req)
+        assert False, "Should have raised ValueError"
+    except ValueError as e:
+        assert "Unsupported" in str(e)
diff --git a/python/voice_to_notes/ipc/handlers.py b/python/voice_to_notes/ipc/handlers.py
index a74bb77..9863b91 100644
--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -117,6 +117,24 @@ def make_pipeline_handler() -> HandlerFunc:
     return handler
 
 
+def make_export_handler() -> HandlerFunc:
+    """Create an export handler that answers with an "export.result" message."""
+    from voice_to_notes.services.export import ExportService, make_export_request
+
+    service = ExportService()
+
+    def handler(msg: IPCMessage) -> IPCMessage:
+        request = make_export_request(msg.payload)
+        output_path = service.export(request)
+        return IPCMessage(
+            id=msg.id,
+            type="export.result",
+            payload={"output_path": output_path, "format": request.format},
+        )
+
+    return handler
+
+
 def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
     """Detect hardware capabilities and return recommendations."""
     from voice_to_notes.hardware.detect import detect_hardware
diff --git a/python/voice_to_notes/main.py b/python/voice_to_notes/main.py
index e2d3c5d..af4e1cf 100644
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -9,6 +9,7 @@ from voice_to_notes.ipc.handlers import (
     HandlerRegistry,
     hardware_detect_handler,
     make_diarize_handler,
+    make_export_handler,
     make_pipeline_handler,
     make_transcribe_handler,
     ping_handler,
@@ -25,6 +26,7 @@ def create_registry() -> HandlerRegistry:
     registry.register("hardware.detect", hardware_detect_handler)
     registry.register("diarize.start", make_diarize_handler())
     registry.register("pipeline.start", make_pipeline_handler())
+    registry.register("export.start", make_export_handler())
     return registry
 
 
diff --git a/python/voice_to_notes/services/export.py b/python/voice_to_notes/services/export.py
index fd6f071..3600547 100644
--- a/python/voice_to_notes/services/export.py
+++ b/python/voice_to_notes/services/export.py
@@ -2,13 +2,224 @@
 
 from __future__ import annotations
 
+import os
+from dataclasses import dataclass, field
+from typing import Any
+
+import pysubs2
+
+
+@dataclass
+class ExportSegment:
+    """A segment ready for export (times in milliseconds; speaker is an ID or None)."""
+
+    text: str
+    start_ms: int
+    end_ms: int
+    speaker: str | None = None
+
+
+@dataclass
+class ExportRequest:
+    """Input for export operations."""
+
+    segments: list[ExportSegment] = field(default_factory=list)
+    speakers: dict[str, str] = field(default_factory=dict)  # id → display_name
+    format: str = "srt"  # srt, vtt, ass, txt, md
+    output_path: str = ""
+    title: str = ""
+
 
 class ExportService:
     """Handles export to SRT, WebVTT, ASS, plain text, and Markdown."""
 
-    # TODO: Implement pysubs2 integration
-    # - SRT with [Speaker]: prefix
-    # - WebVTT with voice tags
-    # - ASS with named styles per speaker
-    # - Plain text and Markdown with speaker labels
-    pass
+    def export(self, request: ExportRequest) -> str:
+        """Export segments to the requested format (case-insensitive).
+
+        Returns the output file path; raises ValueError for an unknown format.
+        """
+        fmt = request.format.lower()
+        if fmt == "srt":
+            return self._export_srt(request)
+        elif fmt in ("vtt", "webvtt"):
+            return self._export_vtt(request)
+        elif fmt == "ass":
+            return self._export_ass(request)
+        elif fmt == "txt":
+            return self._export_txt(request)
+        elif fmt == "md":
+            return self._export_md(request)
+        else:
+            raise ValueError(f"Unsupported export format: {fmt}")
+
+    def _get_speaker_name(self, speaker: str | None, speakers: dict[str, str]) -> str:
+        """Map speaker ID to display name ("Unknown" if empty, raw ID if unmapped)."""
+        if not speaker:
+            return "Unknown"
+        return speakers.get(speaker, speaker)
+
+    def _export_srt(self, request: ExportRequest) -> str:
+        """Export to SubRip (.srt); cues with a speaker get a "[Name]: " prefix."""
+        subs = pysubs2.SSAFile()
+        for seg in request.segments:
+            name = self._get_speaker_name(seg.speaker, request.speakers)
+            text = f"[{name}]: {seg.text}" if seg.speaker else seg.text
+            event = pysubs2.SSAEvent(
+                start=seg.start_ms,
+                end=seg.end_ms,
+                text=text,
+            )
+            subs.append(event)
+
+        path = request.output_path or "export.srt"  # relative fallback when no path given
+        subs.save(path, format_="srt")
+        return path
+
+    def _export_vtt(self, request: ExportRequest) -> str:
+        """Export to WebVTT (.vtt); cues with a speaker are wrapped in <v Name> voice tags."""
+        subs = pysubs2.SSAFile()
+        for seg in request.segments:
+            name = self._get_speaker_name(seg.speaker, request.speakers)
+            # WebVTT voice tags: <v Speaker>text</v>
+            text = f"<v {name}>{seg.text}</v>" if seg.speaker else seg.text
+            event = pysubs2.SSAEvent(
+                start=seg.start_ms,
+                end=seg.end_ms,
+                text=text,
+            )
+            subs.append(event)
+
+        path = request.output_path or "export.vtt"
+        subs.save(path, format_="vtt")
+        return path
+
+    def _export_ass(self, request: ExportRequest) -> str:
+        """Export to Advanced SubStation Alpha (.ass) with speaker styles."""
+        subs = pysubs2.SSAFile()
+
+        # Create a style per speaker with distinct colors
+        colors = [
+            "&H0000FFFF",  # Yellow
+            "&H00FF00FF",  # Magenta
+            "&H00FFFF00",  # Cyan
+            "&H000000FF",  # Red
+            "&H0000FF00",  # Green
+            "&H00FF0000",  # Blue
+            "&H0080FF80",  # Light green
+            "&H00FF8080",  # Light blue
+        ]
+
+        speaker_styles: dict[str, str] = {}
+        unique_speakers = sorted(set(
+            seg.speaker for seg in request.segments if seg.speaker
+        ))
+
+        for i, spk in enumerate(unique_speakers):
+            name = self._get_speaker_name(spk, request.speakers)
+            style_name = name.replace(" ", "_")
+            style = pysubs2.SSAStyle()
+            style.primarycolor = pysubs2.Color(*self._parse_ass_color(colors[i % len(colors)]))
+            style.fontsize = 20
+            style.bold = True
+            subs.styles[style_name] = style
+            speaker_styles[spk] = style_name
+
+        for seg in request.segments:
+            style = speaker_styles.get(seg.speaker or "", "Default")  # no speaker → "Default"
+            event = pysubs2.SSAEvent(
+                start=seg.start_ms,
+                end=seg.end_ms,
+                text=seg.text,
+                style=style,
+            )
+            subs.append(event)
+
+        path = request.output_path or "export.ass"
+        subs.save(path, format_="ass")
+        return path
+
+    def _parse_ass_color(self, color_str: str) -> tuple[int, int, int, int]:
+        """Parse ASS color string &HAABBGGRR to (r, g, b, a)."""
+        # Strip &H prefix
+        hex_str = color_str.replace("&H", "").replace("&h", "")
+        val = int(hex_str, 16)
+        a = (val >> 24) & 0xFF
+        b = (val >> 16) & 0xFF
+        g = (val >> 8) & 0xFF
+        r = val & 0xFF
+        return (r, g, b, a)
+
+    def _export_txt(self, request: ExportRequest) -> str:
+        """Export to plain text; a "Name:" label is emitted whenever the speaker changes."""
+        lines: list[str] = []
+        if request.title:
+            lines.append(request.title)
+            lines.append("=" * len(request.title))
+            lines.append("")
+
+        current_speaker: str | None = None
+        for seg in request.segments:
+            name = self._get_speaker_name(seg.speaker, request.speakers)
+            if seg.speaker != current_speaker:
+                if lines and lines[-1] != "":
+                    lines.append("")
+                lines.append(f"{name}:")
+                current_speaker = seg.speaker
+            lines.append(f" {seg.text}")
+
+        path = request.output_path or "export.txt"
+        with open(path, "w", encoding="utf-8") as f:
+            f.write("\n".join(lines) + "\n")
+        return path
+
+    def _export_md(self, request: ExportRequest) -> str:
+        """Export to Markdown with speaker headers and timestamps."""
+        lines: list[str] = []
+        if request.title:
+            lines.append(f"# {request.title}")
+            lines.append("")
+
+        current_speaker: str | None = None
+        for seg in request.segments:
+            name = self._get_speaker_name(seg.speaker, request.speakers)
+            if seg.speaker != current_speaker:
+                lines.append("")
+                lines.append(f"**{name}** _{self._format_timestamp(seg.start_ms)}_")
+                lines.append("")
+                current_speaker = seg.speaker
+            lines.append(seg.text)
+
+        path = request.output_path or "export.md"
+        with open(path, "w", encoding="utf-8") as f:
+            f.write("\n".join(lines) + "\n")
+        return path
+
+    def _format_timestamp(self, ms: int) -> str:
+        """Format milliseconds as H:MM:SS or M:SS."""
+        total_seconds = ms // 1000
+        h = total_seconds // 3600
+        m = (total_seconds % 3600) // 60
+        s = total_seconds % 60
+        if h > 0:
+            return f"{h}:{m:02d}:{s:02d}"
+        return f"{m}:{s:02d}"
+
+
+def make_export_request(payload: dict[str, Any]) -> ExportRequest:
+    """Create an ExportRequest from IPC payload (segments need text/start_ms/end_ms keys)."""
+    segments = [
+        ExportSegment(
+            text=seg["text"],
+            start_ms=seg["start_ms"],
+            end_ms=seg["end_ms"],
+            speaker=seg.get("speaker"),
+        )
+        for seg in payload.get("segments", [])
+    ]
+    return ExportRequest(
+        segments=segments,
+        speakers=payload.get("speakers", {}),
+        format=payload.get("format", "srt"),
+        output_path=payload.get("output_path", ""),
+        title=payload.get("title", ""),
+    )
diff --git a/src-tauri/src/commands/export.rs b/src-tauri/src/commands/export.rs
index 7b3b24f..4b9bf35 100644
--- a/src-tauri/src/commands/export.rs
+++ b/src-tauri/src/commands/export.rs
@@ -1,2 +1,49 @@
-// Export commands — trigger caption/text export via Python sidecar
-// TODO: Implement when export service is built
+use serde_json::{json, Value};
+
+use crate::sidecar::messages::IPCMessage;
+use crate::sidecar::SidecarManager;
+
+/// Export transcript to caption/text format via the Python sidecar.
+#[tauri::command]
+pub fn export_transcript(
+    segments: Value,
+    speakers: Value,
+    format: String,
+    output_path: String,
+    title: Option<String>,
+) -> Result<Value, String> {
+    let python_path = std::env::current_dir()
+        .map_err(|e| e.to_string())?
+        .join("../python") // resolved relative to the process working directory
+        .canonicalize()
+        .map_err(|e| format!("Cannot find python directory: {e}"))?;
+
+    let python_path_str = python_path.to_string_lossy().to_string();
+
+    let manager = SidecarManager::new(); // NOTE(review): a new manager is created and started on every call
+    manager.start(&python_path_str)?;
+
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let msg = IPCMessage::new(
+        &request_id,
+        "export.start",
+        json!({
+            "segments": segments,
+            "speakers": speakers,
+            "format": format,
+            "output_path": output_path,
+            "title": title.unwrap_or_default(),
+        }),
+    );
+
+    let response = manager.send_and_receive(&msg)?;
+
+    if response.msg_type == "error" {
+        return Err(format!(
+            "Export error: {}",
+            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+        ));
+    }
+
+    Ok(response.payload)
+}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index dfd73fb..9edeff7 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -3,6 +3,7 @@ pub mod db;
 pub mod sidecar;
 pub mod state;
 
+use commands::export::export_transcript;
 use commands::project::{create_project, get_project, list_projects};
 use commands::transcribe::{run_pipeline, transcribe_file};
 
@@ -17,6 +18,7 @@ pub fn run() {
             list_projects,
             transcribe_file,
             run_pipeline,
+            export_transcript,
         ])
         .run(tauri::generate_context!())
         .expect("error while running tauri application");
diff --git a/src/lib/services/tauri-bridge.ts b/src/lib/services/tauri-bridge.ts
index c13da24..1025104 100644
--- a/src/lib/services/tauri-bridge.ts
+++ b/src/lib/services/tauri-bridge.ts
@@ -47,6 +47,27 @@ export interface PipelineResult extends TranscriptionResult {
   num_speakers: number;
 }
 
+export interface ExportResult {
+  output_path: string;
+  format: string;
+}
+
+export async function exportTranscript(
+  segments: Array<{ text: string; start_ms: number; end_ms: number; speaker: string | null }>,
+  speakers: Record<string, string>, // speaker id → display name
+  format: string,
+  outputPath: string,
+  title?: string,
+): Promise<ExportResult> {
+  return invoke<ExportResult>('export_transcript', {
+    segments,
+    speakers,
+    format,
+    outputPath,
+    title,
+  });
+}
+
 export async function runPipeline(
   filePath: string,
   options?: {
diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte
index b62b0e9..1c534bc 100644
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,6 +1,6 @@
@@ -117,6 +167,22 @@ + {#if $segments.length > 0} +
+ + {#if showExportMenu} +
+ {#each exportFormats as fmt} + + {/each} +
+ {/if} +
+ {/if}
@@ -164,6 +230,53 @@
   .import-btn:hover {
     background: #d63851;
   }
+  .header-actions { /* horizontal flex row, items vertically centred */
+    display: flex;
+    gap: 0.5rem;
+    align-items: center;
+  }
+  .export-dropdown { /* presumably the positioning context for .export-menu — confirm against markup */
+    position: relative;
+  }
+  .export-btn {
+    background: #0f3460;
+    border: 1px solid #4a5568;
+    color: #e0e0e0;
+    padding: 0.5rem 1rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.875rem;
+    font-weight: 500;
+  }
+  .export-btn:hover {
+    background: #1a4a7a;
+  }
+  .export-menu { /* absolutely positioned panel: just below its positioned ancestor, right-aligned */
+    position: absolute;
+    top: 100%;
+    right: 0;
+    margin-top: 0.25rem;
+    background: #16213e;
+    border: 1px solid #4a5568;
+    border-radius: 6px;
+    overflow: hidden; /* clips children to the rounded corners */
+    z-index: 10;
+    min-width: 220px;
+  }
+  .export-option { /* full-width, borderless, left-aligned entry */
+    display: block;
+    width: 100%;
+    background: none;
+    border: none;
+    color: #e0e0e0;
+    padding: 0.5rem 1rem;
+    text-align: left;
+    cursor: pointer;
+    font-size: 0.8rem;
+  }
+  .export-option:hover {
+    background: rgba(233, 69, 96, 0.2);
+  }
   .workspace {
     display: flex;
     gap: 1rem;