Phase 4: Export to SRT, WebVTT, ASS, plain text, and Markdown

- Implement ExportService using pysubs2 for caption formats (SRT, VTT, ASS) and custom formatters for plain text and Markdown
- SRT exports with [Speaker]: prefix, WebVTT with <v Speaker> voice tags, ASS with color-coded speaker styles
- Plain text groups by speaker with labels, Markdown adds timestamps
- Add export.start IPC handler and export_transcript Tauri command
- Add export dropdown menu in header (appears after transcription)
- Uses native save dialog for output file selection
- Add pysubs2 dependency
- Tests: 30 Python (6 export tests), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:18:54 -08:00
parent 44480906a4
commit 415a648a2b
9 changed files with 557 additions and 9 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -12,6 +12,7 @@ license = "MIT"
 dependencies = [
    "faster-whisper>=1.1.0",
    "pyannote.audio>=3.1.0",
+    "pysubs2>=1.7.0",
 ]

 [project.optional-dependencies]
--- a/python/tests/test_export.py
+++ b/python/tests/test_export.py
@@ -0,0 +1,133 @@
+"""Tests for the export service."""
+
+import os
+import tempfile
+
+from voice_to_notes.services.export import (
+    ExportRequest,
+    ExportSegment,
+    ExportService,
+    make_export_request,
+)
+
+
+def _make_segments():
+    """Return a three-segment fixture in which two speaker IDs alternate."""
+    rows = [
+        ("Hello there", 0, 2000, "SPEAKER_00"),
+        ("How are you?", 2500, 4500, "SPEAKER_01"),
+        ("I'm fine, thanks", 5000, 7500, "SPEAKER_00"),
+    ]
+    return [
+        ExportSegment(text=text, start_ms=start, end_ms=end, speaker=speaker)
+        for text, start, end, speaker in rows
+    ]
+
+
+def _speaker_map():
+    """Return the speaker-ID to display-name mapping used by the export tests."""
+    return dict(SPEAKER_00="Alice", SPEAKER_01="Bob")
+
+
+def test_export_srt():
+    """SRT export returns the output path and prefixes cues with ``[Name]:``."""
+    service = ExportService()
+    # delete=False: only the file name is needed here; the file itself is
+    # rewritten by the service and removed in the ``finally`` block.
+    with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="srt",
+            output_path=path,
+        )
+        result = service.export(req)
+        assert result == path
+        # Read through a context manager so the handle is closed before unlink.
+        with open(path, encoding="utf-8") as fh:
+            content = fh.read()
+        assert "[Alice]:" in content
+        assert "[Bob]:" in content
+        assert "Hello there" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_vtt():
+    """WebVTT export returns the output path and wraps cues in ``<v Name>`` tags."""
+    service = ExportService()
+    # delete=False: only the file name is needed here; the file itself is
+    # rewritten by the service and removed in the ``finally`` block.
+    with tempfile.NamedTemporaryFile(suffix=".vtt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="vtt",
+            output_path=path,
+        )
+        result = service.export(req)
+        assert result == path
+        # Read through a context manager so the handle is closed before unlink.
+        with open(path, encoding="utf-8") as fh:
+            content = fh.read()
+        assert "<v Alice>" in content
+        assert "<v Bob>" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_txt():
+    """Plain-text export writes the title, speaker labels and segment text."""
+    service = ExportService()
+    # delete=False: only the file name is needed here; the file itself is
+    # rewritten by the service and removed in the ``finally`` block.
+    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="txt",
+            output_path=path,
+            title="Test Transcript",
+        )
+        result = service.export(req)
+        assert result == path
+        # Read through a context manager so the handle is closed before unlink.
+        with open(path, encoding="utf-8") as fh:
+            content = fh.read()
+        assert "Test Transcript" in content
+        assert "Alice:" in content
+        assert "Bob:" in content
+        assert "Hello there" in content
+    finally:
+        os.unlink(path)
+
+
+def test_export_md():
+    """Markdown export writes an H1 title and bold speaker names."""
+    service = ExportService()
+    # delete=False: only the file name is needed here; the file itself is
+    # rewritten by the service and removed in the ``finally`` block.
+    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as f:
+        path = f.name
+    try:
+        req = ExportRequest(
+            segments=_make_segments(),
+            speakers=_speaker_map(),
+            format="md",
+            output_path=path,
+            title="Test Transcript",
+        )
+        result = service.export(req)
+        assert result == path
+        # Read through a context manager so the handle is closed before unlink.
+        with open(path, encoding="utf-8") as fh:
+            content = fh.read()
+        assert "# Test Transcript" in content
+        assert "**Alice**" in content
+        assert "**Bob**" in content
+    finally:
+        os.unlink(path)
+
+
+def test_make_export_request():
+    """An IPC payload dict is converted into a populated ExportRequest."""
+    raw_segment = {"text": "Hello", "start_ms": 0, "end_ms": 1000, "speaker": "SPK_0"}
+    req = make_export_request(
+        {
+            "segments": [raw_segment],
+            "speakers": {"SPK_0": "Alice"},
+            "format": "srt",
+            "output_path": "/tmp/test.srt",
+            "title": "Test",
+        }
+    )
+    assert len(req.segments) == 1
+    (only_segment,) = req.segments
+    assert only_segment.speaker == "SPK_0"
+    assert req.speakers["SPK_0"] == "Alice"
+    assert req.format == "srt"
+
+
+def test_export_unsupported_format():
+    """An unknown format name is rejected with a ValueError saying 'Unsupported'."""
+    exporter = ExportService()
+    bad_request = ExportRequest(format="xyz")
+    try:
+        exporter.export(bad_request)
+    except ValueError as exc:
+        assert "Unsupported" in str(exc)
+    else:
+        assert False, "Should have raised ValueError"
--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -117,6 +117,24 @@ def make_pipeline_handler() -> HandlerFunc:
    return handler


+def make_export_handler() -> HandlerFunc:
+    """Build the IPC handler that serves export requests.
+
+    The export module is imported when this factory runs, not at module import
+    time, and one ExportService instance is shared by every call of the
+    returned handler.
+    """
+    from voice_to_notes.services.export import ExportService, make_export_request
+
+    exporter = ExportService()
+
+    def handler(msg: IPCMessage) -> IPCMessage:
+        # Convert the raw payload, write the file, then echo path and format back.
+        export_request = make_export_request(msg.payload)
+        written_path = exporter.export(export_request)
+        reply_payload = {"output_path": written_path, "format": export_request.format}
+        return IPCMessage(id=msg.id, type="export.result", payload=reply_payload)
+
+    return handler
+
+
 def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
    """Detect hardware capabilities and return recommendations."""
    from voice_to_notes.hardware.detect import detect_hardware
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -9,6 +9,7 @@ from voice_to_notes.ipc.handlers import (
    HandlerRegistry,
    hardware_detect_handler,
    make_diarize_handler,
+    make_export_handler,
    make_pipeline_handler,
    make_transcribe_handler,
    ping_handler,
@@ -25,6 +26,7 @@ def create_registry() -> HandlerRegistry:
    registry.register("hardware.detect", hardware_detect_handler)
    registry.register("diarize.start", make_diarize_handler())
    registry.register("pipeline.start", make_pipeline_handler())
+    registry.register("export.start", make_export_handler())
    return registry


--- a/python/voice_to_notes/services/export.py
+++ b/python/voice_to_notes/services/export.py
@@ -2,13 +2,224 @@

 from __future__ import annotations

+import os
+from dataclasses import dataclass, field
+from typing import Any
+
+import pysubs2
+
+
+@dataclass
+class ExportSegment:
+    """A single transcript segment ready for export.
+
+    The caption exporters pass the timing fields straight to
+    ``pysubs2.SSAEvent(start=..., end=...)``.
+    """
+
+    # Text of the segment as it should appear in the output.
+    text: str
+    # Segment start time in milliseconds.
+    start_ms: int
+    # Segment end time in milliseconds.
+    end_ms: int
+    # Speaker ID, looked up in ExportRequest.speakers; None when unattributed.
+    speaker: str | None = None
+
+
+@dataclass
+class ExportRequest:
+    """Input for export operations.
+
+    Every field has a default, so a request can be constructed with only the
+    fields the caller needs to set.
+    """
+
+    # Segments to write, in the order they should appear in the output.
+    segments: list[ExportSegment] = field(default_factory=list)
+    # Speaker lookup; an ID missing from this map is displayed as the ID itself.
+    speakers: dict[str, str] = field(default_factory=dict)  # id → display_name
+    # Target format name, matched case-insensitively by ExportService.export.
+    format: str = "srt"  # srt, vtt, ass, txt, md
+    # Destination file; when empty the exporters fall back to "export.<ext>".
+    output_path: str = ""
+    # Optional heading written at the top of txt and md exports.
+    title: str = ""
+

 class ExportService:
    """Handles export to SRT, WebVTT, ASS, plain text, and Markdown."""

-    # TODO: Implement pysubs2 integration
-    # - SRT with [Speaker]: prefix
-    # - WebVTT with <v Speaker> voice tags
-    # - ASS with named styles per speaker
-    # - Plain text and Markdown with speaker labels
-    pass
+    def export(self, request: ExportRequest) -> str:
+        """Write the request's segments in the requested format.
+
+        The format name is matched case-insensitively, and ``webvtt`` is
+        accepted as an alias for ``vtt``.
+
+        Returns:
+            The path of the file that was written.
+
+        Raises:
+            ValueError: If the format is not srt, vtt/webvtt, ass, txt or md.
+        """
+        fmt = request.format.lower()
+        exporters = {
+            "srt": self._export_srt,
+            "vtt": self._export_vtt,
+            "webvtt": self._export_vtt,
+            "ass": self._export_ass,
+            "txt": self._export_txt,
+            "md": self._export_md,
+        }
+        exporter = exporters.get(fmt)
+        if exporter is None:
+            raise ValueError(f"Unsupported export format: {fmt}")
+        return exporter(request)
+
+    def _get_speaker_name(self, speaker: str | None, speakers: dict[str, str]) -> str:
+        """Return the display name for a speaker ID.
+
+        A missing or empty ID yields ``"Unknown"``; an ID that is absent from
+        ``speakers`` is returned unchanged.
+        """
+        if speaker:
+            return speakers.get(speaker, speaker)
+        return "Unknown"
+
+    def _export_srt(self, request: ExportRequest) -> str:
+        """Write the segments as a SubRip (.srt) file using pysubs2.
+
+        Segments that have a speaker are prefixed with ``[Name]: ``; segments
+        without one are written as plain text. Falls back to ``export.srt``
+        when the request has no output path.
+
+        Returns:
+            The path the file was saved to.
+        """
+        srt_file = pysubs2.SSAFile()
+        for segment in request.segments:
+            display_name = self._get_speaker_name(segment.speaker, request.speakers)
+            if segment.speaker:
+                cue_text = f"[{display_name}]: {segment.text}"
+            else:
+                cue_text = segment.text
+            srt_file.append(
+                pysubs2.SSAEvent(start=segment.start_ms, end=segment.end_ms, text=cue_text)
+            )
+
+        destination = request.output_path or "export.srt"
+        srt_file.save(destination, format_="srt")
+        return destination
+
+    def _export_vtt(self, request: ExportRequest) -> str:
+        """Write the segments as a WebVTT (.vtt) file using pysubs2.
+
+        Segments that have a speaker are wrapped in a ``<v Name>...</v>`` voice
+        tag; segments without one are written as plain text. Falls back to
+        ``export.vtt`` when the request has no output path.
+
+        Returns:
+            The path the file was saved to.
+        """
+        vtt_file = pysubs2.SSAFile()
+        for segment in request.segments:
+            display_name = self._get_speaker_name(segment.speaker, request.speakers)
+            if segment.speaker:
+                # WebVTT voice span: <v Speaker>text</v>
+                cue_text = f"<v {display_name}>{segment.text}</v>"
+            else:
+                cue_text = segment.text
+            vtt_file.append(
+                pysubs2.SSAEvent(start=segment.start_ms, end=segment.end_ms, text=cue_text)
+            )
+
+        destination = request.output_path or "export.vtt"
+        vtt_file.save(destination, format_="vtt")
+        return destination
+
+    def _export_ass(self, request: ExportRequest) -> str:
+        """Export to Advanced SubStation Alpha (.ass) with one style per speaker.
+
+        Each distinct speaker ID gets a bold, 20pt style whose primary color is
+        taken from a fixed eight-color palette (reused cyclically). Segments
+        without a speaker use the ``Default`` style. Falls back to
+        ``export.ass`` when the request has no output path.
+
+        Returns:
+            The path the file was saved to.
+        """
+        subs = pysubs2.SSAFile()
+
+        # Palette in ASS &HAABBGGRR notation.
+        colors = [
+            "&H0000FFFF",  # Yellow
+            "&H00FF00FF",  # Magenta
+            "&H00FFFF00",  # Cyan
+            "&H000000FF",  # Red
+            "&H0000FF00",  # Green
+            "&H00FF0000",  # Blue
+            "&H0080FF80",  # Light green
+            "&H00FF8080",  # Light blue
+        ]
+
+        # Sorted so the same set of speaker IDs always gets the same colors.
+        unique_speakers = sorted(
+            {seg.speaker for seg in request.segments if seg.speaker}
+        )
+
+        speaker_styles: dict[str, str] = {}
+        for i, spk in enumerate(unique_speakers):
+            name = self._get_speaker_name(spk, request.speakers)
+            # Style and Dialogue lines in an ASS file are comma-separated, so a
+            # comma inside the style name would shift every following field.
+            style_name = name.replace(" ", "_").replace(",", "_")
+            style = pysubs2.SSAStyle()
+            style.primarycolor = pysubs2.Color(*self._parse_ass_color(colors[i % len(colors)]))
+            style.fontsize = 20
+            style.bold = True
+            subs.styles[style_name] = style
+            speaker_styles[spk] = style_name
+
+        for seg in request.segments:
+            # Unattributed segments (speaker None/empty) keep the Default style.
+            style = speaker_styles.get(seg.speaker or "", "Default")
+            event = pysubs2.SSAEvent(
+                start=seg.start_ms,
+                end=seg.end_ms,
+                text=seg.text,
+                style=style,
+            )
+            subs.append(event)
+
+        path = request.output_path or "export.ass"
+        subs.save(path, format_="ass")
+        return path
+
+    def _parse_ass_color(self, color_str: str) -> tuple[int, int, int, int]:
+        """Convert an ASS ``&HAABBGGRR`` color string to an ``(r, g, b, a)`` tuple."""
+        # Drop the &H / &h marker, then read the remainder as one hex integer.
+        packed = int(color_str.replace("&H", "").replace("&h", ""), 16)
+        # Bytes are stored most-significant first as alpha, blue, green, red.
+        alpha, blue, green, red = ((packed >> shift) & 0xFF for shift in (24, 16, 8, 0))
+        return (red, green, blue, alpha)
+
+    def _export_txt(self, request: ExportRequest) -> str:
+        """Write the segments as plain text grouped under speaker labels.
+
+        An optional title is underlined with ``=``. A ``Name:`` label is written
+        whenever the speaker differs from the previous segment's, and every
+        segment's text is indented by two spaces. Falls back to ``export.txt``
+        when the request has no output path.
+
+        Returns:
+            The path the file was written to.
+        """
+        out: list[str] = []
+        heading = request.title
+        if heading:
+            out += [heading, "=" * len(heading), ""]
+
+        previous: str | None = None
+        for segment in request.segments:
+            label = self._get_speaker_name(segment.speaker, request.speakers)
+            if segment.speaker != previous:
+                # One blank line between speaker groups, none at the very top.
+                needs_gap = bool(out) and out[-1] != ""
+                if needs_gap:
+                    out.append("")
+                out.append(f"{label}:")
+                previous = segment.speaker
+            out.append(f"  {segment.text}")
+
+        destination = request.output_path or "export.txt"
+        with open(destination, "w", encoding="utf-8") as handle:
+            handle.write("\n".join(out) + "\n")
+        return destination
+
+    def _export_md(self, request: ExportRequest) -> str:
+        """Export to Markdown with bold speaker headers and start timestamps.
+
+        An optional title becomes an H1 heading. Whenever the speaker differs
+        from the previous segment's, a ``**Name** _M:SS_`` line is written,
+        stamped with that segment's start time. Falls back to ``export.md``
+        when the request has no output path.
+
+        Returns:
+            The path the file was written to.
+        """
+        lines: list[str] = []
+        if request.title:
+            lines.append(f"# {request.title}")
+            lines.append("")
+
+        current_speaker: str | None = None
+        for seg in request.segments:
+            name = self._get_speaker_name(seg.speaker, request.speakers)
+            if seg.speaker != current_speaker:
+                # Exactly one blank line before a speaker header: none at the
+                # top of the file and no second one after the title's blank
+                # line (same guard as the plain-text exporter).
+                if lines and lines[-1] != "":
+                    lines.append("")
+                lines.append(f"**{name}** _{self._format_timestamp(seg.start_ms)}_")
+                lines.append("")
+                current_speaker = seg.speaker
+            lines.append(seg.text)
+
+        path = request.output_path or "export.md"
+        with open(path, "w", encoding="utf-8") as f:
+            f.write("\n".join(lines) + "\n")
+        return path
+
+    def _format_timestamp(self, ms: int) -> str:
+        """Render a millisecond offset as ``H:MM:SS``, or ``M:SS`` under one hour.
+
+        Sub-second precision is discarded (floor division by 1000).
+        """
+        whole_seconds = ms // 1000
+        whole_minutes, seconds = divmod(whole_seconds, 60)
+        hours, minutes = divmod(whole_minutes, 60)
+        if hours > 0:
+            return f"{hours}:{minutes:02d}:{seconds:02d}"
+        return f"{minutes}:{seconds:02d}"
+
+
+def make_export_request(payload: dict[str, Any]) -> ExportRequest:
+    """Build an ExportRequest from an IPC payload dict.
+
+    Each entry of ``payload["segments"]`` must provide ``text``, ``start_ms``
+    and ``end_ms`` (a missing key raises KeyError); ``speaker`` is optional.
+    Missing top-level keys fall back to: no segments, no speakers, format
+    ``"srt"``, empty output path and empty title.
+    """
+    segments: list[ExportSegment] = []
+    for raw in payload.get("segments", []):
+        segments.append(
+            ExportSegment(
+                text=raw["text"],
+                start_ms=raw["start_ms"],
+                end_ms=raw["end_ms"],
+                speaker=raw.get("speaker"),
+            )
+        )
+
+    speakers = payload.get("speakers", {})
+    fmt = payload.get("format", "srt")
+    output_path = payload.get("output_path", "")
+    title = payload.get("title", "")
+    return ExportRequest(
+        segments=segments,
+        speakers=speakers,
+        format=fmt,
+        output_path=output_path,
+        title=title,
+    )
--- a/src-tauri/src/commands/export.rs
+++ b/src-tauri/src/commands/export.rs
@@ -1,2 +1,49 @@
-// Export commands — trigger caption/text export via Python sidecar
-// TODO: Implement when export service is built
+use serde_json::{json, Value};
+
+use crate::sidecar::messages::IPCMessage;
+use crate::sidecar::SidecarManager;
+
+/// Export a transcript to a caption/text file via the Python sidecar.
+///
+/// Resolves `../python` relative to the current working directory, starts a
+/// sidecar there, sends one `export.start` message carrying the arguments, and
+/// returns the response payload. A response of type `error` is turned into
+/// `Err("Export error: <message>")`.
+#[tauri::command]
+pub fn export_transcript(
+    segments: Value,
+    speakers: Value,
+    format: String,
+    output_path: String,
+    title: Option<String>,
+) -> Result<Value, String> {
+    let cwd = std::env::current_dir().map_err(|e| e.to_string())?;
+    let python_dir = cwd
+        .join("../python")
+        .canonicalize()
+        .map_err(|e| format!("Cannot find python directory: {e}"))?;
+    let python_dir_str = python_dir.to_string_lossy().to_string();
+
+    let sidecar = SidecarManager::new();
+    sidecar.start(&python_dir_str)?;
+
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let payload = json!({
+        "segments": segments,
+        "speakers": speakers,
+        "format": format,
+        "output_path": output_path,
+        "title": title.unwrap_or_default(),
+    });
+    let request = IPCMessage::new(&request_id, "export.start", payload);
+
+    let reply = sidecar.send_and_receive(&request)?;
+
+    // Success path first: anything that is not an error message is passed through.
+    if reply.msg_type != "error" {
+        return Ok(reply.payload);
+    }
+
+    let detail = reply
+        .payload
+        .get("message")
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown");
+    Err(format!("Export error: {}", detail))
+}
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -3,6 +3,7 @@ pub mod db;
 pub mod sidecar;
 pub mod state;

+use commands::export::export_transcript;
 use commands::project::{create_project, get_project, list_projects};
 use commands::transcribe::{run_pipeline, transcribe_file};

@@ -17,6 +18,7 @@ pub fn run() {
            list_projects,
            transcribe_file,
            run_pipeline,
+            export_transcript,
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
--- a/src/lib/services/tauri-bridge.ts
+++ b/src/lib/services/tauri-bridge.ts
@@ -47,6 +47,27 @@ export interface PipelineResult extends TranscriptionResult {
  num_speakers: number;
 }

+/** Payload returned by the `export_transcript` command on success. */
+export interface ExportResult {
+  /** Path of the file that was written. */
+  output_path: string;
+  /** Export format echoed back from the request. */
+  format: string;
+}
+
+/**
+ * Invoke the `export_transcript` Tauri command.
+ *
+ * @param segments   Segments to write; `speaker` is a key of `speakers` or null.
+ * @param speakers   Map from speaker key to display name.
+ * @param format     Target format name (e.g. `srt`, `vtt`, `ass`, `txt`, `md`).
+ * @param outputPath Destination file path.
+ * @param title      Optional transcript title.
+ * @returns The written path and format reported by the backend.
+ */
+export async function exportTranscript(
+  segments: Array<{ text: string; start_ms: number; end_ms: number; speaker: string | null }>,
+  speakers: Record<string, string>,
+  format: string,
+  outputPath: string,
+  title?: string,
+): Promise<ExportResult> {
+  const args = { segments, speakers, format, outputPath, title };
+  return invoke('export_transcript', args);
+}
+
 export async function runPipeline(
  filePath: string,
  options?: {
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,6 +1,6 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
-  import { open } from '@tauri-apps/plugin-dialog';
+  import { open, save } from '@tauri-apps/plugin-dialog';
  import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';
  import TranscriptEditor from '$lib/components/TranscriptEditor.svelte';
  import SpeakerManager from '$lib/components/SpeakerManager.svelte';
@@ -109,6 +109,56 @@
      isTranscribing = false;
    }
  }
+
+  // Entries of the export dropdown. `name` is the menu label and save-dialog
+  // filter name, `ext` the file extension offered by the dialog, and `format`
+  // the value sent to the `export_transcript` command.
+  const exportFormats = [
+    { name: 'SubRip Subtitle', ext: 'srt', format: 'srt' },
+    { name: 'WebVTT', ext: 'vtt', format: 'vtt' },
+    { name: 'Advanced SubStation Alpha', ext: 'ass', format: 'ass' },
+    { name: 'Plain Text', ext: 'txt', format: 'txt' },
+    { name: 'Markdown', ext: 'md', format: 'md' },
+  ];
+
+  // Whether the export dropdown menu is currently open.
+  let showExportMenu = $state(false);
+
+  // Ask for a destination with the native save dialog, then export the current
+  // segments in the chosen format via the `export_transcript` command.
+  async function handleExport(format: string, ext: string, filterName: string) {
+    showExportMenu = false;
+
+    const dialogFilter = { name: filterName, extensions: [ext] };
+    const outputPath = await save({ filters: [dialogFilter] });
+    // The user cancelled the dialog.
+    if (!outputPath) return;
+
+    // Speaker lookup sent to the backend: label → display name (label as fallback).
+    const speakerMap: Record<string, string> = {};
+    for (const entry of $speakers) {
+      speakerMap[entry.label] = entry.display_name || entry.label;
+    }
+
+    // Segments reference speakers by id in the store; the backend expects the label.
+    const exportSegments = $segments.map(segment => {
+      const owner = $speakers.find(candidate => candidate.id === segment.speaker_id);
+      return {
+        text: segment.text,
+        start_ms: segment.start_ms,
+        end_ms: segment.end_ms,
+        speaker: owner?.label ?? null,
+      };
+    });
+
+    const args = {
+      segments: exportSegments,
+      speakers: speakerMap,
+      format,
+      outputPath,
+      title: 'Voice to Notes Transcript',
+    };
+
+    try {
+      await invoke('export_transcript', args);
+      alert(`Exported to ${outputPath}`);
+    } catch (err) {
+      console.error('Export failed:', err);
+      alert(`Export failed: ${err}`);
+    }
+  }
 </script>

 <div class="app-header">
@@ -117,6 +167,22 @@
    <button class="import-btn" onclick={handleFileImport}>
      Import Audio/Video
    </button>
+    {#if $segments.length > 0}
+      <div class="export-dropdown">
+        <button class="export-btn" onclick={() => showExportMenu = !showExportMenu}>
+          Export
+        </button>
+        {#if showExportMenu}
+          <div class="export-menu">
+            {#each exportFormats as fmt}
+              <button class="export-option" onclick={() => handleExport(fmt.format, fmt.ext, fmt.name)}>
+                {fmt.name} (.{fmt.ext})
+              </button>
+            {/each}
+          </div>
+        {/if}
+      </div>
+    {/if}
  </div>
 </div>

@@ -164,6 +230,53 @@
  .import-btn:hover {
    background: #d63851;
  }
+  .header-actions {
+    display: flex;
+    gap: 0.5rem;
+    align-items: center;
+  }
+  .export-dropdown {
+    position: relative;
+  }
+  .export-btn {
+    background: #0f3460;
+    border: 1px solid #4a5568;
+    color: #e0e0e0;
+    padding: 0.5rem 1rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.875rem;
+    font-weight: 500;
+  }
+  .export-btn:hover {
+    background: #1a4a7a;
+  }
+  .export-menu {
+    position: absolute;
+    top: 100%;
+    right: 0;
+    margin-top: 0.25rem;
+    background: #16213e;
+    border: 1px solid #4a5568;
+    border-radius: 6px;
+    overflow: hidden;
+    z-index: 10;
+    min-width: 220px;
+  }
+  .export-option {
+    display: block;
+    width: 100%;
+    background: none;
+    border: none;
+    color: #e0e0e0;
+    padding: 0.5rem 1rem;
+    text-align: left;
+    cursor: pointer;
+    font-size: 0.8rem;
+  }
+  .export-option:hover {
+    background: rgba(233, 69, 96, 0.2);
+  }
  .workspace {
    display: flex;
    gap: 1rem;