Phase 4: Export to SRT, WebVTT, ASS, plain text, and Markdown

- Implement ExportService using pysubs2 for caption formats (SRT, VTT, ASS)
  and custom formatters for plain text and Markdown
- SRT exports with [Speaker]: prefix, WebVTT with <v Speaker> voice tags,
  ASS with color-coded speaker styles
- Plain text groups by speaker with labels, Markdown adds timestamps
- Add export.start IPC handler and export_transcript Tauri command
- Add export dropdown menu in header (appears after transcription)
- Use the native save dialog for output file selection
- Add pysubs2 dependency
- Tests: 30 Python (6 export tests), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 16:18:54 -08:00
parent 44480906a4
commit 415a648a2b
9 changed files with 557 additions and 9 deletions

View File

@@ -12,6 +12,7 @@ license = "MIT"
dependencies = [
"faster-whisper>=1.1.0",
"pyannote.audio>=3.1.0",
"pysubs2>=1.7.0",
]
[project.optional-dependencies]

133
python/tests/test_export.py Normal file
View File

@@ -0,0 +1,133 @@
"""Tests for the export service."""
import os
import tempfile
from voice_to_notes.services.export import (
ExportRequest,
ExportSegment,
ExportService,
make_export_request,
)
def _make_segments():
    """Return a three-segment, two-speaker conversation used as export input."""
    # (text, start_ms, end_ms, speaker) — speaker 00 talks first and last.
    rows = (
        ("Hello there", 0, 2000, "SPEAKER_00"),
        ("How are you?", 2500, 4500, "SPEAKER_01"),
        ("I'm fine, thanks", 5000, 7500, "SPEAKER_00"),
    )
    return [
        ExportSegment(text=text, start_ms=start, end_ms=end, speaker=speaker)
        for text, start, end, speaker in rows
    ]
def _speaker_map():
return {"SPEAKER_00": "Alice", "SPEAKER_01": "Bob"}
def test_export_srt():
    """SRT export returns the output path and prefixes cues with speaker names."""
    service = ExportService()
    # delete=False: only a unique path is needed; the service reopens it by name.
    with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as f:
        path = f.name
    try:
        req = ExportRequest(
            segments=_make_segments(),
            speakers=_speaker_map(),
            format="srt",
            output_path=path,
        )
        result = service.export(req)
        assert result == path
        # Close the handle before the file is unlinked in `finally`.
        with open(path, encoding="utf-8") as fh:
            content = fh.read()
        assert "[Alice]:" in content
        assert "[Bob]:" in content
        assert "Hello there" in content
    finally:
        os.unlink(path)
def test_export_vtt():
    """WebVTT export returns the output path and wraps cues in <v Name> voice tags."""
    service = ExportService()
    # delete=False: only a unique path is needed; the service reopens it by name.
    with tempfile.NamedTemporaryFile(suffix=".vtt", delete=False) as f:
        path = f.name
    try:
        req = ExportRequest(
            segments=_make_segments(),
            speakers=_speaker_map(),
            format="vtt",
            output_path=path,
        )
        result = service.export(req)
        assert result == path
        # Close the handle before the file is unlinked in `finally`.
        with open(path, encoding="utf-8") as fh:
            content = fh.read()
        assert "<v Alice>" in content
        assert "<v Bob>" in content
    finally:
        os.unlink(path)
def test_export_txt():
    """Plain-text export returns the output path and contains title, labels and text."""
    service = ExportService()
    # delete=False: only a unique path is needed; the service reopens it by name.
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
        path = f.name
    try:
        req = ExportRequest(
            segments=_make_segments(),
            speakers=_speaker_map(),
            format="txt",
            output_path=path,
            title="Test Transcript",
        )
        result = service.export(req)
        assert result == path
        # Close the handle before the file is unlinked in `finally`.
        with open(path, encoding="utf-8") as fh:
            content = fh.read()
        assert "Test Transcript" in content
        assert "Alice:" in content
        assert "Bob:" in content
        assert "Hello there" in content
    finally:
        os.unlink(path)
def test_export_md():
    """Markdown export returns the output path and contains a heading and bold names."""
    service = ExportService()
    # delete=False: only a unique path is needed; the service reopens it by name.
    with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as f:
        path = f.name
    try:
        req = ExportRequest(
            segments=_make_segments(),
            speakers=_speaker_map(),
            format="md",
            output_path=path,
            title="Test Transcript",
        )
        result = service.export(req)
        assert result == path
        # Close the handle before the file is unlinked in `finally`.
        with open(path, encoding="utf-8") as fh:
            content = fh.read()
        assert "# Test Transcript" in content
        assert "**Alice**" in content
        assert "**Bob**" in content
    finally:
        os.unlink(path)
def test_make_export_request():
    """Every field of an IPC payload is carried over into the ExportRequest."""
    payload = {
        "segments": [
            {"text": "Hello", "start_ms": 0, "end_ms": 1000, "speaker": "SPK_0"},
        ],
        "speakers": {"SPK_0": "Alice"},
        "format": "srt",
        "output_path": "/tmp/test.srt",
        "title": "Test",
    }
    req = make_export_request(payload)

    assert len(req.segments) == 1
    segment = req.segments[0]
    assert segment.text == "Hello"
    assert segment.start_ms == 0
    assert segment.end_ms == 1000
    assert segment.speaker == "SPK_0"

    assert req.speakers["SPK_0"] == "Alice"
    assert req.format == "srt"
    # The path is only parsed here, never written to.
    assert req.output_path == "/tmp/test.srt"
    assert req.title == "Test"
def test_export_unsupported_format():
    """An unknown format name is rejected with a ValueError mentioning 'Unsupported'."""
    service = ExportService()
    req = ExportRequest(format="xyz")
    try:
        service.export(req)
    except ValueError as e:
        assert "Unsupported" in str(e)
    else:
        # Raised explicitly: `assert False` would be stripped when running with -O.
        raise AssertionError("Should have raised ValueError")

View File

@@ -117,6 +117,24 @@ def make_pipeline_handler() -> HandlerFunc:
return handler
def make_export_handler() -> HandlerFunc:
    """Build the IPC handler that serves export requests.

    One ExportService is created here and shared by every call of the
    returned handler.
    """
    # Imported inside the factory rather than at module import time.
    from voice_to_notes.services.export import ExportService, make_export_request

    exporter = ExportService()

    def handler(msg: IPCMessage) -> IPCMessage:
        # Parse the payload, run the export, and reply under the same message id.
        req = make_export_request(msg.payload)
        written_to = exporter.export(req)
        reply_payload = {"output_path": written_to, "format": req.format}
        return IPCMessage(id=msg.id, type="export.result", payload=reply_payload)

    return handler
def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
"""Detect hardware capabilities and return recommendations."""
from voice_to_notes.hardware.detect import detect_hardware

View File

@@ -9,6 +9,7 @@ from voice_to_notes.ipc.handlers import (
HandlerRegistry,
hardware_detect_handler,
make_diarize_handler,
make_export_handler,
make_pipeline_handler,
make_transcribe_handler,
ping_handler,
@@ -25,6 +26,7 @@ def create_registry() -> HandlerRegistry:
registry.register("hardware.detect", hardware_detect_handler)
registry.register("diarize.start", make_diarize_handler())
registry.register("pipeline.start", make_pipeline_handler())
registry.register("export.start", make_export_handler())
return registry

View File

@@ -2,13 +2,224 @@
from __future__ import annotations
import os
from dataclasses import dataclass, field
from typing import Any
import pysubs2
@dataclass
class ExportSegment:
    """One timed piece of transcript text, optionally attributed to a speaker."""

    text: str
    start_ms: int  # start time in milliseconds
    end_ms: int  # end time in milliseconds
    speaker: str | None = None  # speaker ID; None when the segment is unattributed
@dataclass
class ExportRequest:
    """Everything an export needs: segments, speaker names, format and destination."""

    # Segments in the order they are written out.
    segments: list[ExportSegment] = field(default_factory=list)
    # Speaker ID → display name.
    speakers: dict[str, str] = field(default_factory=dict)
    # One of: srt, vtt, ass, txt, md.
    format: str = "srt"
    # Destination file; when empty the exporter falls back to "export.<ext>".
    output_path: str = ""
    # Optional heading for the plain-text and Markdown exports.
    title: str = ""
class ExportService:
    """Handles export to SRT, WebVTT, ASS, plain text, and Markdown.

    Caption formats (SRT, WebVTT, ASS) are written through pysubs2; plain text
    and Markdown are assembled line by line and written as UTF-8.
    """

    # Per-speaker primary colours for ASS styles, in ASS &HAABBGGRR notation.
    # Speakers beyond the palette length wrap around and reuse colours.
    _ASS_COLORS = (
        "&H0000FFFF",  # Yellow
        "&H00FF00FF",  # Magenta
        "&H00FFFF00",  # Cyan
        "&H000000FF",  # Red
        "&H0000FF00",  # Green
        "&H00FF0000",  # Blue
        "&H0080FF80",  # Light green
        "&H00FF8080",  # Light blue
    )

    def export(self, request: ExportRequest) -> str:
        """Export segments to the requested format.

        The format name is matched case-insensitively; "webvtt" is accepted as
        an alias of "vtt".

        Returns the output file path.

        Raises:
            ValueError: If the format is not one of srt, vtt/webvtt, ass, txt, md.
        """
        fmt = request.format.lower()
        exporters = {
            "srt": self._export_srt,
            "vtt": self._export_vtt,
            "webvtt": self._export_vtt,
            "ass": self._export_ass,
            "txt": self._export_txt,
            "md": self._export_md,
        }
        try:
            exporter = exporters[fmt]
        except KeyError:
            raise ValueError(f"Unsupported export format: {fmt}") from None
        return exporter(request)

    def _get_speaker_name(self, speaker: str | None, speakers: dict[str, str]) -> str:
        """Resolve speaker ID to display name.

        Returns "Unknown" when there is no speaker ID, and the ID itself when
        it has no mapping or is mapped to an empty display name.
        """
        if not speaker:
            return "Unknown"
        # `or` (not a .get default) so an empty display name never yields a blank label.
        return speakers.get(speaker) or speaker

    def _export_srt(self, request: ExportRequest) -> str:
        """Export to SubRip (.srt) format with speaker prefixes.

        Segments with a speaker are written as "[Name]: text"; segments without
        one are written as bare text. Writes to "export.srt" when the request
        has no output path. Returns the path written.
        """
        subs = pysubs2.SSAFile()
        for seg in request.segments:
            if seg.speaker:
                name = self._get_speaker_name(seg.speaker, request.speakers)
                text = f"[{name}]: {seg.text}"
            else:
                text = seg.text
            subs.append(pysubs2.SSAEvent(start=seg.start_ms, end=seg.end_ms, text=text))

        path = request.output_path or "export.srt"
        subs.save(path, format_="srt")
        return path

    def _export_vtt(self, request: ExportRequest) -> str:
        """Export to WebVTT (.vtt) format with voice tags.

        Segments with a speaker are wrapped as "<v Name>text</v>"; segments
        without one are written as bare text. Writes to "export.vtt" when the
        request has no output path. Returns the path written.
        """
        subs = pysubs2.SSAFile()
        for seg in request.segments:
            if seg.speaker:
                name = self._get_speaker_name(seg.speaker, request.speakers)
                # WebVTT voice tags: <v Speaker>text</v>
                text = f"<v {name}>{seg.text}</v>"
            else:
                text = seg.text
            subs.append(pysubs2.SSAEvent(start=seg.start_ms, end=seg.end_ms, text=text))

        path = request.output_path or "export.vtt"
        subs.save(path, format_="vtt")
        return path

    def _export_ass(self, request: ExportRequest) -> str:
        """Export to Advanced SubStation Alpha (.ass) with speaker styles.

        One bold, 20pt style is created per distinct speaker ID, coloured from
        the class palette in sorted-ID order. Segments without a speaker use
        the "Default" style. Writes to "export.ass" when the request has no
        output path. Returns the path written.
        """
        subs = pysubs2.SSAFile()

        # Sorted so the colour assigned to each speaker is deterministic.
        unique_speakers = sorted({seg.speaker for seg in request.segments if seg.speaker})

        speaker_styles: dict[str, str] = {}
        palette = self._ASS_COLORS
        for i, spk in enumerate(unique_speakers):
            name = self._get_speaker_name(spk, request.speakers)
            # ASS style and dialogue lines are comma-separated, so a comma in a
            # style name would corrupt the file; spaces are replaced as before.
            style_name = name.replace(" ", "_").replace(",", "_")
            speaker_style = pysubs2.SSAStyle()
            speaker_style.primarycolor = pysubs2.Color(
                *self._parse_ass_color(palette[i % len(palette)])
            )
            speaker_style.fontsize = 20
            speaker_style.bold = True
            subs.styles[style_name] = speaker_style
            speaker_styles[spk] = style_name

        for seg in request.segments:
            event = pysubs2.SSAEvent(
                start=seg.start_ms,
                end=seg.end_ms,
                text=seg.text,
                style=speaker_styles.get(seg.speaker or "", "Default"),
            )
            subs.append(event)

        path = request.output_path or "export.ass"
        subs.save(path, format_="ass")
        return path

    def _parse_ass_color(self, color_str: str) -> tuple[int, int, int, int]:
        """Parse ASS color string &HAABBGGRR to (r, g, b, a).

        Surrounding whitespace, the "&H"/"&h" prefix and a trailing "&" are
        all optional. A six-digit BBGGRR value yields alpha 0.

        Raises:
            ValueError: If the remaining text is not hexadecimal.
        """
        hex_str = color_str.strip().rstrip("&")
        if hex_str[:2].lower() == "&h":
            hex_str = hex_str[2:]
        val = int(hex_str, 16)
        # Byte order, most to least significant: alpha, blue, green, red.
        a = (val >> 24) & 0xFF
        b = (val >> 16) & 0xFF
        g = (val >> 8) & 0xFF
        r = val & 0xFF
        return (r, g, b, a)

    def _export_txt(self, request: ExportRequest) -> str:
        """Export to plain text with speaker labels.

        An optional underlined title comes first. A "Name:" label is emitted
        each time the speaker differs from the previous segment's, so leading
        segments with no speaker get no label. Writes to "export.txt" when the
        request has no output path. Returns the path written.
        """
        lines: list[str] = []

        if request.title:
            lines.append(request.title)
            lines.append("=" * len(request.title))
            lines.append("")

        current_speaker: str | None = None
        for seg in request.segments:
            if seg.speaker != current_speaker:
                # Blank line between speaker turns, without doubling one up.
                if lines and lines[-1] != "":
                    lines.append("")
                name = self._get_speaker_name(seg.speaker, request.speakers)
                lines.append(f"{name}:")
                current_speaker = seg.speaker
            lines.append(f" {seg.text}")

        path = request.output_path or "export.txt"
        with open(path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")
        return path

    def _export_md(self, request: ExportRequest) -> str:
        """Export to Markdown with speaker headers and timestamps.

        An optional "# title" heading comes first. Each change of speaker
        starts a "**Name** _timestamp_" line, using the start time of the
        first segment of that turn; leading segments with no speaker get no
        header. Writes to "export.md" when the request has no output path.
        Returns the path written.
        """
        lines: list[str] = []

        if request.title:
            lines.append(f"# {request.title}")
            lines.append("")

        current_speaker: str | None = None
        for seg in request.segments:
            if seg.speaker != current_speaker:
                # Same separator rule as the plain-text export: no leading blank
                # line in an untitled file and no double blank after the title.
                if lines and lines[-1] != "":
                    lines.append("")
                name = self._get_speaker_name(seg.speaker, request.speakers)
                lines.append(f"**{name}** _{self._format_timestamp(seg.start_ms)}_")
                lines.append("")
                current_speaker = seg.speaker
            lines.append(seg.text)

        path = request.output_path or "export.md"
        with open(path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")
        return path

    def _format_timestamp(self, ms: int) -> str:
        """Format milliseconds as H:MM:SS or M:SS.

        Fractions of a second are dropped. Negative values are treated as 0,
        and float input is accepted (it would otherwise break the ":02d"
        formatting).
        """
        total_seconds = int(max(ms, 0)) // 1000
        minutes, s = divmod(total_seconds, 60)
        h, m = divmod(minutes, 60)
        if h > 0:
            return f"{h}:{m:02d}:{s:02d}"
        return f"{m}:{s:02d}"
def make_export_request(payload: dict[str, Any]) -> ExportRequest:
    """Create an ExportRequest from IPC payload.

    Missing or null top-level keys fall back to the ExportRequest defaults.
    Segment times are rounded to whole milliseconds, since JSON numbers sent
    by the frontend may arrive as floats.

    Raises:
        KeyError: If a segment lacks "text", "start_ms" or "end_ms".
    """
    segments = [
        ExportSegment(
            text=seg["text"],
            start_ms=int(round(seg["start_ms"])),
            end_ms=int(round(seg["end_ms"])),
            speaker=seg.get("speaker"),
        )
        # `or` rather than a .get default: a JSON null must also mean "none".
        for seg in payload.get("segments") or []
    ]
    return ExportRequest(
        segments=segments,
        speakers=payload.get("speakers") or {},
        format=payload.get("format") or "srt",
        output_path=payload.get("output_path") or "",
        title=payload.get("title") or "",
    )

View File

@@ -1,2 +1,49 @@
// Export commands — trigger caption/text export via Python sidecar
// TODO: Implement when export service is built
use serde_json::{json, Value};
use crate::sidecar::messages::IPCMessage;
use crate::sidecar::SidecarManager;
/// Export a transcript to a caption/text format via the Python sidecar.
///
/// Resolves `../python` against the current working directory, starts a
/// sidecar there, sends one `export.start` message carrying the arguments,
/// and returns the response payload. A response whose type is `error` is
/// turned into `Err("Export error: <message>")`; a missing `title` is sent
/// as an empty string.
#[tauri::command]
pub fn export_transcript(
    segments: Value,
    speakers: Value,
    format: String,
    output_path: String,
    title: Option<String>,
) -> Result<Value, String> {
    let cwd = std::env::current_dir().map_err(|e| e.to_string())?;
    let python_dir = cwd
        .join("../python")
        .canonicalize()
        .map_err(|e| format!("Cannot find python directory: {e}"))?;
    let python_dir_str = python_dir.to_string_lossy().to_string();

    let sidecar = SidecarManager::new();
    sidecar.start(&python_dir_str)?;

    let request_id = uuid::Uuid::new_v4().to_string();
    let payload = json!({
        "segments": segments,
        "speakers": speakers,
        "format": format,
        "output_path": output_path,
        "title": title.unwrap_or_default(),
    });
    let msg = IPCMessage::new(&request_id, "export.start", payload);

    let response = sidecar.send_and_receive(&msg)?;

    // Success path first: anything that is not an error reply is passed through.
    if response.msg_type != "error" {
        return Ok(response.payload);
    }

    let reason = response
        .payload
        .get("message")
        .and_then(|v| v.as_str())
        .unwrap_or("unknown");
    Err(format!("Export error: {}", reason))
}

View File

@@ -3,6 +3,7 @@ pub mod db;
pub mod sidecar;
pub mod state;
use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects};
use commands::transcribe::{run_pipeline, transcribe_file};
@@ -17,6 +18,7 @@ pub fn run() {
list_projects,
transcribe_file,
run_pipeline,
export_transcript,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

View File

@@ -47,6 +47,27 @@ export interface PipelineResult extends TranscriptionResult {
num_speakers: number;
}
/** Payload returned by the `export_transcript` command after a successful export. */
export interface ExportResult {
// Path of the file the export was written to.
output_path: string;
// Format name taken from the export request.
format: string;
}
/**
 * Invoke the `export_transcript` backend command.
 *
 * @param segments   Transcript segments with millisecond timings; `speaker` is a speaker ID or null.
 * @param speakers   Map from speaker ID to display name.
 * @param format     Export format identifier (e.g. `srt`, `vtt`, `ass`, `txt`, `md`).
 * @param outputPath Destination file path.
 * @param title      Optional title; passed through as-is (possibly undefined).
 * @returns The command's result payload.
 */
export async function exportTranscript(
  segments: Array<{ text: string; start_ms: number; end_ms: number; speaker: string | null }>,
  speakers: Record<string, string>,
  format: string,
  outputPath: string,
  title?: string,
): Promise<ExportResult> {
  const args = { segments, speakers, format, outputPath, title };
  return invoke('export_transcript', args);
}
export async function runPipeline(
filePath: string,
options?: {

View File

@@ -1,6 +1,6 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { open } from '@tauri-apps/plugin-dialog';
import { open, save } from '@tauri-apps/plugin-dialog';
import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';
import TranscriptEditor from '$lib/components/TranscriptEditor.svelte';
import SpeakerManager from '$lib/components/SpeakerManager.svelte';
@@ -109,6 +109,56 @@
isTranscribing = false;
}
}
// Formats offered in the export menu. `name` is the label shown in the menu and
// used as the save-dialog filter name, `ext` is the file extension, and `format`
// is the identifier passed to the `export_transcript` command.
const exportFormats = [
{ name: 'SubRip Subtitle', ext: 'srt', format: 'srt' },
{ name: 'WebVTT', ext: 'vtt', format: 'vtt' },
{ name: 'Advanced SubStation Alpha', ext: 'ass', format: 'ass' },
{ name: 'Plain Text', ext: 'txt', format: 'txt' },
{ name: 'Markdown', ext: 'md', format: 'md' },
];
// Whether the export dropdown menu is currently open.
let showExportMenu = $state(false);
// Ask the user where to save, then export the current transcript in `format`.
// Does nothing if the save dialog is cancelled; reports success or failure via alert().
async function handleExport(format: string, ext: string, filterName: string) {
  showExportMenu = false;

  const outputPath = await save({
    filters: [{ name: filterName, extensions: [ext] }],
  });
  if (!outputPath) return;

  // Speaker label → display name, falling back to the label itself.
  const speakerMap: Record<string, string> = Object.fromEntries(
    $speakers.map((s) => [s.label, s.display_name || s.label]),
  );

  // Segments carry a speaker_id; the export payload wants the speaker's label.
  const exportSegments = $segments.map(({ text, start_ms, end_ms, speaker_id }) => ({
    text,
    start_ms,
    end_ms,
    speaker: $speakers.find((s) => s.id === speaker_id)?.label ?? null,
  }));

  const args = {
    segments: exportSegments,
    speakers: speakerMap,
    format,
    outputPath,
    title: 'Voice to Notes Transcript',
  };

  try {
    await invoke('export_transcript', args);
    alert(`Exported to ${outputPath}`);
  } catch (err) {
    console.error('Export failed:', err);
    alert(`Export failed: ${err}`);
  }
}
</script>
<div class="app-header">
@@ -117,6 +167,22 @@
<button class="import-btn" onclick={handleFileImport}>
Import Audio/Video
</button>
{#if $segments.length > 0}
<div class="export-dropdown">
<button class="export-btn" onclick={() => showExportMenu = !showExportMenu}>
Export
</button>
{#if showExportMenu}
<div class="export-menu">
{#each exportFormats as fmt}
<button class="export-option" onclick={() => handleExport(fmt.format, fmt.ext, fmt.name)}>
{fmt.name} (.{fmt.ext})
</button>
{/each}
</div>
{/if}
</div>
{/if}
</div>
</div>
@@ -164,6 +230,53 @@
.import-btn:hover {
background: #d63851;
}
.header-actions {
display: flex;
gap: 0.5rem;
align-items: center;
}
.export-dropdown {
position: relative;
}
.export-btn {
background: #0f3460;
border: 1px solid #4a5568;
color: #e0e0e0;
padding: 0.5rem 1rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.875rem;
font-weight: 500;
}
.export-btn:hover {
background: #1a4a7a;
}
.export-menu {
position: absolute;
top: 100%;
right: 0;
margin-top: 0.25rem;
background: #16213e;
border: 1px solid #4a5568;
border-radius: 6px;
overflow: hidden;
z-index: 10;
min-width: 220px;
}
.export-option {
display: block;
width: 100%;
background: none;
border: none;
color: #e0e0e0;
padding: 0.5rem 1rem;
text-align: left;
cursor: pointer;
font-size: 0.8rem;
}
.export-option:hover {
background: rgba(233, 69, 96, 0.2);
}
.workspace {
display: flex;
gap: 1rem;