Phase 2: Core transcription pipeline and audio playback

- Implement faster-whisper TranscribeService with word-level timestamps,
  progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 15:53:09 -08:00
parent 503cc6c0cf
commit 48fe41b064
18 changed files with 1775 additions and 32 deletions

View File

@@ -1,2 +1,52 @@
// Transcription commands — start/stop/monitor transcription via Python sidecar
// TODO: Implement when sidecar IPC is connected
use serde_json::{json, Value};
use crate::sidecar::messages::IPCMessage;
use crate::sidecar::SidecarManager;
/// Transcribe an audio file by delegating to the Python sidecar.
///
/// The call blocks for the whole round trip. Every invocation builds a fresh
/// [`SidecarManager`], starts it against the `../python` directory (resolved
/// from the current working directory), sends one `transcribe.start` request
/// and returns the payload of the first non-progress reply. The manager is
/// dropped when the function returns.
///
/// # Arguments
///
/// * `file_path` - forwarded to the sidecar under the `file` key.
/// * `model` - forwarded as `model`; `"base"` when omitted.
/// * `device` - forwarded as `device`; `"cpu"` when omitted.
/// * `language` - forwarded as `language`; serialized as JSON `null` when omitted.
///
/// # Errors
///
/// Returns a human-readable string when the python directory cannot be
/// resolved, when the sidecar fails to start or to answer, or when the
/// sidecar replies with a message of type `error`.
#[tauri::command]
pub fn transcribe_file(
    file_path: String,
    model: Option<String>,
    device: Option<String>,
    language: Option<String>,
) -> Result<Value, String> {
    // Locate the sidecar sources relative to the working directory.
    let working_dir = std::env::current_dir().map_err(|e| e.to_string())?;
    let sidecar_dir = working_dir
        .join("../python")
        .canonicalize()
        .map_err(|e| format!("Cannot find python directory: {e}"))?;
    let sidecar_dir = sidecar_dir.to_string_lossy().into_owned();

    let manager = SidecarManager::new();
    manager.start(&sidecar_dir)?;

    // Each request is tagged with its own random identifier.
    let request_id = uuid::Uuid::new_v4().to_string();
    let request_payload = json!({
        "file": file_path,
        "model": model.unwrap_or_else(|| "base".to_string()),
        "device": device.unwrap_or_else(|| "cpu".to_string()),
        "compute_type": "int8",
        "language": language,
    });
    let request = IPCMessage::new(&request_id, "transcribe.start", request_payload);

    let reply = manager.send_and_receive(&request)?;
    if reply.msg_type != "error" {
        return Ok(reply.payload);
    }

    // Surface the sidecar's own description of the failure when it sent one.
    let reason = reply
        .payload
        .get("message")
        .and_then(|v| v.as_str())
        .unwrap_or("unknown");
    Err(format!("Transcription error: {}", reason))
}

View File

@@ -1,8 +1,10 @@
pub mod commands;
pub mod db;
pub mod sidecar;
pub mod state;
use commands::project::{create_project, get_project, list_projects};
use commands::transcribe::transcribe_file;
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
@@ -12,6 +14,7 @@ pub fn run() {
create_project,
get_project,
list_projects,
transcribe_file,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

View File

@@ -0,0 +1,16 @@
use std::io::Write;
use super::messages::IPCMessage;
/// Write one IPC message to `writer` as a newline-terminated JSON line.
///
/// The message is serialized with `serde_json`, followed by a single `\n`,
/// and the writer is flushed before returning.
///
/// # Errors
///
/// Returns the serialization error text, or a `Write error: ..` /
/// `Flush error: ..` string when the underlying writer fails.
pub fn send_message<W: Write>(writer: &mut W, msg: &IPCMessage) -> Result<(), String> {
    let encoded = serde_json::to_string(msg).map_err(|e| e.to_string())?;

    // Body first, then the newline that terminates the message.
    for chunk in [encoded.as_bytes(), &b"\n"[..]] {
        writer
            .write_all(chunk)
            .map_err(|e| format!("Write error: {e}"))?;
    }

    writer.flush().map_err(|e| format!("Flush error: {e}"))
}

View File

@@ -0,0 +1,21 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// One message on the Rust <-> Python sidecar channel.
///
/// Serialized as a JSON object with the keys `id`, `type` and `payload`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IPCMessage {
    /// Message identifier chosen by the sender.
    pub id: String,
    /// Message kind (for example `ready`, `progress` or `error`); appears
    /// under the JSON key `type`.
    #[serde(rename = "type")]
    pub msg_type: String,
    /// Arbitrary JSON body whose shape depends on `msg_type`.
    pub payload: Value,
}

impl IPCMessage {
    /// Build a message, copying `id` and `msg_type` into owned strings.
    pub fn new(id: &str, msg_type: &str, payload: Value) -> Self {
        let id = String::from(id);
        let msg_type = String::from(msg_type);
        Self {
            id,
            msg_type,
            payload,
        }
    }
}

View File

@@ -0,0 +1,150 @@
pub mod ipc;
pub mod messages;
use std::io::{BufRead, BufReader, Write};
use std::process::{Child, Command, Stdio};
use std::sync::Mutex;
use crate::sidecar::messages::IPCMessage;
/// Owner of the Python sidecar child process.
///
/// The process handle lives behind a mutex so the manager can be used
/// through a shared reference.
pub struct SidecarManager {
    /// Handle to the spawned sidecar, or `None` when no process is attached.
    process: Mutex<Option<Child>>,
}
impl SidecarManager {
pub fn new() -> Self {
Self {
process: Mutex::new(None),
}
}
/// Spawn the Python sidecar process.
pub fn start(&self, python_path: &str) -> Result<(), String> {
let child = Command::new("python3")
.arg("-m")
.arg("voice_to_notes.main")
.current_dir(python_path)
.env("PYTHONPATH", python_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::inherit()) // Let sidecar logs go to parent's stderr
.spawn()
.map_err(|e| format!("Failed to start sidecar: {e}"))?;
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
*proc = Some(child);
// Wait for the "ready" message
self.wait_for_ready()?;
Ok(())
}
/// Wait for the sidecar to send its ready message.
fn wait_for_ready(&self) -> Result<(), String> {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
if let Some(ref mut stdout) = child.stdout {
let reader = BufReader::new(stdout);
for line in reader.lines() {
let line = line.map_err(|e| format!("Read error: {e}"))?;
if line.is_empty() {
continue;
}
if let Ok(msg) = serde_json::from_str::<IPCMessage>(&line) {
if msg.msg_type == "ready" {
return Ok(());
}
}
// If we got a non-ready message, something's wrong but don't block forever
break;
}
}
}
Err("Sidecar did not send ready message".to_string())
}
/// Send a message to the sidecar and read the response.
/// This is a blocking call.
pub fn send_and_receive(&self, msg: &IPCMessage) -> Result<IPCMessage, String> {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
// Write message to stdin
if let Some(ref mut stdin) = child.stdin {
let json = serde_json::to_string(msg).map_err(|e| e.to_string())?;
stdin
.write_all(json.as_bytes())
.map_err(|e| format!("Write error: {e}"))?;
stdin
.write_all(b"\n")
.map_err(|e| format!("Write error: {e}"))?;
stdin.flush().map_err(|e| format!("Flush error: {e}"))?;
} else {
return Err("Sidecar stdin not available".to_string());
}
// Read response from stdout
if let Some(ref mut stdout) = child.stdout {
let mut reader = BufReader::new(stdout);
let mut line = String::new();
// Read lines until we get a response (skip progress messages, collect them)
loop {
line.clear();
let bytes_read = reader
.read_line(&mut line)
.map_err(|e| format!("Read error: {e}"))?;
if bytes_read == 0 {
return Err("Sidecar closed stdout".to_string());
}
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
let response: IPCMessage =
serde_json::from_str(trimmed).map_err(|e| format!("Parse error: {e}"))?;
// If it's a progress message, we could emit it as an event
// For now, skip progress and return the final result/error
if response.msg_type != "progress" {
return Ok(response);
}
}
} else {
return Err("Sidecar stdout not available".to_string());
}
} else {
Err("Sidecar not running".to_string())
}
}
/// Stop the sidecar process.
pub fn stop(&self) -> Result<(), String> {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = proc.take() {
// Close stdin to signal EOF
drop(child.stdin.take());
// Wait briefly for clean exit, then kill
match child.wait() {
Ok(_) => Ok(()),
Err(e) => {
let _ = child.kill();
Err(format!("Sidecar did not exit cleanly: {e}"))
}
}
} else {
Ok(())
}
}
pub fn is_running(&self) -> bool {
let proc = self.process.lock().ok();
proc.map_or(false, |p| p.is_some())
}
}
impl Drop for SidecarManager {
    /// Shut the sidecar down when the manager goes out of scope.
    fn drop(&mut self) {
        // `drop` cannot report failures, so the outcome of `stop` is discarded.
        self.stop().ok();
    }
}