perf/pipeline-improvements #2

Merged
jknapp merged 11 commits from perf/pipeline-improvements into main 2026-03-21 05:29:36 +00:00
5 changed files with 718 additions and 17 deletions
Showing only changes of commit 61caa07e4c - Show all commits

View File

@@ -1,9 +1,72 @@
use serde::{Deserialize, Serialize};
use tauri::State;
use crate::db::models::Project;
use crate::db::queries;
use crate::state::AppState;
// ── Input types for save_project_transcript ──────────────────────
/// A single word of a segment, as received by `save_project_transcript`.
///
/// Each entry is forwarded to `queries::create_word` together with its
/// position inside `SegmentInput::words`.
#[derive(Deserialize)]
pub struct WordInput {
pub word: String,
pub start_ms: i64,
pub end_ms: i64,
// Required on input; `save_project_transcript` stores it as `Some(confidence)`.
pub confidence: f64,
}
/// One transcript segment, as received by `save_project_transcript`.
///
/// The position of the segment in the incoming list becomes its
/// `segment_index` when it is stored.
#[derive(Deserialize)]
pub struct SegmentInput {
pub text: String,
pub start_ms: i64,
pub end_ms: i64,
// Resolved to a speaker id through the label -> id map built while saving;
// a label that matches no `SpeakerInput` is stored without a speaker.
pub speaker: Option<String>, // speaker label, not id
pub words: Vec<WordInput>,
}
/// A speaker to create for a project, as received by `save_project_transcript`.
///
/// `label` is the key that `SegmentInput::speaker` refers to; `color` is passed
/// to `queries::create_speaker` as `Some(color)`.
#[derive(Deserialize)]
pub struct SpeakerInput {
pub label: String,
pub color: String,
}
// ── Output types for load_project_transcript ─────────────────────
/// A stored word as returned inside `SegmentOutput::words` by
/// `load_project_transcript`.
#[derive(Serialize)]
pub struct WordOutput {
pub word: String,
pub start_ms: i64,
pub end_ms: i64,
// Copied as-is from the stored word, where the value is optional.
pub confidence: Option<f64>,
}
/// A stored segment with its words nested, as returned by
/// `load_project_transcript`.
#[derive(Serialize)]
pub struct SegmentOutput {
// Id of the stored segment row.
pub id: String,
pub text: String,
pub start_ms: i64,
pub end_ms: i64,
// Looked up from the segment's speaker id; `None` when the segment has no
// speaker or the id is not among the project's speakers.
pub speaker: Option<String>, // speaker label
pub words: Vec<WordOutput>,
}
/// A project speaker as returned by `load_project_transcript`.
///
/// All fields are copied from the rows yielded by
/// `queries::get_speakers_for_project`.
#[derive(Serialize)]
pub struct SpeakerOutput {
pub id: String,
pub label: String,
pub display_name: Option<String>,
pub color: Option<String>,
}
/// Complete payload returned by `load_project_transcript` for one project.
#[derive(Serialize)]
pub struct ProjectTranscript {
// Path of the first media file found for the project.
pub file_path: String,
// Segments of that media file, each with its words.
pub segments: Vec<SegmentOutput>,
// Every speaker stored for the project.
pub speakers: Vec<SpeakerOutput>,
}
// ── Commands ─────────────────────────────────────────────────────
#[tauri::command]
pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
let conn = state.db.lock().map_err(|e| e.to_string())?;
@@ -21,3 +84,152 @@ pub fn list_projects(state: State<AppState>) -> Result<Vec<Project>, String> {
let conn = state.db.lock().map_err(|e| e.to_string())?;
queries::list_projects(&conn).map_err(|e| e.to_string())
}
/// Tauri command: delete the project identified by `id`.
///
/// # Errors
/// Returns the error text when the database lock cannot be acquired or when
/// `queries::delete_project` fails.
#[tauri::command]
pub fn delete_project(id: String, state: State<AppState>) -> Result<(), String> {
    let db = match state.db.lock() {
        Ok(guard) => guard,
        Err(lock_err) => return Err(lock_err.to_string()),
    };

    match queries::delete_project(&db, &id) {
        Ok(()) => Ok(()),
        Err(db_err) => Err(db_err.to_string()),
    }
}
/// Tauri command: persist a finished transcript for `project_id`.
///
/// Creates one media-file row for `file_path`, one speaker row per entry of
/// `speakers`, and one segment row (plus its word rows) per entry of
/// `segments`. Segment speakers arrive as labels and are resolved to the ids
/// of the speaker rows created here; a label without a matching entry in
/// `speakers` is stored without a speaker.
///
/// All writes happen inside a single database transaction, so the transcript
/// is stored completely or not at all, and the rows are committed once
/// instead of one commit per insert.
///
/// # Errors
/// Returns the error text when the database lock cannot be acquired, when any
/// query fails, when a segment or word index does not fit in `i32`, or when
/// the project does not exist (`"Project not found"`). Nothing is committed
/// in any of these cases.
#[tauri::command]
pub fn save_project_transcript(
    project_id: String,
    file_path: String,
    segments: Vec<SegmentInput>,
    speakers: Vec<SpeakerInput>,
    state: State<AppState>,
) -> Result<Project, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;

    // Dropping `tx` on any early return below rolls every insert back.
    let tx = conn.unchecked_transaction().map_err(|e| e.to_string())?;

    // 1. Create media file entry
    let media_file =
        queries::create_media_file(&tx, &project_id, &file_path).map_err(|e| e.to_string())?;

    // 2. Create speaker entries and build label -> id map
    let mut speaker_map = std::collections::HashMap::with_capacity(speakers.len());
    for speaker_input in &speakers {
        let speaker = queries::create_speaker(
            &tx,
            &project_id,
            &speaker_input.label,
            Some(&speaker_input.color),
        )
        .map_err(|e| e.to_string())?;
        speaker_map.insert(speaker_input.label.clone(), speaker.id);
    }

    // 3. Create segments with words
    for (index, seg_input) in segments.iter().enumerate() {
        // Checked conversion: a plain `as i32` would silently wrap on overflow.
        let segment_index = i32::try_from(index).map_err(|e| e.to_string())?;
        let speaker_id = seg_input
            .speaker
            .as_ref()
            .and_then(|label| speaker_map.get(label));

        let segment = queries::create_segment(
            &tx,
            &project_id,
            &media_file.id,
            speaker_id.map(|s| s.as_str()),
            seg_input.start_ms,
            seg_input.end_ms,
            &seg_input.text,
            segment_index,
        )
        .map_err(|e| e.to_string())?;

        // Create words for this segment
        for (word_index, word_input) in seg_input.words.iter().enumerate() {
            let word_index = i32::try_from(word_index).map_err(|e| e.to_string())?;
            queries::create_word(
                &tx,
                &segment.id,
                &word_input.word,
                word_input.start_ms,
                word_input.end_ms,
                Some(word_input.confidence),
                word_index,
            )
            .map_err(|e| e.to_string())?;
        }
    }

    // 4. Look the project up before committing, so a missing project leaves
    //    no orphaned transcript rows behind.
    let project = queries::get_project(&tx, &project_id)
        .map_err(|e| e.to_string())?
        .ok_or_else(|| "Project not found".to_string())?;

    tx.commit().map_err(|e| e.to_string())?;
    Ok(project)
}
#[tauri::command]
pub fn load_project_transcript(
project_id: String,
state: State<AppState>,
) -> Result<Option<ProjectTranscript>, String> {
let conn = state.db.lock().map_err(|e| e.to_string())?;
// 1. Get media files for the project
let media_files =
queries::get_media_files_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
let media_file = match media_files.first() {
Some(mf) => mf,
None => return Ok(None),
};
// 2. Get speakers for the project and build id -> label map
let speakers =
queries::get_speakers_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
let speaker_label_map: std::collections::HashMap<String, String> = speakers
.iter()
.map(|s| (s.id.clone(), s.label.clone()))
.collect();
// 3. Get segments for the media file
let db_segments =
queries::get_segments_for_media(&conn, &media_file.id).map_err(|e| e.to_string())?;
// 4. Build output segments with nested words
let mut segment_outputs = Vec::with_capacity(db_segments.len());
for seg in &db_segments {
let words = queries::get_words_for_segment(&conn, &seg.id).map_err(|e| e.to_string())?;
let word_outputs: Vec<WordOutput> = words
.into_iter()
.map(|w| WordOutput {
word: w.word,
start_ms: w.start_ms,
end_ms: w.end_ms,
confidence: w.confidence,
})
.collect();
let speaker_label = seg
.speaker_id
.as_ref()
.and_then(|sid| speaker_label_map.get(sid))
.cloned();
segment_outputs.push(SegmentOutput {
id: seg.id.clone(),
text: seg.text.clone(),
start_ms: seg.start_ms,
end_ms: seg.end_ms,
speaker: speaker_label,
words: word_outputs,
});
}
// 5. Build speaker outputs
let speaker_outputs: Vec<SpeakerOutput> = speakers
.into_iter()
.map(|s| SpeakerOutput {
id: s.id,
label: s.label,
display_name: s.display_name,
color: s.color,
})
.collect();
Ok(Some(ProjectTranscript {
file_path: media_file.file_path.clone(),
segments: segment_outputs,
speakers: speaker_outputs,
}))
}

View File

@@ -85,6 +85,57 @@ pub fn delete_project(conn: &Connection, id: &str) -> Result<(), DatabaseError>
Ok(())
}
// ── Media Files ──────────────────────────────────────────────────
/// Insert a new `media_files` row linking `file_path` to `project_id`.
///
/// The row gets a freshly generated UUID v4 as id and the current UTC time
/// (RFC 3339) as creation timestamp. Every other metadata field of the
/// returned [`MediaFile`] is `None`.
///
/// # Errors
/// Propagates the failure of the `INSERT` statement.
pub fn create_media_file(
    conn: &Connection,
    project_id: &str,
    file_path: &str,
) -> Result<MediaFile, DatabaseError> {
    // Build the record first so the inserted values and the returned values
    // come from one place.
    let media_file = MediaFile {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        file_path: file_path.to_owned(),
        file_hash: None,
        duration_ms: None,
        sample_rate: None,
        channels: None,
        format: None,
        file_size: None,
        created_at: Utc::now().to_rfc3339(),
    };

    conn.execute(
        "INSERT INTO media_files (id, project_id, file_path, created_at) VALUES (?1, ?2, ?3, ?4)",
        params![
            media_file.id,
            media_file.project_id,
            media_file.file_path,
            media_file.created_at
        ],
    )?;

    Ok(media_file)
}
/// Fetch every media file stored for `project_id`, ordered by `created_at`.
///
/// # Errors
/// Propagates a failure to prepare or run the query, or to read any row.
pub fn get_media_files_for_project(
    conn: &Connection,
    project_id: &str,
) -> Result<Vec<MediaFile>, DatabaseError> {
    let mut stmt = conn.prepare(
        "SELECT id, project_id, file_path, file_hash, duration_ms, sample_rate, channels, format, file_size, created_at FROM media_files WHERE project_id = ?1 ORDER BY created_at",
    )?;

    // Column indices follow the order of the SELECT list above.
    let mapped = stmt.query_map(params![project_id], |r| {
        Ok(MediaFile {
            id: r.get(0)?,
            project_id: r.get(1)?,
            file_path: r.get(2)?,
            file_hash: r.get(3)?,
            duration_ms: r.get(4)?,
            sample_rate: r.get(5)?,
            channels: r.get(6)?,
            format: r.get(7)?,
            file_size: r.get(8)?,
            created_at: r.get(9)?,
        })
    })?;

    let mut media_files = Vec::new();
    for media_file in mapped {
        media_files.push(media_file?);
    }
    Ok(media_files)
}
// ── Speakers ──────────────────────────────────────────────────────
pub fn create_speaker(
@@ -194,6 +245,39 @@ pub fn reassign_speaker(
Ok(())
}
// ── Segments (create) ────────────────────────────────────────────
/// Insert a new, unedited `segments` row and return it.
///
/// The segment gets a freshly generated UUID v4 as id. `original_text`,
/// `confidence` and `edited_at` are left unset and `is_edited` is stored as 0.
///
/// # Errors
/// Propagates the failure of the `INSERT` statement.
pub fn create_segment(
    conn: &Connection,
    project_id: &str,
    media_file_id: &str,
    speaker_id: Option<&str>,
    start_ms: i64,
    end_ms: i64,
    text: &str,
    segment_index: i32,
) -> Result<Segment, DatabaseError> {
    let segment = Segment {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        media_file_id: media_file_id.to_owned(),
        speaker_id: speaker_id.map(str::to_owned),
        start_ms,
        end_ms,
        text: text.to_owned(),
        original_text: None,
        confidence: None,
        is_edited: false,
        edited_at: None,
        segment_index,
    };

    conn.execute(
        "INSERT INTO segments (id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, is_edited, segment_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 0, ?8)",
        params![
            segment.id,
            segment.project_id,
            segment.media_file_id,
            segment.speaker_id,
            segment.start_ms,
            segment.end_ms,
            segment.text,
            segment.segment_index
        ],
    )?;

    Ok(segment)
}
// ── Words ─────────────────────────────────────────────────────────
pub fn get_words_for_segment(
@@ -217,6 +301,31 @@ pub fn get_words_for_segment(
Ok(rows.collect::<Result<Vec<_>, _>>()?)
}
/// Insert a new `words` row belonging to `segment_id` and return it.
///
/// The word gets a freshly generated UUID v4 as id; `word_index` is stored
/// exactly as given.
///
/// # Errors
/// Propagates the failure of the `INSERT` statement.
pub fn create_word(
    conn: &Connection,
    segment_id: &str,
    word: &str,
    start_ms: i64,
    end_ms: i64,
    confidence: Option<f64>,
    word_index: i32,
) -> Result<Word, DatabaseError> {
    let entry = Word {
        id: Uuid::new_v4().to_string(),
        segment_id: segment_id.to_owned(),
        word: word.to_owned(),
        start_ms,
        end_ms,
        confidence,
        word_index,
    };

    conn.execute(
        "INSERT INTO words (id, segment_id, word, start_ms, end_ms, confidence, word_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        params![
            entry.id,
            entry.segment_id,
            entry.word,
            entry.start_ms,
            entry.end_ms,
            entry.confidence,
            entry.word_index
        ],
    )?;

    Ok(entry)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -9,7 +9,10 @@ use tauri::Manager;
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects};
use commands::project::{
create_project, delete_project, get_project, list_projects, load_project_transcript,
save_project_transcript,
};
use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
@@ -34,6 +37,9 @@ pub fn run() {
create_project,
get_project,
list_projects,
delete_project,
save_project_transcript,
load_project_transcript,
transcribe_file,
run_pipeline,
download_diarize_model,

View File

@@ -88,6 +88,88 @@
messages = [];
}
/**
 * Convert a small Markdown subset into an HTML string.
 *
 * Supported block syntax: `#`/`##`/`###` headings (rendered as h2/h3/h4),
 * `-`/`*` bullet items, `1.` numbered items, blank lines (rendered as `<br>`)
 * and plain text lines. Inline formatting is delegated to
 * `applyInlineFormatting`.
 *
 * The result is meant for `{@html}`, so the input is HTML-escaped before any
 * tag is generated: only the tags produced here can reach the DOM.
 */
function formatMarkdown(text: string): string {
  type ListTag = 'ul' | 'ol';

  // Neutralise markup in the source text; none of these characters is part
  // of the Markdown syntax handled below, so escaping first is safe.
  const escapeHtml = (raw: string): string =>
    raw
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  const result: string[] = [];
  // Tag of the list currently open, so it is always closed with the matching tag.
  let openList: ListTag | null = null;

  const closeList = () => {
    if (openList) {
      result.push(`</${openList}>`);
      openList = null;
    }
  };

  // Open a list of the requested kind, closing a list of the other kind first.
  const ensureList = (tag: ListTag) => {
    if (openList !== tag) {
      closeList();
      result.push(`<${tag}>`);
      openList = tag;
    }
  };

  for (const line of escapeHtml(text).split('\n')) {
    // Headers (longest prefix first)
    if (line.startsWith('### ')) {
      closeList();
      result.push(`<h4>${applyInlineFormatting(line.slice(4))}</h4>`);
      continue;
    }
    if (line.startsWith('## ')) {
      closeList();
      result.push(`<h3>${applyInlineFormatting(line.slice(3))}</h3>`);
      continue;
    }
    if (line.startsWith('# ')) {
      closeList();
      result.push(`<h2>${applyInlineFormatting(line.slice(2))}</h2>`);
      continue;
    }

    // List items (- or *)
    if (/^[\-\*] /.test(line)) {
      ensureList('ul');
      result.push(`<li>${applyInlineFormatting(line.slice(2))}</li>`);
      continue;
    }

    // Numbered list items
    if (/^\d+\.\s/.test(line)) {
      ensureList('ol');
      result.push(`<li>${applyInlineFormatting(line.replace(/^\d+\.\s/, ''))}</li>`);
      continue;
    }

    // Non-list line: close any open list
    closeList();

    // Empty line = paragraph break
    if (line.trim() === '') {
      result.push('<br>');
      continue;
    }

    // Regular text line
    result.push(applyInlineFormatting(line));
  }

  // Close any trailing open list
  closeList();

  return result.join('\n');
}
/**
 * Apply inline Markdown formatting to a single line of text.
 *
 * Handles `code` spans, **bold** and *italic*. Code spans are lifted out
 * before the emphasis passes run and put back afterwards, so asterisks inside
 * backticks stay literal instead of being turned into `<em>`/`<strong>`.
 */
function applyInlineFormatting(text: string): string {
  const codeSpans: string[] = [];

  // Swap each code span for a NUL-delimited placeholder. NUL characters are
  // stripped from the input first so it cannot forge a placeholder.
  let formatted = text
    .replace(/\u0000/g, '')
    .replace(/`([^`]+)`/g, (_match, code: string) => {
      codeSpans.push(code);
      return `\u0000${codeSpans.length - 1}\u0000`;
    });

  // Bold (**text**)
  formatted = formatted.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
  // Italic (*text*) — only single asterisks not already consumed by bold
  formatted = formatted.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // Restore the code spans untouched by the emphasis passes.
  return formatted.replace(
    /\u0000(\d+)\u0000/g,
    (_match, index: string) => `<code>${codeSpans[Number(index)]}</code>`,
  );
}
// Quick action buttons
async function summarize() {
inputText = 'Please summarize this transcript in bullet points.';
@@ -122,7 +204,11 @@
{:else}
{#each messages as msg}
<div class="message {msg.role}">
<div class="message-content">{msg.content}</div>
{#if msg.role === 'assistant'}
<div class="message-content">{@html formatMarkdown(msg.content)}</div>
{:else}
<div class="message-content">{msg.content}</div>
{/if}
</div>
{/each}
{#if isLoading}
@@ -192,47 +278,101 @@
}
.empty-state {
text-align: center;
color: #666;
font-size: 0.8rem;
padding: 1rem 0;
color: #888;
font-size: 0.85rem;
padding: 2rem 1rem;
}
.empty-state p {
margin-bottom: 1rem;
}
.quick-actions {
display: flex;
gap: 0.5rem;
gap: 0.75rem;
justify-content: center;
margin-top: 0.5rem;
margin-top: 1rem;
}
.quick-btn {
background: rgba(233, 69, 96, 0.15);
border: 1px solid rgba(233, 69, 96, 0.3);
color: #e94560;
padding: 0.3rem 0.6rem;
border-radius: 4px;
padding: 0.45rem 0.85rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.75rem;
font-size: 0.8rem;
transition: background 0.15s;
}
.quick-btn:hover {
background: rgba(233, 69, 96, 0.25);
}
.message {
margin-bottom: 0.5rem;
padding: 0.5rem 0.75rem;
border-radius: 6px;
margin-bottom: 0.75rem;
padding: 0.75rem 1rem;
border-radius: 8px;
font-size: 0.8rem;
line-height: 1.4;
line-height: 1.55;
}
.message.user {
background: rgba(233, 69, 96, 0.15);
margin-left: 1rem;
border-left: 3px solid rgba(233, 69, 96, 0.4);
}
.message.assistant {
background: rgba(255, 255, 255, 0.05);
margin-right: 1rem;
border-left: 3px solid rgba(255, 255, 255, 0.1);
}
.message.loading {
opacity: 0.6;
font-style: italic;
}
/* Markdown styles inside assistant messages */
.message.assistant :global(h2) {
font-size: 1rem;
font-weight: 600;
margin: 0.6rem 0 0.3rem;
color: #f0f0f0;
}
.message.assistant :global(h3) {
font-size: 0.9rem;
font-weight: 600;
margin: 0.5rem 0 0.25rem;
color: #e8e8e8;
}
.message.assistant :global(h4) {
font-size: 0.85rem;
font-weight: 600;
margin: 0.4rem 0 0.2rem;
color: #e0e0e0;
}
.message.assistant :global(strong) {
color: #f0f0f0;
font-weight: 600;
}
.message.assistant :global(em) {
color: #ccc;
font-style: italic;
}
.message.assistant :global(code) {
background: rgba(0, 0, 0, 0.3);
color: #e94560;
padding: 0.1rem 0.35rem;
border-radius: 3px;
font-size: 0.75rem;
font-family: 'Fira Code', 'Cascadia Code', 'Consolas', monospace;
}
.message.assistant :global(ul),
.message.assistant :global(ol) {
margin: 0.35rem 0;
padding-left: 1.3rem;
}
.message.assistant :global(li) {
margin-bottom: 0.25rem;
line-height: 1.5;
}
.message.assistant :global(br) {
display: block;
content: '';
margin-top: 0.35rem;
}
.chat-input {
display: flex;
gap: 0.5rem;

View File

@@ -18,8 +18,15 @@
let audioUrl = $state('');
let showSettings = $state(false);
// Project management state
let currentProjectId = $state<string | null>(null);
let currentProjectName = $state('');
let savedProjects = $state<Array<{id: string, name: string, created_at: string}>>([]);
let showProjectMenu = $state(false);
onMount(() => {
loadSettings();
loadProjects();
// Global keyboard shortcuts
function handleKeyDown(e: KeyboardEvent) {
@@ -38,18 +45,24 @@
showSettings = true;
} else if (e.key === 'Escape') {
showExportMenu = false;
showProjectMenu = false;
showSettings = false;
}
}
// Close the export and project dropdowns when a click lands outside of them
function handleClickOutside(e: MouseEvent) {
  const target = e.target;

  // A target that is not an element (e.g. the document itself) has no
  // `closest` and can never be inside a dropdown.
  const isInside = (selector: string): boolean =>
    target instanceof Element && target.closest(selector) !== null;

  if (showExportMenu && !isInside('.export-dropdown')) {
    showExportMenu = false;
  }
  if (showProjectMenu && !isInside('.project-dropdown')) {
    showProjectMenu = false;
  }
}
document.addEventListener('keydown', handleKeyDown);
@@ -70,6 +83,106 @@
// Speaker color palette for auto-assignment
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
/**
 * Refresh `savedProjects` from the backend `list_projects` command.
 * A failure is logged and leaves the current list untouched.
 */
async function loadProjects() {
  type ProjectSummary = { id: string; name: string; created_at: string };

  try {
    savedProjects = await invoke<ProjectSummary[]>('list_projects');
  } catch (err) {
    console.error('Failed to load projects:', err);
  }
}
/**
 * Load a saved project's transcript from the backend into the editor state.
 *
 * Replaces the speaker and segment stores with the stored data, points the
 * waveform player at the saved media file and makes the project current.
 * Any failure — including a project without a saved transcript — is logged
 * and reported with an alert.
 */
async function loadProject(projectId: string) {
  try {
    // Shape of the `ProjectTranscript` returned by `load_project_transcript`.
    // The command yields `null` when the project has no media file. It does
    // not echo the project id or name, and speakers are records, not labels.
    const result = await invoke<{
      file_path: string;
      segments: Array<{
        id: string;
        text: string;
        start_ms: number;
        end_ms: number;
        speaker: string | null;
        words: Array<{
          word: string;
          start_ms: number;
          end_ms: number;
          // Optional on the backend type, but `save_project_transcript`
          // always stores a value.
          confidence: number;
        }>;
      }>;
      speakers: Array<{
        id: string;
        label: string;
        display_name: string | null;
        color: string | null;
      }>;
    } | null>('load_project_transcript', { projectId });

    if (!result) {
      throw new Error('No saved transcript found for this project');
    }

    // Set project info; the name comes from the already loaded project list.
    currentProjectId = projectId;
    currentProjectName = savedProjects.find((p) => p.id === projectId)?.name ?? '';

    // Rebuild speakers, keeping their stored ids
    const newSpeakers: Speaker[] = (result.speakers || []).map((s, idx) => ({
      id: s.id,
      project_id: projectId,
      label: s.label,
      display_name: s.display_name,
      // Fall back to the palette when no color was stored.
      color: s.color ?? speakerColors[idx % speakerColors.length],
    }));
    speakers.set(newSpeakers);

    // Build speaker label -> id lookup
    const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));

    // Rebuild segments, keeping their stored ids
    const newSegments: Segment[] = result.segments.map((seg, idx) => ({
      id: seg.id,
      project_id: projectId,
      media_file_id: '',
      speaker_id: seg.speaker ? (speakerLookup.get(seg.speaker) ?? null) : null,
      start_ms: seg.start_ms,
      end_ms: seg.end_ms,
      text: seg.text,
      original_text: null,
      confidence: null,
      is_edited: false,
      edited_at: null,
      segment_index: idx,
      // The backend does not return word ids, so synthesize stable ones.
      words: seg.words.map((w, widx) => ({
        id: `word-${idx}-${widx}`,
        segment_id: seg.id,
        word: w.word,
        start_ms: w.start_ms,
        end_ms: w.end_ms,
        confidence: w.confidence,
        word_index: widx,
      })),
    }));
    segments.set(newSegments);

    // Load audio from saved file path
    if (result.file_path) {
      audioUrl = convertFileSrc(result.file_path);
      waveformPlayer?.loadAudio(audioUrl);
    }

    showProjectMenu = false;
  } catch (err) {
    console.error('Failed to load project:', err);
    alert(`Failed to load project: ${err}`);
  }
}
/**
 * Delete a saved project and refresh the project list.
 * If the deleted project is the current one, the current-project state is
 * cleared. Failures are logged and reported with an alert.
 */
async function deleteProject(projectId: string) {
  try {
    // The Rust `delete_project` command names its parameter `id`, so the
    // argument key must be `id` for the command to receive it.
    await invoke('delete_project', { id: projectId });
    await loadProjects();
    if (currentProjectId === projectId) {
      currentProjectId = null;
      currentProjectName = '';
    }
  } catch (err) {
    console.error('Failed to delete project:', err);
    alert(`Failed to delete project: ${err}`);
  }
}
function handleWordClick(timeMs: number) {
console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
waveformPlayer?.seekTo(timeMs);
@@ -245,6 +358,28 @@
}));
segments.set(newSegments);
// Auto-save project (best effort: a failure is logged, the transcript stays loaded)
try {
  const fileName = filePath.split(/[\\/]/).pop() || 'Untitled';
  // Strip the extension; keep the full name for dot-files such as ".env".
  const projectName = fileName.replace(/\.[^.]+$/, '') || fileName;

  // `create_project` returns the whole project record, not a bare id;
  // `save_project_transcript` needs the id string.
  const project = await invoke<{ id: string; name: string; created_at: string }>(
    'create_project',
    { name: projectName },
  );
  const projectId = project.id;

  await invoke('save_project_transcript', {
    projectId,
    filePath,
    segments: result.segments,
    speakers: result.speakers.map((label, idx) => ({
      label,
      display_name: null,
      color: speakerColors[idx % speakerColors.length],
    })),
  });

  currentProjectId = projectId;
  currentProjectName = projectName;
  await loadProjects();
} catch (saveErr) {
  console.error('Auto-save failed:', saveErr);
}
} catch (err) {
console.error('Pipeline failed:', err);
alert(`Pipeline failed: ${err}`);
@@ -318,6 +453,33 @@
<div class="app-header">
<h1>Voice to Notes</h1>
<div class="header-actions">
<div class="project-dropdown">
<button class="project-btn" onclick={() => showProjectMenu = !showProjectMenu}>
{currentProjectName ? `Project: ${currentProjectName}` : 'No project'}
</button>
{#if showProjectMenu}
<div class="project-menu">
{#if savedProjects.length === 0}
<div class="project-empty">No saved projects</div>
{:else}
{#each savedProjects as project}
<div class="project-item">
<button class="project-option" onclick={() => loadProject(project.id)}>
{project.name}
</button>
<button
class="project-delete"
onclick={(e) => { e.stopPropagation(); deleteProject(project.id); }}
title="Delete project"
>
&times;
</button>
</div>
{/each}
{/if}
</div>
{/if}
</div>
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
{#if isTranscribing}
Processing...
@@ -467,6 +629,78 @@
.export-option:hover {
background: rgba(233, 69, 96, 0.2);
}
.project-dropdown {
position: relative;
}
.project-btn {
background: #0f3460;
border: 1px solid #4a5568;
color: #e0e0e0;
padding: 0.5rem 1rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.875rem;
font-weight: 500;
max-width: 200px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.project-btn:hover {
background: #1a4a7a;
}
.project-menu {
position: absolute;
top: 100%;
right: 0;
margin-top: 0.25rem;
background: #16213e;
border: 1px solid #4a5568;
border-radius: 6px;
overflow: hidden;
z-index: 10;
min-width: 220px;
max-height: 300px;
overflow-y: auto;
}
.project-empty {
padding: 0.5rem 1rem;
color: #888;
font-size: 0.8rem;
}
.project-item {
display: flex;
align-items: center;
}
.project-option {
flex: 1;
background: none;
border: none;
color: #e0e0e0;
padding: 0.5rem 1rem;
text-align: left;
cursor: pointer;
font-size: 0.8rem;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.project-option:hover {
background: rgba(233, 69, 96, 0.2);
}
.project-delete {
background: none;
border: none;
color: #888;
padding: 0.5rem 0.75rem;
cursor: pointer;
font-size: 1rem;
line-height: 1;
flex-shrink: 0;
}
.project-delete:hover {
color: #e94560;
}
.app-shell {
display: flex;
flex-direction: column;