Merge pull request 'perf/pipeline-improvements' (#2) from perf/pipeline-improvements into main
Some checks failed
Build & Release / Build sidecar (x86_64-pc-windows-msvc) (push) Has been cancelled
Build & Release / Build app (x86_64-unknown-linux-gnu) (push) Has been cancelled
Build & Release / Build app (aarch64-apple-darwin) (push) Has been cancelled
Build & Release / Build app (x86_64-pc-windows-msvc) (push) Has been cancelled
Build & Release / Create Release (push) Has been cancelled
Build & Release / Build sidecar (x86_64-unknown-linux-gnu) (push) Has been cancelled
Build & Release / Build sidecar (aarch64-apple-darwin) (push) Has been cancelled

Reviewed-on: #2
This commit was merged in pull request #2.
This commit is contained in:
2026-03-21 05:29:36 +00:00
8 changed files with 849 additions and 46 deletions

View File

@@ -32,17 +32,24 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Create Python toolcache directory (macOS)
if: matrix.platform == 'macos'
run: sudo mkdir -p /Users/runner && sudo chown $USER /Users/runner
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v5 uses: actions/setup-python@v5
with: with:
python-version: ${{ env.PYTHON_VERSION }} python-version: ${{ env.PYTHON_VERSION }}
- name: Install Python build tools
run: python -m pip install --upgrade pip setuptools wheel
- name: Build sidecar - name: Build sidecar
working-directory: python working-directory: python
run: python build_sidecar.py --cpu-only run: python build_sidecar.py --cpu-only
- name: Upload sidecar artifact - name: Upload sidecar artifact
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v3
with: with:
name: sidecar-${{ matrix.target }} name: sidecar-${{ matrix.target }}
path: python/dist/voice-to-notes-sidecar/ path: python/dist/voice-to-notes-sidecar/
@@ -73,9 +80,6 @@ jobs:
uses: actions/setup-node@v4 uses: actions/setup-node@v4
with: with:
node-version: ${{ env.NODE_VERSION }} node-version: ${{ env.NODE_VERSION }}
# Note: 'cache: npm' requires the Gitea instance to have
# Actions cache configured. Remove this if caching is unavailable.
cache: npm
- name: Install Rust stable - name: Install Rust stable
uses: dtolnay/rust-toolchain@stable uses: dtolnay/rust-toolchain@stable
@@ -86,8 +90,20 @@ jobs:
sudo apt-get update sudo apt-get update
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
- name: Install system dependencies (macOS)
if: matrix.platform == 'macos'
run: |
brew install --quiet create-dmg || true
- name: Install system dependencies (Windows)
if: matrix.platform == 'windows'
shell: powershell
run: |
# Ensure Visual Studio Build Tools are available (usually pre-installed on runners)
Write-Host "Windows build environment ready"
- name: Download sidecar artifact - name: Download sidecar artifact
uses: actions/download-artifact@v4 uses: actions/download-artifact@v3
with: with:
name: sidecar-${{ matrix.target }} name: sidecar-${{ matrix.target }}
path: src-tauri/binaries/ path: src-tauri/binaries/
@@ -107,7 +123,7 @@ jobs:
- name: Upload app artifacts (Linux) - name: Upload app artifacts (Linux)
if: matrix.platform == 'linux' if: matrix.platform == 'linux'
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v3
with: with:
name: app-${{ matrix.target }} name: app-${{ matrix.target }}
path: | path: |
@@ -117,7 +133,7 @@ jobs:
- name: Upload app artifacts (Windows) - name: Upload app artifacts (Windows)
if: matrix.platform == 'windows' if: matrix.platform == 'windows'
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v3
with: with:
name: app-${{ matrix.target }} name: app-${{ matrix.target }}
path: | path: |
@@ -127,7 +143,7 @@ jobs:
- name: Upload app artifacts (macOS) - name: Upload app artifacts (macOS)
if: matrix.platform == 'macos' if: matrix.platform == 'macos'
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v3
with: with:
name: app-${{ matrix.target }} name: app-${{ matrix.target }}
path: | path: |
@@ -143,11 +159,15 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Install required tools
run: |
sudo apt-get update
sudo apt-get install -y jq curl
- name: Download all app artifacts - name: Download all app artifacts
uses: actions/download-artifact@v4 uses: actions/download-artifact@v3
with: with:
path: artifacts/ path: artifacts/
pattern: app-*
- name: Generate release tag - name: Generate release tag
id: tag id: tag
@@ -167,6 +187,11 @@ jobs:
echo "Release ID: ${RELEASE_ID}" echo "Release ID: ${RELEASE_ID}"
if [ "${RELEASE_ID}" = "null" ] || [ -z "${RELEASE_ID}" ]; then
echo "ERROR: Failed to create release. Check BUILD_TOKEN permissions."
exit 1
fi
# Upload all artifacts # Upload all artifacts
find artifacts/ -type f \( -name "*.deb" -o -name "*.AppImage" -o -name "*.msi" -o -name "*.exe" -o -name "*.dmg" \) | while read file; do find artifacts/ -type f \( -name "*.deb" -o -name "*.AppImage" -o -name "*.msi" -o -name "*.exe" -o -name "*.dmg" \) | while read file; do
filename=$(basename "$file") filename=$(basename "$file")

View File

@@ -68,32 +68,33 @@ def create_venv_and_install(cpu_only: bool) -> Path:
print(f"[build] Creating venv at {venv_dir}") print(f"[build] Creating venv at {venv_dir}")
subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True) subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)
# Determine pip and python paths inside venv # Determine python path inside venv — use `python -m pip` instead of
# calling pip directly to avoid permission errors on Windows
if sys.platform == "win32": if sys.platform == "win32":
pip = str(venv_dir / "Scripts" / "pip")
python = str(venv_dir / "Scripts" / "python") python = str(venv_dir / "Scripts" / "python")
else: else:
pip = str(venv_dir / "bin" / "pip")
python = str(venv_dir / "bin" / "python") python = str(venv_dir / "bin" / "python")
def pip_install(*args: str) -> None:
subprocess.run([python, "-m", "pip", *args], check=True)
# Upgrade pip # Upgrade pip
subprocess.run([pip, "install", "--upgrade", "pip"], check=True) pip_install("install", "--upgrade", "pip", "setuptools", "wheel")
# Install torch (CPU-only to avoid bundling ~2GB of CUDA libs) # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
if cpu_only: if cpu_only:
print("[build] Installing PyTorch (CPU-only)") print("[build] Installing PyTorch (CPU-only)")
subprocess.run( pip_install(
[pip, "install", "torch", "torchaudio", "install", "torch", "torchaudio",
"--index-url", "https://download.pytorch.org/whl/cpu"], "--index-url", "https://download.pytorch.org/whl/cpu",
check=True,
) )
else: else:
print("[build] Installing PyTorch (default, may include CUDA)") print("[build] Installing PyTorch (default, may include CUDA)")
subprocess.run([pip, "install", "torch", "torchaudio"], check=True) pip_install("install", "torch", "torchaudio")
# Install project and dev deps (includes pyinstaller) # Install project and dev deps (includes pyinstaller)
print("[build] Installing project dependencies") print("[build] Installing project dependencies")
subprocess.run([pip, "install", "-e", f"{SCRIPT_DIR}[dev]"], check=True) pip_install("install", "-e", f"{SCRIPT_DIR}[dev]")
return Path(python) return Path(python)

View File

@@ -1,9 +1,110 @@
use serde::{Deserialize, Serialize};
use std::fs;
use tauri::State; use tauri::State;
use crate::db::models::Project; use crate::db::models::Project;
use crate::db::queries; use crate::db::queries;
use crate::state::AppState; use crate::state::AppState;
// ── File-based project types ────────────────────────────────────
/// Serialized form of a file-based project: the value `save_project_file`
/// writes as pretty-printed JSON and `load_project_file` parses back.
#[derive(Serialize, Deserialize)]
pub struct ProjectFile {
/// Version number of the project-file format.
pub version: u32,
/// Project name.
pub name: String,
/// Audio file the transcript belongs to (presumably a filesystem path — confirm against the caller).
pub audio_file: String,
/// Creation timestamp kept as a plain string; its format is not validated here.
pub created_at: String,
/// Transcript segments stored in the file.
pub segments: Vec<ProjectFileSegment>,
/// Speakers stored in the file.
pub speakers: Vec<ProjectFileSpeaker>,
}
/// One transcript segment as stored inside a `ProjectFile`.
#[derive(Serialize, Deserialize)]
pub struct ProjectFileSegment {
/// Full text of the segment.
pub text: String,
/// Segment start time in milliseconds.
pub start_ms: i64,
/// Segment end time in milliseconds.
pub end_ms: i64,
/// Speaker of the segment, if any (presumably a `ProjectFileSpeaker::label` — confirm against the caller).
pub speaker: Option<String>,
/// Whether the segment text has been edited.
pub is_edited: bool,
/// Word-level entries belonging to this segment.
pub words: Vec<ProjectFileWord>,
}
/// One word with its timing as stored inside a `ProjectFileSegment`.
#[derive(Serialize, Deserialize)]
pub struct ProjectFileWord {
/// The word text.
pub word: String,
/// Word start time in milliseconds.
pub start_ms: i64,
/// Word end time in milliseconds.
pub end_ms: i64,
/// Confidence value for the word; required (not optional) in the file format.
pub confidence: f64,
}
/// One speaker entry as stored inside a `ProjectFile`.
#[derive(Serialize, Deserialize)]
pub struct ProjectFileSpeaker {
/// Speaker label.
pub label: String,
/// Optional display name for the speaker.
pub display_name: Option<String>,
/// Color associated with the speaker (presumably a CSS color string — confirm against the caller).
pub color: String,
}
// ── Input types for save_project_transcript ──────────────────────
/// Word payload nested in `SegmentInput`; `save_project_transcript` passes
/// each field to `queries::create_word`.
#[derive(Deserialize)]
pub struct WordInput {
/// The word text.
pub word: String,
/// Word start time in milliseconds.
pub start_ms: i64,
/// Word end time in milliseconds.
pub end_ms: i64,
/// Confidence value; stored as `Some(confidence)` when the word is saved.
pub confidence: f64,
}
/// Segment payload accepted by `save_project_transcript`.
#[derive(Deserialize)]
pub struct SegmentInput {
/// Full text of the segment.
pub text: String,
/// Segment start time in milliseconds.
pub start_ms: i64,
/// Segment end time in milliseconds.
pub end_ms: i64,
pub speaker: Option<String>, // speaker label, not id
/// Words belonging to this segment; their position in the list becomes the word index.
pub words: Vec<WordInput>,
}
/// Speaker payload accepted by `save_project_transcript`.
#[derive(Deserialize)]
pub struct SpeakerInput {
/// Speaker label; segments refer to speakers by this value.
pub label: String,
/// Color passed to `queries::create_speaker` for this speaker.
pub color: String,
}
// ── Output types for load_project_transcript ─────────────────────
/// Word entry returned inside `SegmentOutput` by `load_project_transcript`.
#[derive(Serialize)]
pub struct WordOutput {
/// The word text.
pub word: String,
/// Word start time in milliseconds.
pub start_ms: i64,
/// Word end time in milliseconds.
pub end_ms: i64,
/// Confidence value, or `None` when the stored word has none.
pub confidence: Option<f64>,
}
/// Segment entry returned by `load_project_transcript`.
#[derive(Serialize)]
pub struct SegmentOutput {
/// Database id of the segment.
pub id: String,
/// Full text of the segment.
pub text: String,
/// Segment start time in milliseconds.
pub start_ms: i64,
/// Segment end time in milliseconds.
pub end_ms: i64,
pub speaker: Option<String>, // speaker label
/// Words loaded for this segment.
pub words: Vec<WordOutput>,
}
/// Speaker entry returned by `load_project_transcript`.
#[derive(Serialize)]
pub struct SpeakerOutput {
/// Database id of the speaker.
pub id: String,
/// Speaker label.
pub label: String,
/// Optional display name for the speaker.
pub display_name: Option<String>,
/// Optional color associated with the speaker.
pub color: Option<String>,
}
/// Result of `load_project_transcript`: the first media file of a project
/// together with its segments and the project's speakers.
#[derive(Serialize)]
pub struct ProjectTranscript {
/// `file_path` of the media file the segments were loaded for.
pub file_path: String,
/// Segments of that media file, each with its nested words.
pub segments: Vec<SegmentOutput>,
/// All speakers of the project.
pub speakers: Vec<SpeakerOutput>,
}
// ── Commands ─────────────────────────────────────────────────────
#[tauri::command] #[tauri::command]
pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> { pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
let conn = state.db.lock().map_err(|e| e.to_string())?; let conn = state.db.lock().map_err(|e| e.to_string())?;
@@ -21,3 +122,176 @@ pub fn list_projects(state: State<AppState>) -> Result<Vec<Project>, String> {
let conn = state.db.lock().map_err(|e| e.to_string())?; let conn = state.db.lock().map_err(|e| e.to_string())?;
queries::list_projects(&conn).map_err(|e| e.to_string()) queries::list_projects(&conn).map_err(|e| e.to_string())
} }
/// Tauri command: deletes the project with the given `id`.
///
/// Returns the lock or database error rendered as a string on failure.
#[tauri::command]
pub fn delete_project(id: String, state: State<AppState>) -> Result<(), String> {
    let guard = state.db.lock().map_err(|err| err.to_string())?;
    match queries::delete_project(&guard, &id) {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Tauri command: replaces the text of segment `segment_id` with `new_text`
/// by delegating to `queries::update_segment_text`.
///
/// Returns the lock or database error rendered as a string on failure.
#[tauri::command]
pub fn update_segment(
    segment_id: String,
    new_text: String,
    state: State<AppState>,
) -> Result<(), String> {
    let guard = state.db.lock().map_err(|err| err.to_string())?;
    let outcome = queries::update_segment_text(&guard, &segment_id, &new_text);
    outcome.map_err(|err| err.to_string())
}
/// Tauri command: persists a transcript for `project_id`.
///
/// Creates one media-file row for `file_path`, one speaker row per entry in
/// `speakers`, and one segment row (plus its word rows) per entry in
/// `segments`. Segment and word indices follow the order of the input lists.
/// A segment whose `speaker` label has no matching entry in `speakers` is
/// stored without a speaker.
///
/// All rows are written inside a single transaction, so a failure part-way
/// through (or a missing project) leaves the database unchanged instead of
/// holding a half-saved transcript.
///
/// Returns the project as read back after the inserts, or an error string.
#[tauri::command]
pub fn save_project_transcript(
    project_id: String,
    file_path: String,
    segments: Vec<SegmentInput>,
    speakers: Vec<SpeakerInput>,
    state: State<AppState>,
) -> Result<Project, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;

    // `unchecked_transaction` only needs `&Connection`, which is all the mutex
    // guard hands out. The transaction rolls back when dropped uncommitted, so
    // every early `?` return below undoes the rows written so far.
    let tx = conn.unchecked_transaction().map_err(|e| e.to_string())?;

    // 1. Create media file entry
    let media_file =
        queries::create_media_file(&tx, &project_id, &file_path).map_err(|e| e.to_string())?;

    // 2. Create speaker entries and build label -> id map
    let mut speaker_map = std::collections::HashMap::new();
    for speaker_input in &speakers {
        let speaker = queries::create_speaker(
            &tx,
            &project_id,
            &speaker_input.label,
            Some(&speaker_input.color),
        )
        .map_err(|e| e.to_string())?;
        speaker_map.insert(speaker_input.label.clone(), speaker.id);
    }

    // 3. Create segments with words
    for (index, seg_input) in segments.iter().enumerate() {
        // Unknown labels resolve to `None` rather than failing the save.
        let speaker_id = seg_input
            .speaker
            .as_ref()
            .and_then(|label| speaker_map.get(label));

        let segment = queries::create_segment(
            &tx,
            &project_id,
            &media_file.id,
            speaker_id.map(|s| s.as_str()),
            seg_input.start_ms,
            seg_input.end_ms,
            &seg_input.text,
            index as i32,
        )
        .map_err(|e| e.to_string())?;

        // Create words for this segment
        for (word_index, word_input) in seg_input.words.iter().enumerate() {
            queries::create_word(
                &tx,
                &segment.id,
                &word_input.word,
                word_input.start_ms,
                word_input.end_ms,
                Some(word_input.confidence),
                word_index as i32,
            )
            .map_err(|e| e.to_string())?;
        }
    }

    // 4. Read the project back before committing so an unknown project id
    //    aborts (and rolls back) the whole save.
    let project = queries::get_project(&tx, &project_id)
        .map_err(|e| e.to_string())?
        .ok_or_else(|| "Project not found".to_string())?;

    tx.commit().map_err(|e| e.to_string())?;
    Ok(project)
}
/// Tauri command: loads the stored transcript of a project.
///
/// Only the first media file returned for the project is used. Returns
/// `Ok(None)` when the project has no media file; otherwise returns that
/// file's path, its segments (each with nested words and the speaker label
/// resolved from the speaker id), and all speakers of the project.
#[tauri::command]
pub fn load_project_transcript(
    project_id: String,
    state: State<AppState>,
) -> Result<Option<ProjectTranscript>, String> {
    let conn = state.db.lock().map_err(|err| err.to_string())?;

    // Media file: nothing to load when the project has none.
    let media_files = queries::get_media_files_for_project(&conn, &project_id)
        .map_err(|err| err.to_string())?;
    if media_files.is_empty() {
        return Ok(None);
    }
    let primary_media = &media_files[0];

    // Speakers, indexed by id so segments can be tagged with a label.
    let db_speakers = queries::get_speakers_for_project(&conn, &project_id)
        .map_err(|err| err.to_string())?;
    let mut label_by_id: std::collections::HashMap<String, String> =
        std::collections::HashMap::with_capacity(db_speakers.len());
    for speaker in &db_speakers {
        label_by_id.insert(speaker.id.clone(), speaker.label.clone());
    }

    // Segments of the chosen media file, each with its words.
    let db_segments = queries::get_segments_for_media(&conn, &primary_media.id)
        .map_err(|err| err.to_string())?;
    let segments = db_segments
        .iter()
        .map(|seg| {
            let words: Vec<WordOutput> = queries::get_words_for_segment(&conn, &seg.id)
                .map_err(|err| err.to_string())?
                .into_iter()
                .map(|w| WordOutput {
                    word: w.word,
                    start_ms: w.start_ms,
                    end_ms: w.end_ms,
                    confidence: w.confidence,
                })
                .collect();

            let speaker = match &seg.speaker_id {
                Some(speaker_id) => label_by_id.get(speaker_id).cloned(),
                None => None,
            };

            Ok(SegmentOutput {
                id: seg.id.clone(),
                text: seg.text.clone(),
                start_ms: seg.start_ms,
                end_ms: seg.end_ms,
                speaker,
                words,
            })
        })
        .collect::<Result<Vec<_>, String>>()?;

    let speakers = db_speakers
        .into_iter()
        .map(|s| SpeakerOutput {
            id: s.id,
            label: s.label,
            display_name: s.display_name,
            color: s.color,
        })
        .collect();

    Ok(Some(ProjectTranscript {
        file_path: primary_media.file_path.clone(),
        segments,
        speakers,
    }))
}
// ── File-based project commands ─────────────────────────────────
/// Tauri command: serializes `project` as pretty-printed JSON and writes it
/// to `path`, replacing any existing file content.
///
/// Returns the serialization error, or a "Failed to save project" message
/// wrapping the I/O error, as a string.
#[tauri::command]
pub fn save_project_file(path: String, project: ProjectFile) -> Result<(), String> {
    let serialized = match serde_json::to_string_pretty(&project) {
        Ok(text) => text,
        Err(e) => return Err(e.to_string()),
    };
    fs::write(&path, serialized).map_err(|e| format!("Failed to save project: {e}"))
}
/// Tauri command: reads the file at `path` and parses it as a JSON
/// `ProjectFile`.
///
/// Returns a "Failed to read project" or "Failed to parse project" message
/// wrapping the underlying error.
#[tauri::command]
pub fn load_project_file(path: String) -> Result<ProjectFile, String> {
    let contents = match fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => return Err(format!("Failed to read project: {e}")),
    };
    match serde_json::from_str(&contents) {
        Ok(project) => Ok(project),
        Err(e) => Err(format!("Failed to parse project: {e}")),
    }
}

View File

@@ -85,6 +85,57 @@ pub fn delete_project(conn: &Connection, id: &str) -> Result<(), DatabaseError>
Ok(()) Ok(())
} }
// ── Media Files ──────────────────────────────────────────────────
/// Inserts a `media_files` row for `file_path` under `project_id`.
///
/// The row gets a freshly generated UUID and the current UTC time (RFC 3339)
/// as `created_at`. The returned `MediaFile` mirrors the inserted values,
/// with every optional metadata field set to `None`.
pub fn create_media_file(
    conn: &Connection,
    project_id: &str,
    file_path: &str,
) -> Result<MediaFile, DatabaseError> {
    // Build the value first, then insert exactly what will be returned.
    let media_file = MediaFile {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        file_path: file_path.to_owned(),
        file_hash: None,
        duration_ms: None,
        sample_rate: None,
        channels: None,
        format: None,
        file_size: None,
        created_at: Utc::now().to_rfc3339(),
    };

    conn.execute(
        "INSERT INTO media_files (id, project_id, file_path, created_at) VALUES (?1, ?2, ?3, ?4)",
        params![
            media_file.id,
            media_file.project_id,
            media_file.file_path,
            media_file.created_at
        ],
    )?;

    Ok(media_file)
}
/// Returns every `media_files` row belonging to `project_id`, ordered by
/// `created_at`.
pub fn get_media_files_for_project(
    conn: &Connection,
    project_id: &str,
) -> Result<Vec<MediaFile>, DatabaseError> {
    let mut statement = conn.prepare(
        "SELECT id, project_id, file_path, file_hash, duration_ms, sample_rate, channels, format, file_size, created_at FROM media_files WHERE project_id = ?1 ORDER BY created_at",
    )?;

    // Column indices follow the SELECT list above.
    let media_files = statement
        .query_map(params![project_id], |row| {
            Ok(MediaFile {
                id: row.get(0)?,
                project_id: row.get(1)?,
                file_path: row.get(2)?,
                file_hash: row.get(3)?,
                duration_ms: row.get(4)?,
                sample_rate: row.get(5)?,
                channels: row.get(6)?,
                format: row.get(7)?,
                file_size: row.get(8)?,
                created_at: row.get(9)?,
            })
        })?
        .collect::<Result<Vec<_>, _>>()?;

    Ok(media_files)
}
// ── Speakers ────────────────────────────────────────────────────── // ── Speakers ──────────────────────────────────────────────────────
pub fn create_speaker( pub fn create_speaker(
@@ -194,6 +245,39 @@ pub fn reassign_speaker(
Ok(()) Ok(())
} }
// ── Segments (create) ────────────────────────────────────────────
/// Inserts a `segments` row and returns the matching `Segment`.
///
/// The row gets a freshly generated UUID and is stored as not edited
/// (`is_edited` = 0). The returned value has `original_text`, `confidence`
/// and `edited_at` set to `None`.
pub fn create_segment(
    conn: &Connection,
    project_id: &str,
    media_file_id: &str,
    speaker_id: Option<&str>,
    start_ms: i64,
    end_ms: i64,
    text: &str,
    segment_index: i32,
) -> Result<Segment, DatabaseError> {
    // Build the value first, then insert exactly what will be returned.
    let segment = Segment {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        media_file_id: media_file_id.to_owned(),
        speaker_id: speaker_id.map(str::to_owned),
        start_ms,
        end_ms,
        text: text.to_owned(),
        original_text: None,
        confidence: None,
        is_edited: false,
        edited_at: None,
        segment_index,
    };

    conn.execute(
        "INSERT INTO segments (id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, is_edited, segment_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 0, ?8)",
        params![
            segment.id,
            segment.project_id,
            segment.media_file_id,
            segment.speaker_id,
            segment.start_ms,
            segment.end_ms,
            segment.text,
            segment.segment_index
        ],
    )?;

    Ok(segment)
}
// ── Words ───────────────────────────────────────────────────────── // ── Words ─────────────────────────────────────────────────────────
pub fn get_words_for_segment( pub fn get_words_for_segment(
@@ -217,6 +301,31 @@ pub fn get_words_for_segment(
Ok(rows.collect::<Result<Vec<_>, _>>()?) Ok(rows.collect::<Result<Vec<_>, _>>()?)
} }
/// Inserts a `words` row for `segment_id` and returns the matching `Word`.
///
/// The row gets a freshly generated UUID; all other columns are taken from
/// the arguments unchanged.
pub fn create_word(
    conn: &Connection,
    segment_id: &str,
    word: &str,
    start_ms: i64,
    end_ms: i64,
    confidence: Option<f64>,
    word_index: i32,
) -> Result<Word, DatabaseError> {
    // Build the value first, then insert exactly what will be returned.
    let entry = Word {
        id: Uuid::new_v4().to_string(),
        segment_id: segment_id.to_owned(),
        word: word.to_owned(),
        start_ms,
        end_ms,
        confidence,
        word_index,
    };

    conn.execute(
        "INSERT INTO words (id, segment_id, word, start_ms, end_ms, confidence, word_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        params![
            entry.id,
            entry.segment_id,
            entry.word,
            entry.start_ms,
            entry.end_ms,
            entry.confidence,
            entry.word_index
        ],
    )?;

    Ok(entry)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@@ -9,7 +9,10 @@ use tauri::Manager;
use commands::ai::{ai_chat, ai_configure, ai_list_providers}; use commands::ai::{ai_chat, ai_configure, ai_list_providers};
use commands::export::export_transcript; use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects}; use commands::project::{
create_project, delete_project, get_project, list_projects, load_project_file,
load_project_transcript, save_project_file, save_project_transcript, update_segment,
};
use commands::settings::{load_settings, save_settings}; use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop}; use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file}; use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
@@ -34,6 +37,12 @@ pub fn run() {
create_project, create_project,
get_project, get_project,
list_projects, list_projects,
delete_project,
save_project_transcript,
load_project_transcript,
update_segment,
save_project_file,
load_project_file,
transcribe_file, transcribe_file,
run_pipeline, run_pipeline,
download_diarize_model, download_diarize_model,

View File

@@ -88,6 +88,88 @@
messages = []; messages = [];
} }
/**
 * Escape the characters that are significant in HTML so that model output
 * can never inject markup when the result is rendered with `{@html}`.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Convert a small Markdown subset to HTML.
 *
 * Supported block syntax (one construct per line):
 * - `# `, `## `, `### ` headings, rendered as `<h2>`, `<h3>`, `<h4>`
 * - `- ` / `* ` bullet items, grouped into `<ul>`
 * - `1. ` numbered items, grouped into `<ol>`
 * - an empty line, rendered as `<br>`
 *
 * Every other line is emitted as inline-formatted text. All text content is
 * HTML-escaped, so the returned string is safe to render with `{@html}`.
 *
 * @param text Raw Markdown text.
 * @returns The generated HTML fragments joined with newlines.
 */
function formatMarkdown(text: string): string {
  const headingTags: Array<[string, string]> = [
    ['### ', 'h4'],
    ['## ', 'h3'],
    ['# ', 'h2'],
  ];
  const result: string[] = [];

  // Tracks which list element is currently open so it is always closed with
  // the matching tag, and so switching list kinds starts a new list.
  let openList: 'ul' | 'ol' | null = null;

  const closeList = () => {
    if (openList) {
      result.push(`</${openList}>`);
      openList = null;
    }
  };
  const ensureList = (kind: 'ul' | 'ol') => {
    if (openList !== kind) {
      closeList();
      result.push(`<${kind}>`);
      openList = kind;
    }
  };

  for (const line of text.split('\n')) {
    // Headings (longest prefix first so "### " is not read as "# ")
    const heading = headingTags.find(([prefix]) => line.startsWith(prefix));
    if (heading) {
      const [prefix, tag] = heading;
      closeList();
      result.push(`<${tag}>${applyInlineFormatting(line.slice(prefix.length))}</${tag}>`);
      continue;
    }

    // List items (- or *)
    if (/^[\-\*] /.test(line)) {
      ensureList('ul');
      result.push(`<li>${applyInlineFormatting(line.slice(2))}</li>`);
      continue;
    }

    // Numbered list items
    if (/^\d+\.\s/.test(line)) {
      ensureList('ol');
      result.push(`<li>${applyInlineFormatting(line.replace(/^\d+\.\s/, ''))}</li>`);
      continue;
    }

    // Non-list line: close any open list
    closeList();

    // Empty line = paragraph break
    if (line.trim() === '') {
      result.push('<br>');
      continue;
    }

    // Regular text line
    result.push(applyInlineFormatting(line));
  }

  // Close any trailing open list
  closeList();

  return result.join('\n');
}

/**
 * Apply inline Markdown formatting (`code`, **bold**, *italic*) to one line.
 *
 * The input is HTML-escaped first, so the only tags in the result are the
 * ones generated here.
 *
 * @param text A single line of raw text.
 * @returns The line as an HTML string.
 */
function applyInlineFormatting(text: string): string {
  // Escape before inserting any tags; escaping never produces ` or *.
  text = escapeHtml(text);

  // Inline code (backticks) — processed first
  text = text.replace(/`([^`]+)`/g, '<code>$1</code>');

  // Bold (**text**)
  text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

  // Italic (*text*) — only single asterisks not already consumed by bold
  text = text.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  return text;
}
// Quick action buttons // Quick action buttons
async function summarize() { async function summarize() {
inputText = 'Please summarize this transcript in bullet points.'; inputText = 'Please summarize this transcript in bullet points.';
@@ -122,7 +204,11 @@
{:else} {:else}
{#each messages as msg} {#each messages as msg}
<div class="message {msg.role}"> <div class="message {msg.role}">
{#if msg.role === 'assistant'}
<div class="message-content">{@html formatMarkdown(msg.content)}</div>
{:else}
<div class="message-content">{msg.content}</div> <div class="message-content">{msg.content}</div>
{/if}
</div> </div>
{/each} {/each}
{#if isLoading} {#if isLoading}
@@ -192,47 +278,101 @@
} }
.empty-state { .empty-state {
text-align: center; text-align: center;
color: #666; color: #888;
font-size: 0.8rem; font-size: 0.85rem;
padding: 1rem 0; padding: 2rem 1rem;
}
.empty-state p {
margin-bottom: 1rem;
} }
.quick-actions { .quick-actions {
display: flex; display: flex;
gap: 0.5rem; gap: 0.75rem;
justify-content: center; justify-content: center;
margin-top: 0.5rem; margin-top: 1rem;
} }
.quick-btn { .quick-btn {
background: rgba(233, 69, 96, 0.15); background: rgba(233, 69, 96, 0.15);
border: 1px solid rgba(233, 69, 96, 0.3); border: 1px solid rgba(233, 69, 96, 0.3);
color: #e94560; color: #e94560;
padding: 0.3rem 0.6rem; padding: 0.45rem 0.85rem;
border-radius: 4px; border-radius: 6px;
cursor: pointer; cursor: pointer;
font-size: 0.75rem; font-size: 0.8rem;
transition: background 0.15s;
} }
.quick-btn:hover { .quick-btn:hover {
background: rgba(233, 69, 96, 0.25); background: rgba(233, 69, 96, 0.25);
} }
.message { .message {
margin-bottom: 0.5rem; margin-bottom: 0.75rem;
padding: 0.5rem 0.75rem; padding: 0.75rem 1rem;
border-radius: 6px; border-radius: 8px;
font-size: 0.8rem; font-size: 0.8rem;
line-height: 1.4; line-height: 1.55;
} }
.message.user { .message.user {
background: rgba(233, 69, 96, 0.15); background: rgba(233, 69, 96, 0.15);
margin-left: 1rem; border-left: 3px solid rgba(233, 69, 96, 0.4);
} }
.message.assistant { .message.assistant {
background: rgba(255, 255, 255, 0.05); background: rgba(255, 255, 255, 0.05);
margin-right: 1rem; border-left: 3px solid rgba(255, 255, 255, 0.1);
} }
.message.loading { .message.loading {
opacity: 0.6; opacity: 0.6;
font-style: italic; font-style: italic;
} }
/* Markdown styles inside assistant messages */
.message.assistant :global(h2) {
font-size: 1rem;
font-weight: 600;
margin: 0.6rem 0 0.3rem;
color: #f0f0f0;
}
.message.assistant :global(h3) {
font-size: 0.9rem;
font-weight: 600;
margin: 0.5rem 0 0.25rem;
color: #e8e8e8;
}
.message.assistant :global(h4) {
font-size: 0.85rem;
font-weight: 600;
margin: 0.4rem 0 0.2rem;
color: #e0e0e0;
}
.message.assistant :global(strong) {
color: #f0f0f0;
font-weight: 600;
}
.message.assistant :global(em) {
color: #ccc;
font-style: italic;
}
.message.assistant :global(code) {
background: rgba(0, 0, 0, 0.3);
color: #e94560;
padding: 0.1rem 0.35rem;
border-radius: 3px;
font-size: 0.75rem;
font-family: 'Fira Code', 'Cascadia Code', 'Consolas', monospace;
}
.message.assistant :global(ul),
.message.assistant :global(ol) {
margin: 0.35rem 0;
padding-left: 1.3rem;
}
.message.assistant :global(li) {
margin-bottom: 0.25rem;
line-height: 1.5;
}
.message.assistant :global(br) {
display: block;
content: '';
margin-top: 0.35rem;
}
.chat-input { .chat-input {
display: flex; display: flex;
gap: 0.5rem; gap: 0.5rem;

View File

@@ -60,12 +60,14 @@
function finishEditing(segmentId: string) { function finishEditing(segmentId: string) {
const trimmed = editText.trim(); const trimmed = editText.trim();
if (trimmed) { if (trimmed) {
// Update the segment text in the store
segments.update(segs => segs.map(s => { segments.update(segs => segs.map(s => {
if (s.id !== segmentId) return s; if (s.id !== segmentId) return s;
const newWordTexts = trimmed.split(/\s+/);
const newWords = redistributeWords(s, newWordTexts);
return { return {
...s, ...s,
text: trimmed, text: trimmed,
words: newWords,
original_text: s.original_text ?? s.text, original_text: s.original_text ?? s.text,
is_edited: true, is_edited: true,
edited_at: new Date().toISOString(), edited_at: new Date().toISOString(),
@@ -76,6 +78,106 @@
editingSegmentId = null; editingSegmentId = null;
} }
/**
 * Redistribute word timing after an edit.
 *
 * - Same word count: every word keeps its slot's timing (spelling fixes).
 * - Otherwise the old and new word lists are aligned with a longest-common-
 *   subsequence match (case-insensitive). Matched words keep their original
 *   id, timing and confidence.
 * - New words that replace one or more old words share the time range those
 *   old words covered, split evenly, and take the confidence of the first
 *   replaced word.
 * - New words with nothing to replace are placed, split evenly, in the gap
 *   between the neighbouring words (or the segment bounds) with confidence 1.0.
 * - Old words with no counterpart are dropped without disturbing the timing
 *   of the words after them.
 *
 * @param segment The segment being edited; its `words` carry the old timing.
 * @param newWordTexts The edited text split into words.
 * @returns The new word list, with `word_index` matching the new positions
 *   whenever the word count changed.
 */
function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
  const oldWords = segment.words;

  // Same word count — preserve per-word timing (spelling fixes)
  if (newWordTexts.length === oldWords.length) {
    return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
  }

  const oldTexts = oldWords.map(w => w.word.toLowerCase());
  const newTexts = newWordTexts.map(w => w.toLowerCase());
  const oldCount = oldTexts.length;
  const newCount = newTexts.length;

  // lcs[i][j] = length of the longest common subsequence of
  // oldTexts[i..] and newTexts[j..]; the extra row/column stays 0.
  const lcs: number[][] = Array.from({ length: oldCount + 1 }, () =>
    new Array<number>(newCount + 1).fill(0)
  );
  for (let i = oldCount - 1; i >= 0; i--) {
    for (let j = newCount - 1; j >= 0; j--) {
      lcs[i][j] =
        oldTexts[i] === newTexts[j]
          ? lcs[i + 1][j + 1] + 1
          : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
    }
  }

  const result: Word[] = [];

  // Emit new words [newFrom, newTo), which stand in for old words
  // [oldFrom, oldTo) — the unmatched run between two matched words.
  const fillGap = (oldFrom: number, oldTo: number, newFrom: number, newTo: number) => {
    const count = newTo - newFrom;
    if (count <= 0) return; // pure deletion: nothing to emit

    let rangeStart: number;
    let rangeEnd: number;
    let confidence: Word['confidence'];
    if (oldTo > oldFrom) {
      // Replacement/split: reuse the time the replaced words occupied.
      rangeStart = oldWords[oldFrom].start_ms;
      rangeEnd = oldWords[oldTo - 1].end_ms;
      confidence = oldWords[oldFrom].confidence;
    } else {
      // Pure insertion: interpolate between the neighbours.
      rangeStart = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
      rangeEnd = oldTo < oldCount ? oldWords[oldTo].start_ms : segment.end_ms;
      confidence = 1.0;
    }

    // Guard against overlapping neighbours producing a negative span.
    const span = Math.max(0, rangeEnd - rangeStart);
    for (let k = 0; k < count; k++) {
      result.push({
        id: `${segment.id}-word-${newFrom + k}`,
        segment_id: segment.id,
        word: newWordTexts[newFrom + k],
        start_ms: Math.round(rangeStart + (span * k) / count),
        end_ms: Math.round(rangeStart + (span * (k + 1)) / count),
        confidence,
        word_index: newFrom + k,
      });
    }
  };

  // Walk the LCS table, emitting matched words and filling the gaps between.
  let oldIdx = 0;
  let newIdx = 0;
  let gapOldStart = 0;
  let gapNewStart = 0;
  while (oldIdx < oldCount && newIdx < newCount) {
    if (oldTexts[oldIdx] === newTexts[newIdx]) {
      fillGap(gapOldStart, oldIdx, gapNewStart, newIdx);
      // Exact match — keep original timing
      result.push({ ...oldWords[oldIdx], word: newWordTexts[newIdx], word_index: newIdx });
      oldIdx++;
      newIdx++;
      gapOldStart = oldIdx;
      gapNewStart = newIdx;
    } else if (lcs[oldIdx + 1][newIdx] >= lcs[oldIdx][newIdx + 1]) {
      oldIdx++;
    } else {
      newIdx++;
    }
  }
  // Whatever is left on either side forms the final gap.
  fillGap(gapOldStart, oldCount, gapNewStart, newCount);

  return result;
}
function handleEditKeydown(e: KeyboardEvent, segmentId: string) { function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
if (e.key === 'Escape') { if (e.key === 'Escape') {
editingSegmentId = null; editingSegmentId = null;

View File

@@ -18,6 +18,11 @@
let audioUrl = $state(''); let audioUrl = $state('');
let showSettings = $state(false); let showSettings = $state(false);
// Project management state
let currentProjectPath = $state<string | null>(null);
let currentProjectName = $state('');
let audioFilePath = $state('');
onMount(() => { onMount(() => {
loadSettings(); loadSettings();
@@ -44,8 +49,8 @@
// Close export dropdown on outside click // Close export dropdown on outside click
function handleClickOutside(e: MouseEvent) { function handleClickOutside(e: MouseEvent) {
if (showExportMenu) {
const target = e.target as HTMLElement; const target = e.target as HTMLElement;
if (showExportMenu) {
if (!target.closest('.export-dropdown')) { if (!target.closest('.export-dropdown')) {
showExportMenu = false; showExportMenu = false;
} }
@@ -70,11 +75,136 @@
// Speaker color palette for auto-assignment // Speaker color palette for auto-assignment
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77']; const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
/**
 * Ask the user for a destination and write the current transcript to a
 * `.vtn` project file via the `save_project_file` command.
 *
 * The project name is taken from the chosen file name without its `.vtn`
 * extension. On success the current project path and name are updated; on
 * failure the error is logged and shown in an alert. Does nothing when the
 * dialog is cancelled.
 */
async function saveProject() {
  const defaultName = currentProjectName || 'Untitled';
  const outputPath = await save({
    defaultPath: `${defaultName}.vtn`,
    filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
  });
  if (!outputPath) return;

  // Strip only a trailing ".vtn" extension. A plain string replace would
  // remove the first ".vtn" found anywhere in the file name instead.
  const fileName = outputPath.split(/[\\/]/).pop() ?? '';
  const projectName = fileName.replace(/\.vtn$/i, '') || defaultName;

  const projectData = {
    version: 1,
    name: projectName,
    audio_file: audioFilePath,
    created_at: new Date().toISOString(),
    segments: $segments.map(seg => {
      // The file stores the speaker label rather than the in-memory id.
      const speaker = $speakers.find(s => s.id === seg.speaker_id);
      return {
        text: seg.text,
        start_ms: seg.start_ms,
        end_ms: seg.end_ms,
        speaker: speaker?.label ?? null,
        is_edited: seg.is_edited,
        words: seg.words.map(w => ({
          word: w.word,
          start_ms: w.start_ms,
          end_ms: w.end_ms,
          // The file format requires a number, so a missing confidence becomes 0.
          confidence: w.confidence ?? 0,
        })),
      };
    }),
    speakers: $speakers.map(s => ({
      label: s.label,
      display_name: s.display_name,
      color: s.color || '#e94560',
    })),
  };

  try {
    await invoke('save_project_file', { path: outputPath, project: projectData });
    currentProjectPath = outputPath;
    currentProjectName = projectData.name;
  } catch (err) {
    console.error('Failed to save project:', err);
    alert(`Failed to save: ${err}`);
  }
}
/**
 * Let the user pick a `.vtn` project file, load it through the
 * `load_project_file` command and rebuild the in-memory state from it.
 *
 * Speakers and segments receive fresh positional ids (`speaker-N`, `seg-N`,
 * `word-N-M`); segments are linked to speakers through the stored label.
 * The referenced audio file is then loaded into the waveform player and the
 * current project path/name are updated. Failures are logged and shown in
 * an alert. Does nothing when the dialog is cancelled.
 */
async function openProject() {
  // Shape of the payload returned by `load_project_file`.
  type StoredWord = { word: string; start_ms: number; end_ms: number; confidence: number };
  type StoredSegment = {
    text: string;
    start_ms: number;
    end_ms: number;
    speaker: string | null;
    is_edited: boolean;
    words: Array<StoredWord>;
  };
  type StoredSpeaker = { label: string; display_name: string | null; color: string };
  type StoredProject = {
    version: number;
    name: string;
    audio_file: string;
    segments: Array<StoredSegment>;
    speakers: Array<StoredSpeaker>;
  };

  const filePath = await open({
    filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
    multiple: false,
  });
  if (!filePath) return;

  try {
    const loaded = await invoke<StoredProject>('load_project_file', { path: filePath });

    // Rebuild speakers
    const rebuiltSpeakers: Speaker[] = loaded.speakers.map((stored, idx) => {
      return {
        id: `speaker-${idx}`,
        project_id: '',
        label: stored.label,
        display_name: stored.display_name,
        color: stored.color,
      };
    });
    speakers.set(rebuiltSpeakers);

    // label -> generated speaker id (a later duplicate label overrides an earlier one)
    const idByLabel = new Map<string, string>();
    for (const entry of rebuiltSpeakers) {
      idByLabel.set(entry.label, entry.id);
    }

    // Rebuild segments
    const rebuiltSegments: Segment[] = loaded.segments.map((stored, idx) => {
      let speakerId: string | null = null;
      if (stored.speaker) {
        speakerId = idByLabel.get(stored.speaker) ?? null;
      }
      const words = stored.words.map((w, widx) => ({
        id: `word-${idx}-${widx}`,
        segment_id: `seg-${idx}`,
        word: w.word,
        start_ms: w.start_ms,
        end_ms: w.end_ms,
        confidence: w.confidence,
        word_index: widx,
      }));
      return {
        id: `seg-${idx}`,
        project_id: '',
        media_file_id: '',
        speaker_id: speakerId,
        start_ms: stored.start_ms,
        end_ms: stored.end_ms,
        text: stored.text,
        original_text: null,
        confidence: null,
        is_edited: stored.is_edited,
        edited_at: null,
        segment_index: idx,
        words,
      };
    });
    segments.set(rebuiltSegments);

    // Load audio
    audioFilePath = loaded.audio_file;
    audioUrl = convertFileSrc(loaded.audio_file);
    waveformPlayer?.loadAudio(audioUrl);

    currentProjectPath = filePath as string;
    currentProjectName = loaded.name;
  } catch (err) {
    console.error('Failed to load project:', err);
    alert(`Failed to load project: ${err}`);
  }
}
function handleWordClick(timeMs: number) { function handleWordClick(timeMs: number) {
console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms'); console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
waveformPlayer?.seekTo(timeMs); waveformPlayer?.seekTo(timeMs);
} }
/**
 * `onTextEdit` callback passed to TranscriptEditor.
 *
 * Deliberately does nothing: both arguments are accepted only to satisfy
 * the callback signature.
 */
function handleTextEdit(segmentId: string, newText: string): void {
  // No work needed here. TranscriptEditor has already written the edit to
  // the in-memory store, and it is persisted when the user saves the
  // project file.
}
async function handleFileImport() { async function handleFileImport() {
const filePath = await open({ const filePath = await open({
multiple: false, multiple: false,
@@ -86,7 +216,8 @@
}); });
if (!filePath) return; if (!filePath) return;
// Convert file path to asset URL for wavesurfer // Track the original file path and convert to asset URL for wavesurfer
audioFilePath = filePath;
audioUrl = convertFileSrc(filePath); audioUrl = convertFileSrc(filePath);
waveformPlayer?.loadAudio(audioUrl); waveformPlayer?.loadAudio(audioUrl);
@@ -245,6 +376,11 @@
})); }));
segments.set(newSegments); segments.set(newSegments);
// Set project name from audio file name (user can save explicitly)
const fileName = filePath.split(/[\\/]/).pop() || 'Untitled';
currentProjectName = fileName.replace(/\.[^.]+$/, '');
currentProjectPath = null;
} catch (err) { } catch (err) {
console.error('Pipeline failed:', err); console.error('Pipeline failed:', err);
alert(`Pipeline failed: ${err}`); alert(`Pipeline failed: ${err}`);
@@ -316,8 +452,15 @@
{:else} {:else}
<div class="app-shell"> <div class="app-shell">
<div class="app-header"> <div class="app-header">
<h1>Voice to Notes</h1>
<div class="header-actions"> <div class="header-actions">
<button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
Open Project
</button>
{#if $segments.length > 0}
<button class="settings-btn" onclick={saveProject}>
Save Project
</button>
{/if}
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}> <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
{#if isTranscribing} {#if isTranscribing}
Processing... Processing...
@@ -350,7 +493,7 @@
<div class="workspace"> <div class="workspace">
<div class="main-content"> <div class="main-content">
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} /> <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
<TranscriptEditor onWordClick={handleWordClick} /> <TranscriptEditor onWordClick={handleWordClick} onTextEdit={handleTextEdit} />
</div> </div>
<div class="sidebar-right"> <div class="sidebar-right">
<SpeakerManager /> <SpeakerManager />
@@ -381,10 +524,6 @@
background: #0f3460; background: #0f3460;
color: #e0e0e0; color: #e0e0e0;
} }
h1 {
font-size: 1.25rem;
margin: 0;
}
.import-btn { .import-btn {
background: #e94560; background: #e94560;
border: none; border: none;
@@ -421,10 +560,14 @@
cursor: pointer; cursor: pointer;
font-size: 0.875rem; font-size: 0.875rem;
} }
.settings-btn:hover { .settings-btn:hover:not(:disabled) {
background: rgba(255,255,255,0.05); background: rgba(255,255,255,0.05);
border-color: #e94560; border-color: #e94560;
} }
.settings-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.export-dropdown { .export-dropdown {
position: relative; position: relative;
} }