Files

365 lines
13 KiB
Python
Raw Permalink Normal View History

"""Main application window for the local transcription app."""
import customtkinter as ctk
from tkinter import filedialog, messagebox
import threading
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display import TranscriptionDisplay
from gui.settings_dialog import SettingsDialog
class MainWindow(ctk.CTk):
    """Main application window.

    Builds the header, status bar, transcription display and control
    buttons, and connects them to audio capture, noise suppression and
    the transcription engine.
    """

    def __init__(self):
        """Initialize the main window."""
        super().__init__()

        # Application state
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Components (initialized later). The engine is created in
        # _initialize_components(); the capture and suppressor are created
        # on every _start_transcription(). All three are None until then.
        self.audio_capture: AudioCapture = None
        self.noise_suppressor: NoiseSuppressor = None
        self.transcription_engine: TranscriptionEngine = None

        # Configure window
        self.title("Local Transcription")
        self.geometry("900x700")

        # Set theme
        ctk.set_appearance_mode(self.config.get('display.theme', 'dark'))
        ctk.set_default_color_theme("blue")

        # Create UI
        self._create_widgets()

        # Handle window close
        self.protocol("WM_DELETE_WINDOW", self._on_closing)

        # Initialize components after GUI is ready (delay to avoid XCB
        # threading issues)
        self.after(100, self._initialize_components)

    def _create_widgets(self):
        """Create all UI widgets.

        Lays out, top to bottom: a header (title + settings button), a
        status bar (status, compute device, user name), the transcription
        display, and a control bar (start/stop, clear, save).
        """
        # Header frame (fixed height: propagation disabled so children do
        # not resize it)
        header_frame = ctk.CTkFrame(self, height=80)
        header_frame.pack(fill="x", padx=10, pady=(10, 0))
        header_frame.pack_propagate(False)

        # Title
        title_label = ctk.CTkLabel(
            header_frame,
            text="Local Transcription",
            font=("", 24, "bold")
        )
        title_label.pack(side="left", padx=20, pady=20)

        # Settings button
        self.settings_button = ctk.CTkButton(
            header_frame,
            text="⚙ Settings",
            command=self._open_settings,
            width=120
        )
        self.settings_button.pack(side="right", padx=20, pady=20)

        # Status frame
        status_frame = ctk.CTkFrame(self, height=60)
        status_frame.pack(fill="x", padx=10, pady=(10, 0))
        status_frame.pack_propagate(False)

        # Status label
        self.status_label = ctk.CTkLabel(
            status_frame,
            text="⚫ Ready",
            font=("", 14)
        )
        self.status_label.pack(side="left", padx=20)

        # Device info: show the second field of the first reported entry,
        # or a placeholder when nothing is reported.
        device_info = self.device_manager.get_device_info()
        device_text = device_info[0][1] if device_info else "No device"
        self.device_label = ctk.CTkLabel(
            status_frame,
            text=f"Device: {device_text}",
            font=("", 12)
        )
        self.device_label.pack(side="left", padx=20)

        # User name display
        user_name = self.config.get('user.name', 'User')
        self.user_label = ctk.CTkLabel(
            status_frame,
            text=f"User: {user_name}",
            font=("", 12)
        )
        self.user_label.pack(side="left", padx=20)

        # Transcription display frame
        display_frame = ctk.CTkFrame(self)
        display_frame.pack(fill="both", expand=True, padx=10, pady=10)

        # Transcription display
        self.transcription_display = TranscriptionDisplay(
            display_frame,
            max_lines=self.config.get('display.max_lines', 100),
            show_timestamps=self.config.get('display.show_timestamps', True),
            font=("Courier", self.config.get('display.font_size', 12))
        )
        self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)

        # Control frame
        control_frame = ctk.CTkFrame(self, height=80)
        control_frame.pack(fill="x", padx=10, pady=(0, 10))
        control_frame.pack_propagate(False)

        # Start/Stop button
        self.start_button = ctk.CTkButton(
            control_frame,
            text="▶ Start Transcription",
            command=self._toggle_transcription,
            width=200,
            height=50,
            font=("", 16, "bold"),
            fg_color="green"
        )
        self.start_button.pack(side="left", padx=20, pady=15)

        # Clear button
        self.clear_button = ctk.CTkButton(
            control_frame,
            text="Clear",
            command=self._clear_transcriptions,
            width=120,
            height=50
        )
        self.clear_button.pack(side="left", padx=10, pady=15)

        # Save button
        self.save_button = ctk.CTkButton(
            control_frame,
            text="💾 Save",
            command=self._save_transcriptions,
            width=120,
            height=50
        )
        self.save_button.pack(side="left", padx=10, pady=15)

    def _initialize_components(self):
        """Initialize the transcription engine and load its model.

        Scheduled from __init__ via after(). Any failure is reported in
        the status label and an error dialog instead of being raised.
        """
        # Update status and force a redraw so the message is visible
        # before the blocking model load starts.
        self.status_label.configure(text="⚙ Initializing...")
        self.update()

        try:
            # Set device based on config
            device_config = self.config.get('transcription.device', 'auto')
            self.device_manager.set_device(device_config)

            # Initialize transcription engine
            model_size = self.config.get('transcription.model', 'base')
            language = self.config.get('transcription.language', 'en')
            device = self.device_manager.get_device_for_whisper()
            compute_type = self.device_manager.get_compute_type()

            self.transcription_engine = TranscriptionEngine(
                model_size=model_size,
                device=device,
                compute_type=compute_type,
                language=language,
                min_confidence=self.config.get('processing.min_confidence', 0.5)
            )

            # Load model (synchronously to avoid X11 threading issues)
            success = self.transcription_engine.load_model()

            if success:
                self.status_label.configure(text="✓ Ready")
            else:
                self.status_label.configure(text="❌ Model loading failed")
                messagebox.showerror("Error", "Failed to load transcription model")
        except Exception as e:
            print(f"Error initializing components: {e}")
            self.status_label.configure(text="❌ Initialization failed")
            messagebox.showerror("Error", f"Failed to initialize:\n{e}")

    def _update_status(self, status: str):
        """Update the status label text.

        The change is scheduled with after(0, ...) rather than applied
        directly, so the label is configured from the Tk event loop.
        """
        self.after(0, lambda: self.status_label.configure(text=status))

    def _toggle_transcription(self):
        """Start transcription if idle, otherwise stop it."""
        if not self.is_transcribing:
            self._start_transcription()
        else:
            self._stop_transcription()

    def _start_transcription(self):
        """Create the capture/suppression components and start recording.

        Shows an error dialog and leaves the UI unchanged if the engine
        is not loaded or if any step raises.
        """
        try:
            # Check if engine is ready
            if not self.transcription_engine or not self.transcription_engine.is_loaded:
                messagebox.showerror("Error", "Transcription engine not ready")
                return

            # Get audio device. 'default' (or a missing/None value) selects
            # the default input device; anything else must be a device
            # index.
            audio_device_str = self.config.get('audio.input_device', 'default')
            if audio_device_str is None or audio_device_str == 'default':
                audio_device = None
            else:
                audio_device = int(audio_device_str)

            # Initialize audio capture
            self.audio_capture = AudioCapture(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                device=audio_device
            )

            # Initialize noise suppressor
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True)
            )

            # Start recording
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Update UI
            self.is_transcribing = True
            self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")
            self.status_label.configure(text="🔴 Recording...")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to start transcription:\n{e}")
            print(f"Error starting transcription: {e}")

    def _stop_transcription(self):
        """Stop recording and return the UI to the idle state."""
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Update UI
            self.is_transcribing = False
            self.start_button.configure(text="▶ Start Transcription", fg_color="green")
            self.status_label.configure(text="✓ Ready")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Process an audio chunk (noise suppression + transcription).

        Registered as the recording callback in _start_transcription().
        Each chunk is handled on its own daemon thread; a successful
        result is handed back to the display through after(0, ...).

        Args:
            audio_chunk: Audio data delivered by the capture callback.
        """
        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)

                # Skip if silent (VAD filtered it out)
                if processed_audio is None:
                    return

                # Transcribe
                user_name = self.config.get('user.name', 'User')
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=user_name
                )

                # Display result
                if result:
                    self.after(0, lambda: self.transcription_display.add_transcription(
                        text=result.text,
                        user_name=result.user_name,
                        timestamp=result.timestamp
                    ))
            except Exception as e:
                print(f"Error processing audio: {e}")

        # Run in background thread
        threading.Thread(target=process, daemon=True).start()

    def _clear_transcriptions(self):
        """Clear all transcriptions after the user confirms."""
        if messagebox.askyesno("Clear Transcriptions", "Are you sure you want to clear all transcriptions?"):
            self.transcription_display.clear()

    def _save_transcriptions(self):
        """Ask for a destination file and save the transcriptions to it.

        Does nothing if the user cancels the file dialog.
        """
        filepath = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
        )
        if filepath:
            if self.transcription_display.save_to_file(filepath):
                messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}")
            else:
                messagebox.showerror("Error", "Failed to save transcriptions")

    def _open_settings(self):
        """Open the settings dialog."""
        # Get audio devices
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Get compute devices. Build a new list instead of inserting into
        # the one returned by the device manager: if the manager hands out
        # its own list, inserting would add another "Auto-detect" entry
        # every time the dialog is opened.
        compute_devices = [
            ("auto", "Auto-detect"),
            *self.device_manager.get_device_info(),
        ]

        # Open settings dialog
        SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )

    def _on_settings_saved(self):
        """Apply saved settings that can take effect without a restart."""
        # Update user label
        user_name = self.config.get('user.name', 'User')
        self.user_label.configure(text=f"User: {user_name}")

        # Update display settings
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(self.config.get('display.show_timestamps', True))

        # Note: Model/device changes require restart
        messagebox.showinfo(
            "Settings Saved",
            "Some settings (model size, device) require restarting the application to take effect."
        )

    def _on_closing(self):
        """Handle window closing.

        Stops any running transcription and unloads the model. The window
        is destroyed even if that cleanup raises, so a failing unload can
        never leave the user with a window that refuses to close.
        """
        try:
            # Stop transcription if running
            if self.is_transcribing:
                self._stop_transcription()

            # Unload model
            if self.transcription_engine:
                self.transcription_engine.unload_model()
        except Exception as e:
            print(f"Error during shutdown: {e}")
        finally:
            # Close window
            self.destroy()