Phase 1 Complete - Standalone Desktop Application Features: - Real-time speech-to-text with Whisper (faster-whisper) - PySide6 desktop GUI with settings dialog - Web server for OBS browser source integration - Audio capture with automatic sample rate detection and resampling - Noise suppression with Voice Activity Detection (VAD) - Configurable display settings (font, timestamps, fade duration) - Settings apply without restart (with automatic model reloading) - Auto-fade for web display transcriptions - CPU/GPU support with automatic device detection - Standalone executable builds (PyInstaller) - CUDA build support (works on systems without CUDA hardware) Components: - Audio capture with sounddevice - Noise reduction with noisereduce + webrtcvad - Transcription with faster-whisper - GUI with PySide6 - Web server with FastAPI + WebSocket - Configuration system with YAML Build System: - Standard builds (CPU-only): build.sh / build.bat - CUDA builds (universal): build-cuda.sh / build-cuda.bat - Comprehensive BUILD.md documentation - Cross-platform support (Linux, Windows) Documentation: - README.md with project overview and quick start - BUILD.md with detailed build instructions - NEXT_STEPS.md with future enhancement roadmap - INSTALL.md with setup instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
128 lines
3.6 KiB
Python
128 lines
3.6 KiB
Python
"""Transcription display widget for showing real-time transcriptions."""
|
|
|
|
from datetime import datetime
from typing import List, Optional

import customtkinter as ctk
|
|
|
|
|
|
class TranscriptionDisplay(ctk.CTkTextbox):
    """Read-only textbox that shows a rolling log of transcriptions.

    Each transcription is appended as one entry, optionally prefixed with a
    ``[HH:MM:SS]`` timestamp and a speaker name. Once the number of displayed
    lines exceeds ``max_lines`` the oldest lines are removed.
    """

    def __init__(self, master, max_lines: int = 100, show_timestamps: bool = True, **kwargs):
        """
        Initialize transcription display.

        Args:
            master: Parent widget
            max_lines: Maximum number of lines to keep in display
            show_timestamps: Whether to show timestamps
            **kwargs: Additional arguments for CTkTextbox
        """
        super().__init__(master, **kwargs)

        self.max_lines = max_lines
        self.show_timestamps = show_timestamps
        # Number of physical text lines currently held by the widget.
        self.line_count = 0

        # The widget is kept read-only; it is unlocked only while this class
        # inserts or deletes text.
        self.configure(state="disabled")

    def add_transcription(self, text: str, user_name: str = "", timestamp: Optional[datetime] = None):
        """
        Add a new transcription to the display.

        Args:
            text: Transcription text
            user_name: User/speaker name (omitted from the line when empty)
            timestamp: Timestamp of transcription; defaults to the current
                local time when not given
        """
        if timestamp is None:
            timestamp = datetime.now()

        # Build the display line: "[HH:MM:SS] user: text"
        line_parts = []

        if self.show_timestamps:
            time_str = timestamp.strftime("%H:%M:%S")
            line_parts.append(f"[{time_str}]")

        if user_name:
            line_parts.append(f"{user_name}:")

        line_parts.append(text)

        line = " ".join(line_parts) + "\n"

        # Unlock, append, and always re-lock so a failed insert cannot leave
        # the widget editable.
        self.configure(state="normal")
        try:
            self.insert("end", line)
        finally:
            self.configure(state="disabled")

        # Auto-scroll to bottom
        self.see("end")

        # Count physical lines, not entries: text may contain embedded
        # newlines, and trimming below works on physical lines.
        self.line_count += line.count("\n")

        # Remove old lines if exceeding max
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def _remove_oldest_lines(self, num_lines: int):
        """
        Remove oldest lines from the display.

        Args:
            num_lines: Number of lines to remove; values larger than the
                current line count are clamped, values <= 0 are ignored
        """
        # Never remove more than is tracked, so line_count cannot go negative
        # (e.g. when max_lines was set to a negative number).
        num_lines = min(num_lines, self.line_count)
        if num_lines <= 0:
            return

        self.configure(state="normal")
        try:
            # Text indices are "line.column" with 1-based lines, so this
            # deletes lines 1..num_lines inclusive.
            self.delete("1.0", f"{num_lines + 1}.0")
        finally:
            self.configure(state="disabled")

        self.line_count -= num_lines

    def clear(self):
        """Clear all transcriptions."""
        self.configure(state="normal")
        try:
            self.delete("1.0", "end")
        finally:
            self.configure(state="disabled")

        self.line_count = 0

    def get_all_text(self) -> str:
        """
        Get all transcription text.

        Returns:
            All text in the display, without the extra trailing newline the
            underlying text widget appends after the last character
        """
        # "end" points past the widget's implicit final newline; "end-1c"
        # stops just before it so only inserted content is returned.
        return self.get("1.0", "end-1c")

    def set_max_lines(self, max_lines: int):
        """Update maximum number of lines to keep, trimming the display if needed."""
        self.max_lines = max_lines

        # Trim if necessary
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def set_show_timestamps(self, show: bool):
        """Update whether to show timestamps (affects subsequently added lines)."""
        self.show_timestamps = show

    def save_to_file(self, filepath: str) -> bool:
        """
        Save transcriptions to a file.

        The file is written as UTF-8 so transcriptions containing non-ASCII
        characters are saved regardless of the platform's default encoding.

        Args:
            filepath: Path to save file

        Returns:
            True if saved successfully, False if any error occurred
        """
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(self.get_all_text())
            return True
        except Exception as e:
            # Best-effort save: report the failure and signal it via the
            # return value instead of raising into the GUI event loop.
            print(f"Error saving transcriptions: {e}")
            return False
|