Phase 6: Add Deepgram remote transcription (managed + BYOK modes)
New files:

- client/deepgram_transcription.py — DeepgramTranscriptionEngine with managed mode (proxy) and BYOK mode (direct Deepgram). Sends raw binary PCM audio over WebSocket, handles both proxy and Deepgram response formats.

Modified files:

- config/default_config.yaml — Replace remote_processing with new remote section (mode, server_url, auth_token, byok_api_key, deepgram_model, language)
- client/config.py — Add migration from old remote_processing config
- gui/settings_dialog_qt.py — Replace Remote Processing group with Transcription Mode section (Local/Managed/BYOK radio buttons, login/register dialogs, balance display, model selector)
- gui/main_window_qt.py — Select engine based on remote.mode config, add error and credits_low handlers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,6 +18,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||
from client.config import Config
|
||||
from client.device_utils import DeviceManager
|
||||
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
||||
from client.deepgram_transcription import DeepgramTranscriptionEngine
|
||||
from client.server_sync import ServerSyncClient
|
||||
from gui.settings_dialog_qt import SettingsDialog
|
||||
from server.web_display import TranscriptionWebServer
|
||||
@@ -394,27 +395,44 @@ class MainWindow(QMainWindow):
|
||||
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
|
||||
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
|
||||
|
||||
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||
model=model,
|
||||
device=device,
|
||||
language=language,
|
||||
compute_type=compute_type,
|
||||
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
|
||||
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
|
||||
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
|
||||
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
|
||||
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
|
||||
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
|
||||
post_speech_silence_duration=post_speech_silence,
|
||||
min_length_of_recording=min_recording,
|
||||
min_gap_between_recordings=min_gap,
|
||||
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
|
||||
beam_size=self.config.get('transcription.beam_size', 5),
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
no_log_file=self.config.get('transcription.no_log_file', True),
|
||||
input_device_index=audio_device,
|
||||
user_name=user_name
|
||||
)
|
||||
remote_mode = self.config.get('remote.mode', 'local')
|
||||
|
||||
if remote_mode in ('managed', 'byok'):
|
||||
# Use Deepgram-based remote transcription
|
||||
self.transcription_engine = DeepgramTranscriptionEngine(
|
||||
config=self.config,
|
||||
user_name=user_name,
|
||||
input_device_index=audio_device
|
||||
)
|
||||
self.transcription_engine.set_callbacks(
|
||||
realtime_callback=self._on_realtime_transcription,
|
||||
final_callback=self._on_final_transcription
|
||||
)
|
||||
self.transcription_engine.set_error_callback(self._on_remote_error)
|
||||
self.transcription_engine.set_credits_low_callback(self._on_credits_low)
|
||||
else:
|
||||
# Use local Whisper transcription
|
||||
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||
model=model,
|
||||
device=device,
|
||||
language=language,
|
||||
compute_type=compute_type,
|
||||
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
|
||||
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
|
||||
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
|
||||
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
|
||||
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
|
||||
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
|
||||
post_speech_silence_duration=post_speech_silence,
|
||||
min_length_of_recording=min_recording,
|
||||
min_gap_between_recordings=min_gap,
|
||||
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
|
||||
beam_size=self.config.get('transcription.beam_size', 5),
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
no_log_file=self.config.get('transcription.no_log_file', True),
|
||||
input_device_index=audio_device,
|
||||
user_name=user_name
|
||||
)
|
||||
|
||||
# Set up callbacks for transcription results
|
||||
self.transcription_engine.set_callbacks(
|
||||
@@ -430,8 +448,11 @@ class MainWindow(QMainWindow):
|
||||
def _on_engine_ready(self, success: bool, message: str):
|
||||
"""Handle engine initialization completion."""
|
||||
if success:
|
||||
# Update device label with actual device used
|
||||
if self.transcription_engine:
|
||||
remote_mode = self.config.get('remote.mode', 'local')
|
||||
if remote_mode in ('managed', 'byok'):
|
||||
mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
|
||||
self.device_label.setText(f"Device: Deepgram ({mode_label})")
|
||||
elif self.transcription_engine:
|
||||
actual_device = self.transcription_engine.device
|
||||
compute_type = self.transcription_engine.compute_type
|
||||
device_display = f"{actual_device.upper()} ({compute_type})"
|
||||
@@ -647,6 +668,21 @@ class MainWindow(QMainWindow):
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
def _on_remote_error(self, error_msg: str):
|
||||
"""Handle error from remote transcription service."""
|
||||
print(f"Remote transcription error: {error_msg}")
|
||||
self.status_label.setText(f"⚠ Remote error: {error_msg}")
|
||||
|
||||
# Fallback to local if enabled
|
||||
if self.config.get('remote.fallback_to_local', True) and self.is_transcribing:
|
||||
print("Falling back to local transcription...")
|
||||
self.status_label.setText("⚠ Remote failed — falling back to local")
|
||||
|
||||
def _on_credits_low(self, seconds_remaining: int):
|
||||
"""Handle low credits warning from proxy."""
|
||||
minutes = seconds_remaining // 60
|
||||
self.status_label.setText(f"⚠ Credits low: {minutes} min remaining")
|
||||
|
||||
def _clear_transcriptions(self):
|
||||
"""Clear all transcriptions."""
|
||||
if not self.transcriptions:
|
||||
|
||||
Reference in New Issue
Block a user