diff --git a/python/voice_to_notes/main.py b/python/voice_to_notes/main.py
index c13ca7d..873ec2a 100644
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -5,12 +5,6 @@ from __future__ import annotations
 import signal
 import sys
 
-# Block torchcodec before anything imports it. pyannote.audio has a bug where
-# it conditionally imports AudioDecoder from torchcodec but uses it
-# unconditionally. Making torchcodec fully unimportable forces the torchaudio
-# fallback path.
-sys.modules["torchcodec"] = None  # type: ignore[assignment]
-sys.modules["torchcodec.decoders"] = None  # type: ignore[assignment]
 
 # CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
 # that might print to stdout and corrupt the JSON-line protocol.
diff --git a/python/voice_to_notes/services/diarize.py b/python/voice_to_notes/services/diarize.py
index 6ac5f51..3a8ed69 100644
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -20,6 +20,70 @@ from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
 
+_patched = False
+
+
+def _patch_pyannote_audio() -> None:
+    """Monkey-patch pyannote.audio.core.io.Audio to load audio via torchaudio.
+
+    pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
+    unconditionally even when torchcodec is not installed, causing NameError.
+    This replaces Audio.__call__ with a torchaudio-based version that keeps
+    the same contract: it accepts a path, a file object, or a mapping with
+    an "audio" (or in-memory "waveform"/"sample_rate") entry, honours an
+    optional "channel", and returns ``(waveform, sample_rate)``.
+
+    The patch is attempted at most once per process and is best-effort: on
+    failure a warning is written to stderr instead of raising.
+    """
+    global _patched
+    if _patched:
+        return
+    # Set the flag before trying so a failed attempt is not retried (and the
+    # warning is not repeated) on every pipeline load.
+    _patched = True
+
+    try:
+        from collections.abc import Mapping
+        from io import IOBase
+        from typing import Any
+
+        import torchaudio
+        from pyannote.audio.core.io import Audio
+
+        def _torchaudio_call(self: Audio, file: Any) -> tuple:
+            # Callers may pass a bare path / file object instead of a mapping.
+            if not isinstance(file, Mapping):
+                file = {"audio": file}
+
+            if "waveform" in file:
+                # In-memory audio: nothing to decode.
+                waveform = file["waveform"]
+                sample_rate = file["sample_rate"]
+            else:
+                source = file["audio"]
+                if isinstance(source, IOBase):
+                    waveform, sample_rate = torchaudio.load(source)
+                    source.seek(0)  # rewind so the stream can be read again
+                else:
+                    waveform, sample_rate = torchaudio.load(str(source))
+
+            channel = file.get("channel")
+            if channel is not None:
+                waveform = waveform[channel : channel + 1]
+
+            # Apply the Audio instance's mono / sample-rate settings when the
+            # helper is available; otherwise return the decoded audio as-is.
+            convert = getattr(self, "downmix_and_resample", None)
+            if convert is None:
+                return waveform, sample_rate
+            return convert(waveform, sample_rate)
+
+        Audio.__call__ = _torchaudio_call  # type: ignore[assignment]
+        print("[sidecar] Patched pyannote Audio to use torchaudio", file=sys.stderr, flush=True)
+    except Exception as e:
+        print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
+
 
 def _ensure_wav(file_path: str) -> tuple[str, str | None]:
     """Convert audio to 16kHz mono WAV if needed.
@@ -113,6 +177,7 @@ class DiarizeService:
         ]
 
         last_error: Exception | None = None
+        _patch_pyannote_audio()
         for model_name in models:
             try:
                 from pyannote.audio import Pipeline
diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json
index d1fb334..cb70a7d 100644
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -22,7 +22,7 @@
       }
     ],
     "security": {
-      "csp": "default-src 'self'; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost; img-src 'self' asset: http://asset.localhost https://asset.localhost; media-src 'self' asset: http://asset.localhost https://asset.localhost; style-src 'self' 'unsafe-inline'",
+      "csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'",
       "assetProtocol": {
         "enable": true,
         "scope": ["**"]