Fix CSP for blob URLs + fix pyannote AudioDecoder with torchaudio patch

CSP: Add blob: to connect-src, img-src, and media-src so that wavesurfer.js audio playback works. Add http://tauri.localhost to default-src for devtools. pyannote: The previous sys.modules block didn't work — pyannote still uses AudioDecoder unconditionally. New approach: monkey-patch Audio.__call__ in diarize.py to call torchaudio.load() directly, bypassing the broken torchcodec path. The patch runs once, before pipeline loading. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 10:59:52 -07:00
parent 31044b9ad2
commit 2e7a5819bc
3 changed files with 32 additions and 7 deletions
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -20,6 +20,36 @@ from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message

+_patched: bool = False  # set to True once _patch_pyannote_audio() has made its single attempt
+
+
+def _patch_pyannote_audio() -> None:
+    """Monkey-patch pyannote.audio.core.io.Audio to use torchaudio.
+
+    pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
+    unconditionally even when torchcodec is not installed, causing NameError.
+    This replaces the Audio.__call__ method with a torchaudio-based version.
+    """
+    global _patched
+    if _patched:
+        return
+    _patched = True
+
+    try:
+        import torchaudio
+        from pyannote.audio.core.io import Audio
+
+        def _torchaudio_call(self: Audio, file: dict) -> tuple:
+            audio_path = file["audio"]
+            waveform, sample_rate = torchaudio.load(str(audio_path))
+            # pyannote expects (channel, time) tensor and sample_rate
+            return waveform, sample_rate
+
+        Audio.__call__ = _torchaudio_call  # type: ignore[assignment]
+        print("[sidecar] Patched pyannote Audio to use torchaudio", file=sys.stderr, flush=True)
+    except Exception as e:
+        print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
+

 def _ensure_wav(file_path: str) -> tuple[str, str | None]:
    """Convert audio to 16kHz mono WAV if needed.
@@ -113,6 +143,7 @@ class DiarizeService:
        ]

        last_error: Exception | None = None
+        _patch_pyannote_audio()
        for model_name in models:
            try:
                from pyannote.audio import Pipeline