Fix CSP for blob URLs + fix pyannote AudioDecoder with torchaudio patch

CSP: Add blob: to connect-src/img-src/media-src for wavesurfer.js audio playback. Add http://tauri.localhost to default-src for devtools. pyannote: blocking torchcodec via sys.modules (setting the entries to None) didn't work — pyannote still uses AudioDecoder unconditionally. New approach: monkey-patch Audio.__call__ in diarize.py to use torchaudio.load() directly, bypassing the broken torchcodec path. The patch runs once, before pipeline loading. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 10:59:52 -07:00
parent 31044b9ad2
commit 2e7a5819bc
3 changed files with 32 additions and 7 deletions
@@ -5,12 +5,6 @@ from __future__ import annotations
 import signal
 import sys
 # Block torchcodec before anything imports it. pyannote.audio has a bug where
 # it conditionally imports AudioDecoder from torchcodec but uses it
 # unconditionally. Making torchcodec fully unimportable forces the torchaudio
 # fallback path.
 sys.modules["torchcodec"] = None  # type: ignore[assignment]
 sys.modules["torchcodec.decoders"] = None  # type: ignore[assignment]
 # CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
 # that might print to stdout and corrupt the JSON-line protocol.
@@ -20,6 +20,36 @@ from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
 # Set to True on the first call to _patch_pyannote_audio(), whether or not
 # the patch succeeded, so the attempt (and its log line) happens only once.
 _patched = False


 def _patch_pyannote_audio() -> None:
     """Monkey-patch pyannote.audio.core.io.Audio to load audio via torchaudio.

     pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
     unconditionally even when torchcodec is not installed, causing NameError.
     This replaces ``Audio.__call__`` with a torchaudio-based version.

     The replacement accepts the same inputs as the method it replaces:

     * a mapping with an ``"audio"`` entry (path to the audio file),
     * a mapping with in-memory ``"waveform"`` and ``"sample_rate"`` entries,
     * a bare path (``str`` / ``Path``),

     honours an optional ``"channel"`` entry, and hands the result to the
     instance's own ``downmix_and_resample`` (when present) so the mono and
     sample-rate settings of the ``Audio`` instance are still applied.

     The patch is best-effort: any failure is reported on stderr and
     swallowed, and the attempt is never repeated.
     """
     global _patched
     if _patched:
         return
     # Flip the flag before trying so a failed attempt is not retried (and
     # re-logged) on every pipeline load.
     _patched = True

     try:
         from collections.abc import Mapping

         import torchaudio
         from pyannote.audio.core.io import Audio

         def _torchaudio_call(self: Audio, file) -> tuple:
             """Return ``(waveform, sample_rate)`` for *file* using torchaudio."""
             if isinstance(file, Mapping):
                 if "waveform" in file:
                     # Audio already decoded by the caller: nothing to load.
                     waveform = file["waveform"]
                     sample_rate = file["sample_rate"]
                 else:
                     waveform, sample_rate = torchaudio.load(str(file["audio"]))
                 channel = file.get("channel")
             else:
                 # Bare path; indexing it with ["audio"] would raise TypeError.
                 waveform, sample_rate = torchaudio.load(str(file))
                 channel = None

             if channel is not None:
                 # Keep the (channel, time) layout while selecting one channel.
                 waveform = waveform[channel : channel + 1]

             # Defer mono down-mixing / resampling to pyannote itself so the
             # output matches what the Audio instance was configured for.
             # NOTE(review): assumes downmix_and_resample(waveform, sample_rate)
             # returns (waveform, sample_rate) in the installed pyannote
             # version - confirm; falls back to the raw tensor if it is absent.
             normalize = getattr(self, "downmix_and_resample", None)
             if callable(normalize):
                 return normalize(waveform, sample_rate)
             return waveform, sample_rate

         # NOTE(review): only __call__ is replaced here; other Audio methods
         # are left untouched - confirm none of them hit the torchcodec path.
         Audio.__call__ = _torchaudio_call  # type: ignore[assignment]
         print("[sidecar] Patched pyannote Audio to use torchaudio", file=sys.stderr, flush=True)
     except Exception as e:
         print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
 def _ensure_wav(file_path: str) -> tuple[str, str | None]:
    """Convert audio to 16kHz mono WAV if needed.
@@ -113,6 +143,7 @@ class DiarizeService:
        ]
        last_error: Exception | None = None
        _patch_pyannote_audio()
        for model_name in models:
            try:
                from pyannote.audio import Pipeline
@@ -22,7 +22,7 @@
      }
    ],
    "security": {
-      "csp": "default-src 'self'; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost; img-src 'self' asset: http://asset.localhost https://asset.localhost; media-src 'self' asset: http://asset.localhost https://asset.localhost; style-src 'self' 'unsafe-inline'",
+      "csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'",
      "assetProtocol": {
        "enable": true,
        "scope": ["**"]