From 879a1f3fd6ff5003690daadefa04186ecef6f130 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 22 Mar 2026 18:30:40 -0700 Subject: [PATCH] Fix diarization tensor mismatch + fix sidecar build triggers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Diarization: Audio.crop patch now pads short segments with zeros to match the expected duration. pyannote batches embeddings with vstack which requires uniform tensor sizes — the last segment of a file can be shorter than the 10s window. CI: Reordered sidecar workflow to check for python/ changes FIRST, before bumping version or configuring git. All subsequent steps are gated on has_changes. This prevents unnecessary version bumps and build runs when only app code changes. Co-Authored-By: Claude Opus 4.6 --- .gitea/workflows/build-sidecar.yml | 39 ++++++++++++----------- python/voice_to_notes/services/diarize.py | 15 ++++++++- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/.gitea/workflows/build-sidecar.yml b/.gitea/workflows/build-sidecar.yml index 27ef4ab..00aff4e 100644 --- a/.gitea/workflows/build-sidecar.yml +++ b/.gitea/workflows/build-sidecar.yml @@ -18,14 +18,34 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 2 + + - name: Check for python changes + id: check_changes + run: | + # If triggered by workflow_dispatch, always build + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + exit 0 + fi + # Check if any python/ files changed in this commit + CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ 2>/dev/null || echo "") + if [ -n "$CHANGED" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + echo "Python changes detected: $CHANGED" + else + echo "has_changes=false" >> $GITHUB_OUTPUT + echo "No python/ changes detected, skipping sidecar build" + fi - name: Configure git + if: steps.check_changes.outputs.has_changes == 'true' run: | git config user.name "Gitea Actions" git config user.email "actions@gitea.local" - name: Bump sidecar patch version + if: steps.check_changes.outputs.has_changes == 'true' id: bump run: | # Read current version from python/pyproject.toml @@ -46,23 +66,6 @@ jobs: echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT - - name: Check for python changes - id: check_changes - run: | - # If triggered by workflow_dispatch, always build - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - echo "has_changes=true" >> $GITHUB_OUTPUT - exit 0 - fi - # Check if any python/ files changed in this commit - CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ || echo "") - if [ -n "$CHANGED" ]; then - echo "has_changes=true" >> $GITHUB_OUTPUT - else - echo "has_changes=false" >> $GITHUB_OUTPUT - echo "No python/ changes detected, skipping sidecar build" - fi - - name: Commit and tag if: steps.check_changes.outputs.has_changes == 'true' env: diff --git a/python/voice_to_notes/services/diarize.py b/python/voice_to_notes/services/diarize.py index 49a88b9..bd8afb2 100644 --- a/python/voice_to_notes/services/diarize.py +++ b/python/voice_to_notes/services/diarize.py @@ -56,7 +56,12 @@ def _patch_pyannote_audio() -> None: return _sf_load(file["audio"]) def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple: - """Replacement for Audio.crop — load full file then slice.""" + """Replacement for Audio.crop — load full file then slice. + + Pads short segments with zeros to match the expected duration, + which pyannote requires for batched embedding extraction. + """ + duration = kwargs.get("duration", None) waveform, sample_rate = _sf_load(file["audio"]) # Convert segment (seconds) to sample indices start_sample = int(segment.start * sample_rate) @@ -65,6 +70,14 @@ def _patch_pyannote_audio() -> None: start_sample = max(0, start_sample) end_sample = min(waveform.shape[-1], end_sample) cropped = waveform[:, start_sample:end_sample] + # Pad to expected duration if needed (pyannote batches require uniform size) + if duration is not None: + expected_samples = int(duration * sample_rate) + else: + expected_samples = int((segment.end - segment.start) * sample_rate) + if cropped.shape[-1] < expected_samples: + pad = torch.zeros(cropped.shape[0], expected_samples - cropped.shape[-1]) + cropped = torch.cat([cropped, pad], dim=-1) return cropped, sample_rate Audio.__call__ = _soundfile_call # type: ignore[assignment]