From 879a1f3fd6ff5003690daadefa04186ecef6f130 Mon Sep 17 00:00:00 2001
From: Claude <claude@anthropic.com>
Date: Sun, 22 Mar 2026 18:30:40 -0700
Subject: [PATCH] Fix diarization tensor mismatch + fix sidecar build triggers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Diarization: the Audio.crop patch now pads short segments with zeros to
match the expected duration. pyannote batches embeddings with vstack,
which requires uniform tensor sizes, but the last segment of a file can
be shorter than the 10s window.

CI: Reordered sidecar workflow to check for python/ changes FIRST,
before bumping version or configuring git. All subsequent steps are
gated on has_changes. This prevents unnecessary version bumps and
build runs when only app code changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitea/workflows/build-sidecar.yml        | 39 ++++++++++++-----------
 python/voice_to_notes/services/diarize.py | 15 ++++++++-
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/.gitea/workflows/build-sidecar.yml b/.gitea/workflows/build-sidecar.yml
index 27ef4ab..00aff4e 100644
--- a/.gitea/workflows/build-sidecar.yml
+++ b/.gitea/workflows/build-sidecar.yml
@@ -18,14 +18,34 @@ jobs:
     steps:
       - uses: actions/checkout@v4
         with:
-          fetch-depth: 0
+          fetch-depth: 2
+
+      - name: Check for python changes
+        id: check_changes
+        run: |
+          # If triggered by workflow_dispatch, always build
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check if any python/ files changed in this commit
+          CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ 2>/dev/null || echo "")
+          if [ -n "$CHANGED" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            echo "Python changes detected: $CHANGED"
+          else
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+            echo "No python/ changes detected, skipping sidecar build"
+          fi
 
       - name: Configure git
+        if: steps.check_changes.outputs.has_changes == 'true'
         run: |
           git config user.name "Gitea Actions"
           git config user.email "actions@gitea.local"
 
       - name: Bump sidecar patch version
+        if: steps.check_changes.outputs.has_changes == 'true'
         id: bump
         run: |
           # Read current version from python/pyproject.toml
@@ -46,23 +66,6 @@ jobs:
           echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
           echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
 
-      - name: Check for python changes
-        id: check_changes
-        run: |
-          # If triggered by workflow_dispatch, always build
-          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
-            exit 0
-          fi
-          # Check if any python/ files changed in this commit
-          CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ || echo "")
-          if [ -n "$CHANGED" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
-          else
-            echo "has_changes=false" >> $GITHUB_OUTPUT
-            echo "No python/ changes detected, skipping sidecar build"
-          fi
-
       - name: Commit and tag
         if: steps.check_changes.outputs.has_changes == 'true'
         env:
diff --git a/python/voice_to_notes/services/diarize.py b/python/voice_to_notes/services/diarize.py
index 49a88b9..bd8afb2 100644
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -56,7 +56,12 @@ def _patch_pyannote_audio() -> None:
             return _sf_load(file["audio"])
 
         def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
-            """Replacement for Audio.crop — load full file then slice."""
+            """Replacement for Audio.crop — load full file then slice.
+
+            Pads short segments with zeros to match the expected duration,
+            which pyannote requires for batched embedding extraction.
+            """
+            duration = kwargs.get("duration", None)
             waveform, sample_rate = _sf_load(file["audio"])
             # Convert segment (seconds) to sample indices
             start_sample = int(segment.start * sample_rate)
@@ -65,6 +70,14 @@ def _patch_pyannote_audio() -> None:
             start_sample = max(0, start_sample)
             end_sample = min(waveform.shape[-1], end_sample)
             cropped = waveform[:, start_sample:end_sample]
+            # Pad to expected duration if needed (pyannote batches require uniform size)
+            if duration is not None:
+                expected_samples = int(duration * sample_rate)
+            else:
+                expected_samples = int((segment.end - segment.start) * sample_rate)
+            if cropped.shape[-1] < expected_samples:
+                pad = torch.zeros(cropped.shape[0], expected_samples - cropped.shape[-1])
+                cropped = torch.cat([cropped, pad], dim=-1)
             return cropped, sample_rate
 
         Audio.__call__ = _soundfile_call  # type: ignore[assignment]