Merge pull request #281 from m-bain/v3

fix Unequal Stack Size VAD error
2025-07-01 18:17:27 -04:00 · 2023-05-26 20:37:57 +01:00
parent 17e2f7f859 42b4909bc0
commit bc8a03881a
2 changed files with 6 additions and 6 deletions
--- a/whisperx/alignment.py
+++ b/whisperx/alignment.py
@@ -301,6 +301,8 @@ def align(
        aligned_subsegments["end"] = interpolate_nans(aligned_subsegments["end"], method=interpolate_method)
        # concatenate sentences with same timestamps
        agg_dict = {"text": " ".join, "words": "sum"}
        if model_lang in LANGUAGES_WITHOUT_SPACES:
            agg_dict["text"] = "".join
        if return_char_alignments:
            agg_dict["chars"] = "sum"
        aligned_subsegments= aligned_subsegments.groupby(["start", "end"], as_index=False).agg(agg_dict)
--- a/whisperx/vad.py
+++ b/whisperx/vad.py
@@ -147,8 +147,6 @@ class Binarize:
                if is_active: 
                    curr_duration = t - start
                    if curr_duration > self.max_duration:
                        # if curr_duration > 15:
                            # import pdb; pdb.set_trace()
                        search_after = len(curr_scores) // 2
                        # divide segment
                        min_score_div_idx = search_after + np.argmin(curr_scores[search_after:])
@@ -159,21 +157,21 @@ class Binarize:
                        curr_scores = curr_scores[min_score_div_idx+1:]
                        curr_timestamps = curr_timestamps[min_score_div_idx+1:]
                    # switching from active to inactive
-                    elif y < self.offset:
+                    elif y <= self.offset:
                        region = Segment(start - self.pad_onset, t + self.pad_offset)
                        active[region, k] = label
                        start = t
                        is_active = False
                        curr_scores = []
                        curr_timestamps = []
                    curr_scores.append(y)
                    curr_timestamps.append(t)
                # currently inactive
                else:
                    # switching from inactive to active
-                    if y > self.onset:
+                    if y >= self.onset:
                        start = t
                        is_active = True
                curr_scores.append(y)
                curr_timestamps.append(t)
            # if active at the end, add final region
            if is_active: