From 42b4909bc07ccad388acf86818d8c0da8482bc71 Mon Sep 17 00:00:00 2001 From: Max Bain Date: Fri, 26 May 2023 20:36:03 +0100 Subject: [PATCH] fix Unequal Stack Size VAD error --- whisperx/alignment.py | 2 ++ whisperx/vad.py | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index 1e22a7b..aade4b4 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -298,6 +298,8 @@ def align( aligned_subsegments["end"] = interpolate_nans(aligned_subsegments["end"], method=interpolate_method) # concatenate sentences with same timestamps agg_dict = {"text": " ".join, "words": "sum"} + if model_lang in LANGUAGES_WITHOUT_SPACES: + agg_dict["text"] = "".join if return_char_alignments: agg_dict["chars"] = "sum" aligned_subsegments= aligned_subsegments.groupby(["start", "end"], as_index=False).agg(agg_dict) diff --git a/whisperx/vad.py b/whisperx/vad.py index 42b0bfb..a7a2451 100644 --- a/whisperx/vad.py +++ b/whisperx/vad.py @@ -147,8 +147,6 @@ class Binarize: if is_active: curr_duration = t - start if curr_duration > self.max_duration: - # if curr_duration > 15: - # import pdb; pdb.set_trace() search_after = len(curr_scores) // 2 # divide segment min_score_div_idx = search_after + np.argmin(curr_scores[search_after:]) @@ -159,21 +157,21 @@ class Binarize: curr_scores = curr_scores[min_score_div_idx+1:] curr_timestamps = curr_timestamps[min_score_div_idx+1:] # switching from active to inactive - elif y < self.offset: + elif y <= self.offset: region = Segment(start - self.pad_onset, t + self.pad_offset) active[region, k] = label start = t is_active = False curr_scores = [] curr_timestamps = [] + curr_scores.append(y) + curr_timestamps.append(t) # currently inactive else: # switching from inactive to active - if y > self.onset: + if y >= self.onset: start = t is_active = True - curr_scores.append(y) - curr_timestamps.append(t) # if active at the end, add final region if is_active: