mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
Merge pull request #281 from m-bain/v3
fix Unequal Stack Size VAD error
This commit is contained in:
@ -301,6 +301,8 @@ def align(
|
|||||||
aligned_subsegments["end"] = interpolate_nans(aligned_subsegments["end"], method=interpolate_method)
|
aligned_subsegments["end"] = interpolate_nans(aligned_subsegments["end"], method=interpolate_method)
|
||||||
# concatenate sentences with same timestamps
|
# concatenate sentences with same timestamps
|
||||||
agg_dict = {"text": " ".join, "words": "sum"}
|
agg_dict = {"text": " ".join, "words": "sum"}
|
||||||
|
if model_lang in LANGUAGES_WITHOUT_SPACES:
|
||||||
|
agg_dict["text"] = "".join
|
||||||
if return_char_alignments:
|
if return_char_alignments:
|
||||||
agg_dict["chars"] = "sum"
|
agg_dict["chars"] = "sum"
|
||||||
aligned_subsegments= aligned_subsegments.groupby(["start", "end"], as_index=False).agg(agg_dict)
|
aligned_subsegments= aligned_subsegments.groupby(["start", "end"], as_index=False).agg(agg_dict)
|
||||||
|
@ -147,8 +147,6 @@ class Binarize:
|
|||||||
if is_active:
|
if is_active:
|
||||||
curr_duration = t - start
|
curr_duration = t - start
|
||||||
if curr_duration > self.max_duration:
|
if curr_duration > self.max_duration:
|
||||||
# if curr_duration > 15:
|
|
||||||
# import pdb; pdb.set_trace()
|
|
||||||
search_after = len(curr_scores) // 2
|
search_after = len(curr_scores) // 2
|
||||||
# divide segment
|
# divide segment
|
||||||
min_score_div_idx = search_after + np.argmin(curr_scores[search_after:])
|
min_score_div_idx = search_after + np.argmin(curr_scores[search_after:])
|
||||||
@ -159,21 +157,21 @@ class Binarize:
|
|||||||
curr_scores = curr_scores[min_score_div_idx+1:]
|
curr_scores = curr_scores[min_score_div_idx+1:]
|
||||||
curr_timestamps = curr_timestamps[min_score_div_idx+1:]
|
curr_timestamps = curr_timestamps[min_score_div_idx+1:]
|
||||||
# switching from active to inactive
|
# switching from active to inactive
|
||||||
elif y < self.offset:
|
elif y <= self.offset:
|
||||||
region = Segment(start - self.pad_onset, t + self.pad_offset)
|
region = Segment(start - self.pad_onset, t + self.pad_offset)
|
||||||
active[region, k] = label
|
active[region, k] = label
|
||||||
start = t
|
start = t
|
||||||
is_active = False
|
is_active = False
|
||||||
curr_scores = []
|
curr_scores = []
|
||||||
curr_timestamps = []
|
curr_timestamps = []
|
||||||
|
curr_scores.append(y)
|
||||||
|
curr_timestamps.append(t)
|
||||||
# currently inactive
|
# currently inactive
|
||||||
else:
|
else:
|
||||||
# switching from inactive to active
|
# switching from inactive to active
|
||||||
if y > self.onset:
|
if y >= self.onset:
|
||||||
start = t
|
start = t
|
||||||
is_active = True
|
is_active = True
|
||||||
curr_scores.append(y)
|
|
||||||
curr_timestamps.append(t)
|
|
||||||
|
|
||||||
# if active at the end, add final region
|
# if active at the end, add final region
|
||||||
if is_active:
|
if is_active:
|
||||||
|
Reference in New Issue
Block a user