From 16d24b1c96f5c86037a6eaec6a44675301d10291 Mon Sep 17 00:00:00 2001 From: Max Bain Date: Thu, 26 Jan 2023 10:46:13 +0000 Subject: [PATCH] only pad timestamps if not using VAD --- whisperx/alignment.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index ebf084d..d9b1c3e 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -188,9 +188,11 @@ def align( transcription_cleaned = "".join(clean_char) tokens = [model_dictionary[c] for c in transcription_cleaned] - # pad according original timestamps - t1 = max(segment["start"] - extend_duration, 0) - t2 = min(segment["end"] + extend_duration, MAX_DURATION) + # we only pad if not using VAD filtering + if "seg_text" not in segment: + # pad according original timestamps + t1 = max(segment["start"] - extend_duration, 0) + t2 = min(segment["end"] + extend_duration, MAX_DURATION) # use prev_t2 as current t1 if it"s later if start_from_previous and t1 < prev_t2: