From 65688208c9696ad479f608ab4cd2e66bf789eea8 Mon Sep 17 00:00:00 2001
From: awerks
Date: Wed, 16 Aug 2023 16:18:00 +0200
Subject: [PATCH] Update alignment.py

---
Review notes (free-form area, not part of the commit message):

* Adds an opt-in `print_progress` flag to `align()`; progress is printed from
  the preprocessing loop only when the flag is true.
* `transcript` is materialised with `list(...)` instead of being wrapped in
  `iter(...)`. It is iterated twice (preprocessing loop, then alignment loop);
  a one-shot iterator would be exhausted by the first loop and leave the
  second loop with nothing to align.
* NOTE(review): indentation and blank context lines below were reconstructed
  from a whitespace-collapsed copy of this patch -- verify them against
  whisperx/alignment.py at blob bda322c before applying.
* NOTE(review): the post-image blob id on the `index` line was computed for
  the earlier revision of this patch and presumably no longer matches.

 whisperx/alignment.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/whisperx/alignment.py b/whisperx/alignment.py
index bda322c..859c617 100644
--- a/whisperx/alignment.py
+++ b/whisperx/alignment.py
@@ -98,6 +98,7 @@ def align(
     device: str,
     interpolate_method: str = "nearest",
     return_char_alignments: bool = False,
+    print_progress: bool = False,
 ) -> AlignedTranscriptionResult:
     """
     Align phoneme recognition predictions to known transcription.
@@ -116,9 +117,16 @@ def align(
     model_lang = align_model_metadata["language"]
     model_type = align_model_metadata["type"]
 
+    transcript = list(transcript)  # materialise once: iterated by both loops below
+    total_segments = len(transcript)
+
     # 1. Preprocess to keep only characters in dictionary
     for sdx, segment in enumerate(transcript):
         # strip spaces at beginning / end, but keep track of the amount.
+        if print_progress:
+            percent_complete = ((sdx + 1) / total_segments) * 100
+            print(f"Progress: {percent_complete:.2f}%...")
+
         num_leading = len(segment["text"]) - len(segment["text"].lstrip())
         num_trailing = len(segment["text"]) - len(segment["text"].rstrip())
         text = segment["text"]
@@ -162,15 +170,10 @@ def align(
         segment["sentence_spans"] = sentence_spans
 
     aligned_segments: List[SingleAlignedSegment] = []
-    total_segments = len(list(transcript))
-    transcript = iter(transcript)
 
     # 2. Get prediction matrix from alignment model & align
     for sdx, segment in enumerate(transcript):
 
-        percent_complete = ((sdx + 1) / total_segments) * 100
-        print(f"Progress: {percent_complete:.2f}%...")
-
         t1 = segment["start"]
         t2 = segment["end"]
         text = segment["text"]