From 51da22771fa00232272967e1d077939e1dcb1099 Mon Sep 17 00:00:00 2001
From: Abhishek Sharma <34185691+H4CK3Rabhi@users.noreply.github.com>
Date: Wed, 1 Jan 2025 17:37:52 +0530
Subject: [PATCH] feat: add verbose output (#759)

---------

Co-authored-by: Abhishek Sharma
Co-authored-by: Barabazs <31799121+Barabazs@users.noreply.github.com>
---
 whisperx/asr.py        | 4 +++-
 whisperx/transcribe.py | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/whisperx/asr.py b/whisperx/asr.py
index e403040..07e5115 100644
--- a/whisperx/asr.py
+++ b/whisperx/asr.py
@@ -171,7 +171,7 @@ class FasterWhisperPipeline(Pipeline):
         return final_iterator

     def transcribe(
-        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False
+        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False, verbose=False
     ) -> TranscriptionResult:
         if isinstance(audio, str):
             audio = load_audio(audio)
@@ -223,6 +223,8 @@ class FasterWhisperPipeline(Pipeline):
             text = out['text']
             if batch_size in [0, 1, None]:
                 text = text[0]
+            if verbose:
+                print(f"Transcript: [{round(vad_segments[idx]['start'], 3)} --> {round(vad_segments[idx]['end'], 3)}] {text}")
             segments.append(
                 {
                     "text": text,
diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index edd2764..7f10f5e 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -87,6 +87,7 @@ def cli():
     device: str = args.pop("device")
     device_index: int = args.pop("device_index")
     compute_type: str = args.pop("compute_type")
+    verbose: bool = args.pop("verbose")
     # model_flush: bool = args.pop("model_flush")
     os.makedirs(output_dir, exist_ok=True)

@@ -173,7 +174,7 @@ def cli():
         audio = load_audio(audio_path)
         # >> VAD & ASR
         print(">>Performing transcription...")
-        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress)
+        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress, verbose=verbose)
         results.append((result, audio_path))

     # Unload Whisper and VAD