From 51da22771fa00232272967e1d077939e1dcb1099 Mon Sep 17 00:00:00 2001
From: Abhishek Sharma <34185691+H4CK3Rabhi@users.noreply.github.com>
Date: Wed, 1 Jan 2025 17:37:52 +0530
Subject: [PATCH] feat: add verbose output (#759)

---------

Co-authored-by: Abhishek Sharma
Co-authored-by: Barabazs <31799121+Barabazs@users.noreply.github.com>
---
 whisperx/asr.py        | 4 +++-
 whisperx/transcribe.py | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/whisperx/asr.py b/whisperx/asr.py
index e403040..07e5115 100644
--- a/whisperx/asr.py
+++ b/whisperx/asr.py
@@ -171,7 +171,7 @@ class FasterWhisperPipeline(Pipeline):
         return final_iterator

     def transcribe(
-        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False
+        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False, verbose=False
     ) -> TranscriptionResult:
         if isinstance(audio, str):
             audio = load_audio(audio)
@@ -223,6 +223,8 @@ class FasterWhisperPipeline(Pipeline):
             text = out['text']
             if batch_size in [0, 1, None]:
                 text = text[0]
+            if verbose:
+                print(f"Transcript: [{round(vad_segments[idx]['start'], 3)} --> {round(vad_segments[idx]['end'], 3)}] {text}")
             segments.append(
                 {
                     "text": text,
diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index edd2764..7f10f5e 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -87,6 +87,7 @@ def cli():
     device: str = args.pop("device")
     device_index: int = args.pop("device_index")
     compute_type: str = args.pop("compute_type")
+    verbose: bool = args.pop("verbose")
     # model_flush: bool = args.pop("model_flush")
     os.makedirs(output_dir, exist_ok=True)

@@ -173,7 +174,7 @@ def cli():
         audio = load_audio(audio_path)
         # >> VAD & ASR
         print(">>Performing transcription...")
-        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress)
+        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress, verbose=verbose)
         results.append((result, audio_path))

     # Unload Whisper and VAD