feat: add verbose output (#759)

--------- Co-authored-by: Abhishek Sharma <abhishek@zipteams.com> Co-authored-by: Barabazs <31799121+Barabazs@users.noreply.github.com>
2025-07-01 18:17:27 -04:00 · 2025-01-01 17:37:52 +05:30
parent 15ad5bf7df
commit 51da22771f
2 changed files with 5 additions and 2 deletions
--- a/whisperx/asr.py
+++ b/whisperx/asr.py
@@ -171,7 +171,7 @@ class FasterWhisperPipeline(Pipeline):
        return final_iterator

    def transcribe(
-        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False
+        self, audio: Union[str, np.ndarray], batch_size=None, num_workers=0, language=None, task=None, chunk_size=30, print_progress = False, combined_progress=False, verbose=False
    ) -> TranscriptionResult:
        if isinstance(audio, str):
            audio = load_audio(audio)
@@ -223,6 +223,8 @@ class FasterWhisperPipeline(Pipeline):
            text = out['text']
            if batch_size in [0, 1, None]:
                text = text[0]
+            if verbose:
+                print(f"Transcript: [{round(vad_segments[idx]['start'], 3)} --> {round(vad_segments[idx]['end'], 3)}] {text}")
            segments.append(
                {
                    "text": text,
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -87,6 +87,7 @@ def cli():
    device: str = args.pop("device")
    device_index: int = args.pop("device_index")
    compute_type: str = args.pop("compute_type")
+    verbose: bool = args.pop("verbose")

    # model_flush: bool = args.pop("model_flush")
    os.makedirs(output_dir, exist_ok=True)
@@ -173,7 +174,7 @@ def cli():
        audio = load_audio(audio_path)
        # >> VAD & ASR
        print(">>Performing transcription...")
-        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress)
+        result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress, verbose=verbose)
        results.append((result, audio_path))

    # Unload Whisper and VAD