Merge pull request #527 from jkukul/pass-beam-size-to-fast-whisper

2025-07-01 18:17:27 -04:00 · 2023-10-15 07:15:13 -07:00
parent 4cdce3b927 14a7cab8eb
commit a150df4310
2 changed files with 3 additions and 1 deletion
--- a/whisperx/asr.py
+++ b/whisperx/asr.py
@@ -145,6 +145,8 @@ class WhisperModel(faster_whisper.WhisperModel):
        result = self.model.generate(
                encoder_output,
                [prompt] * batch_size,
+                beam_size=options.beam_size,
+                patience=options.patience,
                length_penalty=options.length_penalty,
                max_length=self.max_length,
                suppress_blank=options.suppress_blank,
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -51,7 +51,7 @@ def cli():
    parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
    parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
    parser.add_argument("--beam_size", type=optional_int, default=5, help="number of beams in beam search, only applicable when temperature is zero")
-    parser.add_argument("--patience", type=float, default=None, help="optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search")
+    parser.add_argument("--patience", type=float, default=1.0, help="optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search")
    parser.add_argument("--length_penalty", type=float, default=1.0, help="optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default")

    parser.add_argument("--suppress_tokens", type=str, default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")