diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index c974a89..aeff625 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -347,7 +347,7 @@ def cli():
     parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
     # alignment params
     parser.add_argument("--align_model", default="WAV2VEC2_ASR_LARGE_LV60K_960H", choices=available_models(), help="Name of phoneme-level ASR model to do alignment")
-    parser.add_argument("--align_extend", default=2, type=float, help="Seconds before and after to extend the whisper segments for alignment")
+    parser.add_argument("--align_extend", default=1, type=float, help="Seconds before and after to extend the whisper segments for alignment")
     parser.add_argument("--align_from_prev", default=True, type=bool, help="Whether to clip the alignment start time of current segment to the end time of the last aligned word of the previous segment")
     # parser.add_argument("--align_interpolate_missing", default=True, type=bool, help="Whether to interpolate the timestamp of words not tokenized by the align model, e.g. integers")
@@ -368,7 +368,7 @@ def cli():
     parser.add_argument("--suppress_tokens", type=str, default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")
     parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.")
-    parser.add_argument("--condition_on_previous_text", type=str2bool, default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
+    parser.add_argument("--condition_on_previous_text", type=str2bool, default=False, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
     parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default")
     parser.add_argument("--temperature_increment_on_fallback", type=optional_float, default=0.2, help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below")
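
Both hunks change only argument defaults (--align_extend from 2 to 1, --condition_on_previous_text from True to False), so the previous behaviour stays reachable from the command line. A minimal sketch of restoring the old defaults, assuming the package's usual `whisperx` console entry point; the audio filename is hypothetical:

    # restore pre-patch defaults explicitly (example invocation, not from the patch)
    whisperx audio.wav --align_extend 2 --condition_on_previous_text True

Note that --condition_on_previous_text is parsed with str2bool, so it expects an explicit True/False string, while --align_extend is a plain float.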