diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py index 9c0c4b5..e8fa47b 100644 --- a/whisperx/transcribe.py +++ b/whisperx/transcribe.py @@ -44,7 +44,7 @@ def cli(): parser.add_argument("--no_align", action='store_true', help="Do not perform phoneme alignment") # vad params - parser.add_argument("--vad_filter", default=True, help="Whether to pre-segment audio with VAD, highly recommended! Produces more accurate alignment + timestamp see WhisperX paper https://arxiv.org/abs/2303.00747") + parser.add_argument("--vad_filter", type=str2bool, default=True, help="Whether to pre-segment audio with VAD, highly recommended! Produces more accurate alignment + timestamp see WhisperX paper https://arxiv.org/abs/2303.00747") parser.add_argument("--vad_onset", type=float, default=0.500, help="Onset threshold for VAD (see pyannote.audio), reduce this if speech is not being detected") parser.add_argument("--vad_offset", type=float, default=0.363, help="Offset threshold for VAD (see pyannote.audio), reduce this if speech is not being detected.")