diff --git a/whisperx/asr.py b/whisperx/asr.py index 67b5b4c..623cf72 100644 --- a/whisperx/asr.py +++ b/whisperx/asr.py @@ -258,7 +258,10 @@ class FasterWhisperPipeline(Pipeline): vad_segments = self.vad_model({"waveform": torch.from_numpy(audio).unsqueeze(0), "sample_rate": SAMPLE_RATE}) vad_segments = merge_chunks(vad_segments, 30) - if self.preset_language is None: + if self.preset_language is None or self.preset_language != language: + if self.preset_language is not None and language is not None and self.preset_language != language: + print(f"Preset language '{self.preset_language}' is different from the language {language} passed to the transcribe method.") + print(f"Overriding preset language with {language}.") language = language or self.detect_language(audio) task = task or "transcribe" self.tokenizer = faster_whisper.tokenizer.Tokenizer(self.model.hf_tokenizer,