From 715435db4284c1e73caf284662c131542a938bb9 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 20 May 2023 15:42:21 +0200 Subject: [PATCH] add tokenizer is None case --- whisperx/asr.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/whisperx/asr.py b/whisperx/asr.py index 2fab8bc..b4035e5 100644 --- a/whisperx/asr.py +++ b/whisperx/asr.py @@ -228,9 +228,12 @@ class FasterWhisperPipeline(Pipeline): vad_segments = self.vad_model({"waveform": torch.from_numpy(audio).unsqueeze(0), "sample_rate": SAMPLE_RATE}) vad_segments = merge_chunks(vad_segments, 30) - - language = language or self.tokenizer.language_code - task = task or self.tokenizer.task + if self.tokenizer is None: + language = language or self.detect_language(audio) + task = task or "transcribe" + else: + language = language or self.tokenizer.language_code + task = task or self.tokenizer.task if task != self.tokenizer.task or language != self.tokenizer.language_code: self.tokenizer = faster_whisper.tokenizer.Tokenizer(self.model.hf_tokenizer, self.model.model.is_multilingual, task=task,