From a1c705b3a75a0582733109136b6013e652e14464 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 20 May 2023 15:52:45 +0200 Subject: [PATCH] fix tokenizer is None --- whisperx/asr.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/whisperx/asr.py b/whisperx/asr.py index b4035e5..9b1e450 100644 --- a/whisperx/asr.py +++ b/whisperx/asr.py @@ -231,13 +231,16 @@ class FasterWhisperPipeline(Pipeline): if self.tokenizer is None: language = language or self.detect_language(audio) task = task or "transcribe" - else: - language = language or self.tokenizer.language_code - task = task or self.tokenizer.task - if task != self.tokenizer.task or language != self.tokenizer.language_code: self.tokenizer = faster_whisper.tokenizer.Tokenizer(self.model.hf_tokenizer, self.model.model.is_multilingual, task=task, language=language) + else: + language = language or self.tokenizer.language_code + task = task or self.tokenizer.task + if task != self.tokenizer.task or language != self.tokenizer.language_code: + self.tokenizer = faster_whisper.tokenizer.Tokenizer(self.model.hf_tokenizer, + self.model.model.is_multilingual, task=task, + language=language) segments: List[SingleSegment] = [] batch_size = batch_size or self._batch_size