Mirror of https://github.com/m-bain/whisperX.git (synced 2025-07-01 18:17:27 -04:00)
fix issue by resetting tokenizer
This commit is contained in:
@ -258,10 +258,7 @@ class FasterWhisperPipeline(Pipeline):
|
||||
|
||||
vad_segments = self.vad_model({"waveform": torch.from_numpy(audio).unsqueeze(0), "sample_rate": SAMPLE_RATE})
|
||||
vad_segments = merge_chunks(vad_segments, 30)
|
||||
if self.preset_language is None or self.preset_language != language:
|
||||
if self.preset_language is not None and language is not None and self.preset_language != language:
|
||||
print(f"Preset language '{self.preset_language}' is different from the language {language} passed to the transcribe method.")
|
||||
print(f"Overriding preset language with {language}.")
|
||||
if self.tokenizer is None:
|
||||
language = language or self.detect_language(audio)
|
||||
task = task or "transcribe"
|
||||
self.tokenizer = faster_whisper.tokenizer.Tokenizer(self.model.hf_tokenizer,
|
||||
@ -289,6 +286,9 @@ class FasterWhisperPipeline(Pipeline):
|
||||
}
|
||||
)
|
||||
|
||||
if self.preset_language is None:
|
||||
self.tokenizer = None
|
||||
|
||||
return {"segments": segments, "language": language}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user