mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
Pad language detection if audio is too short
This commit is contained in:
@@ -251,7 +251,10 @@ class FasterWhisperPipeline(Pipeline):
|
|||||||
|
|
||||||
|
|
||||||
def detect_language(self, audio: np.ndarray):
|
def detect_language(self, audio: np.ndarray):
|
||||||
segment = log_mel_spectrogram(audio[: N_SAMPLES], padding=0)
|
if audio.shape[0] < N_SAMPLES:
|
||||||
|
print("Warning: audio is shorter than 30s, language detection may be inaccurate.")
|
||||||
|
segment = log_mel_spectrogram(audio[: N_SAMPLES],
|
||||||
|
padding=0 if audio.shape[0] >= N_SAMPLES else N_SAMPLES - audio.shape[0])
|
||||||
encoder_output = self.model.encode(segment)
|
encoder_output = self.model.encode(segment)
|
||||||
results = self.model.model.detect_language(encoder_output)
|
results = self.model.model.detect_language(encoder_output)
|
||||||
language_token, language_probability = results[0][0]
|
language_token, language_probability = results[0][0]
|
||||||
|
Reference in New Issue
Block a user