Add torch compile to log mel spectrogram

2025-07-01 18:17:27 -04:00 · 2023-05-03 23:17:44 +02:00
parent 2a6830492c
commit 2d59eb9726
2 changed files with 19 additions and 30 deletions
--- a/whisperx/asr.py
+++ b/whisperx/asr.py
@@ -181,6 +181,9 @@ class FasterWhisperPipeline(Pipeline):

    def preprocess(self, audio):
        audio = audio['inputs']
+        if isinstance(audio, np.ndarray):
+            audio = torch.from_numpy(audio)
+
        features = log_mel_spectrogram(audio, padding=N_SAMPLES - audio.shape[0])
        return {'inputs': features}

@@ -253,7 +256,7 @@ class FasterWhisperPipeline(Pipeline):
    def detect_language(self, audio: np.ndarray):
        if audio.shape[0] < N_SAMPLES:
            print("Warning: audio is shorter than 30s, language detection may be inaccurate.")
-        segment = log_mel_spectrogram(audio[: N_SAMPLES],
+        segment = log_mel_spectrogram(torch.from_numpy(audio[:N_SAMPLES]),
                                      padding=0 if audio.shape[0] >= N_SAMPLES else N_SAMPLES - audio.shape[0])
        encoder_output = self.model.encode(segment)
        results = self.model.model.detect_language(encoder_output)