Merge ae7ea9f4b6 into d700b56c9c

2025-07-01 18:17:27 -04:00 · 2025-06-17 19:02:36 +02:00
parent d700b56c9c ae7ea9f4b6
commit ac1a189e74
1 changed files with 26 additions and 4 deletions
--- a/whisperx/audio.py
+++ b/whisperx/audio.py
@ -58,11 +58,33 @@ def load_audio(file: str, sr: int = SAMPLE_RATE) -> np.ndarray:
            str(sr),
            "-",
        ]
        out = subprocess.run(cmd, capture_output=True, check=True).stdout
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
-    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=10 * 1024 * 1024
        )
        out = bytearray()
        while True:
            chunk = process.stdout.read(1024 * 1024)
            if not chunk:
                break
            out.extend(chunk)
        stderr_output = process.stderr.read()
        return_code = process.wait()
        if return_code != 0:
            raise RuntimeError(f"FFmpeg process failed with error: {stderr_output.decode()}")
        if len(out) % 2 != 0:
            raise ValueError("Audio buffer size is not aligned to int16.")
        return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
    except Exception as e:
        raise RuntimeError(f"Error loading audio file {file}: {str(e)}")
 def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):