Merge pull request #570 from hidenori-endo/main

Drop ffmpeg-python dependency and call ffmpeg directly.
This commit is contained in:
Max Bain
2023-11-09 18:39:53 +00:00
committed by GitHub
2 changed files with 22 additions and 10 deletions

View File

@ -2,7 +2,6 @@ torch>=2
torchaudio>=2 torchaudio>=2
faster-whisper>=0.8 faster-whisper>=0.8
transformers transformers
ffmpeg-python>=0.2
pandas pandas
setuptools>=65 setuptools>=65
nltk nltk

View File

@ -1,8 +1,8 @@
import os import os
import subprocess
from functools import lru_cache from functools import lru_cache
from typing import Optional, Union from typing import Optional, Union
import ffmpeg
import numpy as np import numpy as np
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
@ -40,14 +40,27 @@ def load_audio(file: str, sr: int = SAMPLE_RATE):
A NumPy array containing the audio waveform, in float32 dtype. A NumPy array containing the audio waveform, in float32 dtype.
""" """
try: try:
# This launches a subprocess to decode audio while down-mixing and resampling as necessary. # Launches a subprocess to decode audio while down-mixing and resampling as necessary.
# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. # Requires the ffmpeg CLI to be installed.
out, _ = ( cmd = [
ffmpeg.input(file, threads=0) "ffmpeg",
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr) "-nostdin",
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) "-threads",
) "0",
except ffmpeg.Error as e: "-i",
file,
"-f",
"s16le",
"-ac",
"1",
"-acodec",
"pcm_s16le",
"-ar",
str(sr),
"-",
]
out = subprocess.run(cmd, capture_output=True, check=True).stdout
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0 return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0