9 Commits

Author SHA1 Message Date
b94778fd60 Merge 399010fd12 into d700b56c9c 2025-06-13 16:23:14 +00:00
399010fd12 Revert "docs: add troubleshooting section for libcudnn dependencies in README"
This reverts commit 6fe0a8784a.

Revert the commit now that the issue is fixed.

Signed-off-by: CHEN, CHUN <jim60105@gmail.com>
2025-06-14 00:22:57 +08:00
d3dcb1175f chore: restrict onnxruntime to version 1.19 for python 3.9 compatibility
- Restrict the onnxruntime dependency to versions >=1.19 and <1.20.0 to avoid potential compatibility issues.

Signed-off-by: CHEN, CHUN <jim60105@gmail.com>
2025-06-14 00:21:53 +08:00
4f99f1f67c chore: restrict torch version to below 2.4 in dependencies
torch depends on libcudnn9 from version 2.4.0 onward.
If we restrict torch<2.4.0, there is no need to manually install libcudnn8 and also save about 1GB disk space.

- Update torch dependency to be below version 2.4.0 instead of at least 2.5.1
- Change torchaudio dependency to have no minimum version specified

Signed-off-by: CHEN, CHUN <jim60105@gmail.com>
2025-06-14 00:21:53 +08:00
d700b56c9c docs: add missing torch import to Python usage example in README 2025-06-08 03:34:49 -06:00
bog
b343241253 feat: add diarize_model arg to CLI (#1101) 2025-05-31 13:32:31 +02:00
6fe0a8784a docs: add troubleshooting section for libcudnn dependencies in README 2025-05-31 05:20:06 -06:00
5012650d0f chore: update lockfile 2025-05-03 16:25:43 +02:00
108bd0c400 chore: add lockfile check step to CI workflows 2025-05-03 16:25:43 +02:00
8 changed files with 1780 additions and 1701 deletions

View File

@@ -17,6 +17,9 @@ jobs:
version: "0.5.14"
python-version: "3.9"
- name: Check if lockfile is up to date
run: uv lock --check
- name: Build package
run: uv build

View File

@@ -23,6 +23,9 @@ jobs:
version: "0.5.14"
python-version: ${{ matrix.python-version }}
- name: Check if lockfile is up to date
run: uv lock --check
- name: Install the project
run: uv sync --all-extras

View File

@@ -170,7 +170,7 @@ result = model.transcribe(audio, batch_size=batch_size)
print(result["segments"]) # before alignment
# delete model if low on GPU resources
# import gc; gc.collect(); torch.cuda.empty_cache(); del model
# import gc; import torch; gc.collect(); torch.cuda.empty_cache(); del model
# 2. Align whisper output
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
@@ -179,7 +179,7 @@ result = whisperx.align(result["segments"], model_a, metadata, audio, device, re
print(result["segments"]) # after alignment
# delete model if low on GPU resources
# import gc; gc.collect(); torch.cuda.empty_cache(); del model_a
# import gc; import torch; gc.collect(); torch.cuda.empty_cache(); del model_a
# 3. Assign speaker labels
diarize_model = whisperx.diarize.DiarizationPipeline(use_auth_token=YOUR_HF_TOKEN, device=device)

View File

@@ -13,11 +13,11 @@ dependencies = [
"faster-whisper>=1.1.1",
"nltk>=3.9.1",
"numpy>=2.0.2",
"onnxruntime>=1.19",
"onnxruntime>=1.19,<1.20.0",
"pandas>=2.2.3",
"pyannote-audio>=3.3.2",
"torch>=2.5.1",
"torchaudio>=2.5.1",
"torch<2.4.0",
"torchaudio",
"transformers>=4.48.0",
]

3455
uv.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -43,6 +43,7 @@ def cli():
parser.add_argument("--diarize", action="store_true", help="Apply diarization to assign speaker labels to each segment/word")
parser.add_argument("--min_speakers", default=None, type=int, help="Minimum number of speakers to in audio file")
parser.add_argument("--max_speakers", default=None, type=int, help="Maximum number of speakers to in audio file")
parser.add_argument("--diarize_model", default="pyannote/speaker-diarization-3.1", type=str, help="Name of the speaker diarization model to use")
parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")

View File

@@ -11,13 +11,14 @@ from whisperx.types import TranscriptionResult, AlignedTranscriptionResult
class DiarizationPipeline:
def __init__(
self,
model_name="pyannote/speaker-diarization-3.1",
model_name=None,
use_auth_token=None,
device: Optional[Union[str, torch.device]] = "cpu",
):
if isinstance(device, str):
device = torch.device(device)
self.model = Pipeline.from_pretrained(model_name, use_auth_token=use_auth_token).to(device)
model_config = model_name or "pyannote/speaker-diarization-3.1"
self.model = Pipeline.from_pretrained(model_config, use_auth_token=use_auth_token).to(device)
def __call__(
self,

View File

@@ -57,6 +57,7 @@ def transcribe_task(args: dict, parser: argparse.ArgumentParser):
diarize: bool = args.pop("diarize")
min_speakers: int = args.pop("min_speakers")
max_speakers: int = args.pop("max_speakers")
diarize_model_name: str = args.pop("diarize_model")
print_progress: bool = args.pop("print_progress")
if args["language"] is not None:
@@ -204,8 +205,9 @@ def transcribe_task(args: dict, parser: argparse.ArgumentParser):
)
tmp_results = results
print(">>Performing diarization...")
print(">>Using model:", diarize_model_name)
results = []
diarize_model = DiarizationPipeline(use_auth_token=hf_token, device=device)
diarize_model = DiarizationPipeline(model_name=diarize_model_name, use_auth_token=hf_token, device=device)
for result, input_audio_path in tmp_results:
diarize_segments = diarize_model(
input_audio_path, min_speakers=min_speakers, max_speakers=max_speakers