From bb15d6b68edd9493d0c221d87ca7c72570b890e7 Mon Sep 17 00:00:00 2001
From: Thebys <github@iver.cz>
Date: Fri, 26 May 2023 21:17:01 +0200
Subject: [PATCH] Add Czech alignment model

This PR adds the following Czech alignment model: https://huggingface.co/comodoro/wav2vec2-xls-r-300m-cs-250.

I have successfully tested this with several Czech audio recordings up to 3 hours long, and the results are satisfactory.

However, I received the following warnings, and I am not sure how relevant they are:
```
Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.0.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint --file C:\Users\Thebys\.cache\torch\whisperx-vad-segmentation.bin`
Model was trained with pyannote.audio 0.0.1, yours is 2.1.1. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.0.0. Bad things might happen unless you revert torch to 1.x.
```
---
 whisperx/alignment.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/whisperx/alignment.py b/whisperx/alignment.py
index 8f84ee5..34153ec 100644
--- a/whisperx/alignment.py
+++ b/whisperx/alignment.py
@@ -33,6 +33,7 @@ DEFAULT_ALIGN_MODELS_HF = {
     "uk": "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm",
     "pt": "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese",
     "ar": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
+    "cs": "comodoro/wav2vec2-xls-r-300m-cs-250",
     "ru": "jonatasgrosman/wav2vec2-large-xlsr-53-russian",
     "pl": "jonatasgrosman/wav2vec2-large-xlsr-53-polish",
     "hu": "jonatasgrosman/wav2vec2-large-xlsr-53-hungarian",