From 5dfbfcbdc04e9a502a0c396ba9ac8febcdb2b557 Mon Sep 17 00:00:00 2001 From: pere Date: Tue, 19 Dec 2023 08:48:21 +0100 Subject: [PATCH] =?UTF-8?q?Adding=20Norwegian=20Bokm=C3=A5l=20and=20Norweg?= =?UTF-8?q?ian=20Nynorsk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding Wav2Vec2 models for Norwegian Bokmål and Norwegian Nynorsk. The models have been tested together with WhisperX and work great. For Bokmål I have added the 1B model, even though I see fairly little difference between it and the 300M model. For Norwegian Nynorsk only a 300M model exists. The quality of the Wav2Vec models is also reported here: https://arxiv.org/abs/2307.01672 --- whisperx/alignment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index cd7f8ec..8294983 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -53,6 +53,8 @@ DEFAULT_ALIGN_MODELS_HF = { "hi": "theainerd/Wav2Vec2-large-xlsr-hindi", "ca": "softcatala/wav2vec2-large-xlsr-catala", "ml": "gvs/wav2vec2-large-xlsr-malayalam", + "no": "NbAiLab/nb-wav2vec2-1b-bokmaal", + "nn": "NbAiLab/nb-wav2vec2-300m-nynorsk", }