From 734ecc284456ab54497995572f819e4769fbb60d Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Date: Mon, 17 Jul 2023 19:29:41 +0500 Subject: [PATCH 1/6] Add Urdu model support for alignment --- whisperx/alignment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index 8d088be..3ffc3f7 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -48,6 +48,7 @@ DEFAULT_ALIGN_MODELS_HF = { "he": "imvladikon/wav2vec2-xls-r-300m-hebrew", "vi": 'nguyenvulebinh/wav2vec2-base-vi', "ko": "kresnik/wav2vec2-large-xlsr-korean", + "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu" } From 30eff5a01ff4dc5a883eff84a7e7c82327498137 Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Date: Thu, 20 Jul 2023 02:32:37 +0500 Subject: [PATCH 2/6] Replace double quotes to single for JSON parsing --- whisperx/diarize.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/whisperx/diarize.py b/whisperx/diarize.py index 320d2a4..cf1e5cb 100644 --- a/whisperx/diarize.py +++ b/whisperx/diarize.py @@ -27,6 +27,9 @@ class DiarizationPipeline: def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): transcript_segments = transcript_result["segments"] for seg in transcript_segments: + if 'text' in seg: + seg["text"] = seg["text"].replace('"', "'") + # assign speaker to segment (if any) diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start']) diarize_df['union'] = np.maximum(diarize_df['end'], seg['end']) - np.minimum(diarize_df['start'], seg['start']) @@ -43,6 +46,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): # assign speaker to words if 'words' in seg: for word in seg['words']: + word = word.replace('"', "'") if 'start' in word: diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start']) diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - 
np.minimum(diarize_df['start'], word['start']) From eb712f3999c0d37d4306063c71a473b214f13d18 Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Date: Thu, 20 Jul 2023 02:54:06 +0500 Subject: [PATCH 3/6] Rectify reference to the word --- whisperx/diarize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisperx/diarize.py b/whisperx/diarize.py index cf1e5cb..59ca9e2 100644 --- a/whisperx/diarize.py +++ b/whisperx/diarize.py @@ -46,7 +46,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): # assign speaker to words if 'words' in seg: for word in seg['words']: - word = word.replace('"', "'") + word["word"] = word["word"].replace('"', "'") if 'start' in word: diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start']) diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start']) From e92325b7eb03583f53f8c6d19d2ce7f7122aa344 Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Date: Thu, 20 Jul 2023 03:19:37 +0500 Subject: [PATCH 4/6] Remove the fix --- whisperx/diarize.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/whisperx/diarize.py b/whisperx/diarize.py index 59ca9e2..b14addf 100644 --- a/whisperx/diarize.py +++ b/whisperx/diarize.py @@ -27,8 +27,6 @@ class DiarizationPipeline: def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): transcript_segments = transcript_result["segments"] for seg in transcript_segments: - if 'text' in seg: - seg["text"] = seg["text"].replace('"', "'") # assign speaker to segment (if any) diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start']) @@ -46,7 +44,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): # assign speaker to words if 'words' in seg: for word in seg['words']: - word["word"] = word["word"].replace('"', "'") + if 'start' in word: 
diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start']) diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start']) From e6ecbaa68fcbb5a5010d42fc77d000eb362a6964 Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Date: Thu, 20 Jul 2023 03:20:47 +0500 Subject: [PATCH 5/6] Remove spacing --- whisperx/diarize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/whisperx/diarize.py b/whisperx/diarize.py index b14addf..320d2a4 100644 --- a/whisperx/diarize.py +++ b/whisperx/diarize.py @@ -27,7 +27,6 @@ class DiarizationPipeline: def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): transcript_segments = transcript_result["segments"] for seg in transcript_segments: - # assign speaker to segment (if any) diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start']) diarize_df['union'] = np.maximum(diarize_df['end'], seg['end']) - np.minimum(diarize_df['start'], seg['start']) @@ -44,7 +43,6 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): # assign speaker to words if 'words' in seg: for word in seg['words']: - if 'start' in word: diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start']) diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start']) From 8c0fee90d39004adb72acc48b378f3e2d8704a70 Mon Sep 17 00:00:00 2001 From: Max Bain <36994049+m-bain@users.noreply.github.com> Date: Mon, 24 Jul 2023 10:47:41 +0100 Subject: [PATCH 6/6] Update alignment.py --- whisperx/alignment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisperx/alignment.py b/whisperx/alignment.py index 3ffc3f7..2717bc4 100644 --- a/whisperx/alignment.py +++ b/whisperx/alignment.py @@ -48,7 +48,7 @@ DEFAULT_ALIGN_MODELS_HF = { "he": 
"imvladikon/wav2vec2-xls-r-300m-hebrew", "vi": 'nguyenvulebinh/wav2vec2-base-vi', "ko": "kresnik/wav2vec2-large-xlsr-korean", - "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu" + "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu", }