mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
Enable Hebrew support
This commit is contained in:
@ -38,6 +38,7 @@ DEFAULT_ALIGN_MODELS_HF = {
|
|||||||
"fa": "jonatasgrosman/wav2vec2-large-xlsr-53-persian",
|
"fa": "jonatasgrosman/wav2vec2-large-xlsr-53-persian",
|
||||||
"el": "jonatasgrosman/wav2vec2-large-xlsr-53-greek",
|
"el": "jonatasgrosman/wav2vec2-large-xlsr-53-greek",
|
||||||
"tr": "mpoyraz/wav2vec2-xls-r-300m-cv7-turkish",
|
"tr": "mpoyraz/wav2vec2-xls-r-300m-cv7-turkish",
|
||||||
|
"he": "imvladikon/wav2vec2-xls-r-300m-hebrew",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -231,8 +232,13 @@ def align(
|
|||||||
|
|
||||||
emission = emissions[0].cpu().detach()
|
emission = emissions[0].cpu().detach()
|
||||||
|
|
||||||
trellis = get_trellis(emission, tokens)
|
blank_id = 0
|
||||||
path = backtrack(trellis, emission, tokens)
|
for char, code in model_dictionary.items():
|
||||||
|
if char == '[pad]' or char == '<pad>':
|
||||||
|
blank_id = code
|
||||||
|
|
||||||
|
trellis = get_trellis(emission, tokens, blank_id)
|
||||||
|
path = backtrack(trellis, emission, tokens, blank_id)
|
||||||
if path is None:
|
if path is None:
|
||||||
print(f'Failed to align segment ("{segment["text"]}"): backtrack failed, resorting to original...')
|
print(f'Failed to align segment ("{segment["text"]}"): backtrack failed, resorting to original...')
|
||||||
break
|
break
|
||||||
|
Reference in New Issue
Block a user