mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
Fix errors when recognizing non-Japanese audio
This commit is contained in:
@ -282,8 +282,12 @@ def align(
|
|||||||
f2 = int(t2 * SAMPLE_RATE)
|
f2 = int(t2 * SAMPLE_RATE)
|
||||||
|
|
||||||
waveform_segment = audio[:, f1:f2]
|
waveform_segment = audio[:, f1:f2]
|
||||||
|
print(language)
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
emissions = model(waveform_segment.to(device)).logits
|
if language != 'ja':
|
||||||
|
emissions, _ = model(waveform_segment.to(device))
|
||||||
|
else:
|
||||||
|
emissions = model(waveform_segment.to(device)).logits
|
||||||
emissions = torch.log_softmax(emissions, dim=-1)
|
emissions = torch.log_softmax(emissions, dim=-1)
|
||||||
emission = emissions[0].cpu().detach()
|
emission = emissions[0].cpu().detach()
|
||||||
transcription = segment['text'].strip()
|
transcription = segment['text'].strip()
|
||||||
|
Reference in New Issue
Block a user