1 Commit

Author SHA1 Message Date
ea79d7e972 Merge c72c627d10 into b343241253 2025-06-03 10:43:51 -04:00

View File

@@ -189,7 +189,7 @@ result = model.transcribe(audio, batch_size=batch_size)
print(result["segments"]) # before alignment print(result["segments"]) # before alignment
# delete model if low on GPU resources # delete model if low on GPU resources
# import gc; import torch; gc.collect(); torch.cuda.empty_cache(); del model # import gc; gc.collect(); torch.cuda.empty_cache(); del model
# 2. Align whisper output # 2. Align whisper output
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device) model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
@@ -198,7 +198,7 @@ result = whisperx.align(result["segments"], model_a, metadata, audio, device, re
print(result["segments"]) # after alignment print(result["segments"]) # after alignment
# delete model if low on GPU resources # delete model if low on GPU resources
# import gc; import torch; gc.collect(); torch.cuda.empty_cache(); del model_a # import gc; gc.collect(); torch.cuda.empty_cache(); del model_a
# 3. Assign speaker labels # 3. Assign speaker labels
diarize_model = whisperx.diarize.DiarizationPipeline(use_auth_token=YOUR_HF_TOKEN, device=device) diarize_model = whisperx.diarize.DiarizationPipeline(use_auth_token=YOUR_HF_TOKEN, device=device)