mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
feat: enhance diarization with optional output of speaker embeddings
- Updated DiarizationPipeline to include a return_embeddings parameter for optional speaker embeddings.
- Modified assign_word_speakers to accept and process speaker embeddings.
- Updated CLI to support --speaker_embeddings flag for JSON output.
- Ensured backward compatibility for existing functionality.
This commit is contained in:

committed by
Barabazs

parent
d700b56c9c
commit
1631c3040f
@@ -44,6 +44,7 @@ def cli():
|
||||
parser.add_argument("--min_speakers", default=None, type=int, help="Minimum number of speakers to in audio file")
|
||||
parser.add_argument("--max_speakers", default=None, type=int, help="Maximum number of speakers to in audio file")
|
||||
parser.add_argument("--diarize_model", default="pyannote/speaker-diarization-3.1", type=str, help="Name of the speaker diarization model to use")
|
||||
parser.add_argument("--speaker_embeddings", action="store_true", help="Include speaker embeddings in JSON output (only works with --diarize)")
|
||||
|
||||
parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
|
||||
parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
|
||||
|
Reference in New Issue
Block a user