feat: enhance diarization with optional output of speaker embeddings

- Updated DiarizationPipeline to include a return_embeddings parameter for optional speaker embeddings.
- Modified assign_word_speakers to accept and process speaker embeddings.
- Updated CLI to support --speaker_embeddings flag for JSON output.
- Ensured backward compatibility for existing functionality.
This commit is contained in:
Radu-Sebastian Amarie
2025-03-21 13:57:47 +00:00
committed by Barabazs
parent d700b56c9c
commit 1631c3040f
3 changed files with 79 additions and 11 deletions

View File

@@ -44,6 +44,7 @@ def cli():
parser.add_argument("--min_speakers", default=None, type=int, help="Minimum number of speakers to in audio file")
parser.add_argument("--max_speakers", default=None, type=int, help="Maximum number of speakers to in audio file")
parser.add_argument("--diarize_model", default="pyannote/speaker-diarization-3.1", type=str, help="Name of the speaker diarization model to use")
parser.add_argument("--speaker_embeddings", action="store_true", help="Include speaker embeddings in JSON output (only works with --diarize)")
parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")