diff --git a/whisperx/diarize.py b/whisperx/diarize.py
index 320d2a4..cf1e5cb 100644
--- a/whisperx/diarize.py
+++ b/whisperx/diarize.py
@@ -27,6 +27,10 @@ class DiarizationPipeline:
 def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
     transcript_segments = transcript_result["segments"]
     for seg in transcript_segments:
+        # Replace double quotes in the segment text with single quotes.
+        if 'text' in seg:
+            seg["text"] = seg["text"].replace('"', "'")
+
         # assign speaker to segment (if any)
         diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start'])
         diarize_df['union'] = np.maximum(diarize_df['end'], seg['end']) - np.minimum(diarize_df['start'], seg['start'])
@@ -43,6 +47,10 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
         # assign speaker to words
         if 'words' in seg:
             for word in seg['words']:
+                # `word` is a mapping (it is indexed by 'start'/'end' below), so rewrite its text
+                # field in place. NOTE(review): assumes the token text is stored under 'word' - confirm.
+                if 'word' in word:
+                    word['word'] = word['word'].replace('"', "'")
                 if 'start' in word:
                     diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start'])
                     diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start'])