Replace double quotes with single quotes for JSON parsing

This commit is contained in:
Ahmad Bilal
2023-07-20 02:32:37 +05:00
parent 734ecc2844
commit 30eff5a01f

View File

@ -27,6 +27,9 @@ class DiarizationPipeline:
def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False): def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
transcript_segments = transcript_result["segments"] transcript_segments = transcript_result["segments"]
for seg in transcript_segments: for seg in transcript_segments:
if 'text' in seg:
seg["text"] = seg["text"].replace('"', "'")
# assign speaker to segment (if any) # assign speaker to segment (if any)
diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start']) diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'], seg['start'])
diarize_df['union'] = np.maximum(diarize_df['end'], seg['end']) - np.minimum(diarize_df['start'], seg['start']) diarize_df['union'] = np.maximum(diarize_df['end'], seg['end']) - np.minimum(diarize_df['start'], seg['start'])
@ -43,6 +46,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
# assign speaker to words # assign speaker to words
if 'words' in seg: if 'words' in seg:
for word in seg['words']: for word in seg['words']:
word = word.replace('"', "'")
if 'start' in word: if 'start' in word:
diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start']) diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(diarize_df['start'], word['start'])
diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start']) diarize_df['union'] = np.maximum(diarize_df['end'], word['end']) - np.minimum(diarize_df['start'], word['start'])