From 601c91140fe9056b8a40a10c923e0d09c51cd948 Mon Sep 17 00:00:00 2001
From: Prashanth Ellina <prashanthellina@gmail.com>
Date: Sun, 30 Apr 2023 17:33:24 +0000
Subject: [PATCH] references #202, attempt to fix speaker diarization failing
 in v3

---
 whisperx/alignment.py  | 4 ++--
 whisperx/transcribe.py | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/whisperx/alignment.py b/whisperx/alignment.py
index 09f044f..e2c86f7 100644
--- a/whisperx/alignment.py
+++ b/whisperx/alignment.py
@@ -450,8 +450,8 @@ def align(
                 "end": srow["end"],
                 "text": text,
                 "words": word_list,
-                # "word-segments": wseg,
-                # "char-segments": cseg
+                "word-segments": wseg,
+                "char-segments": cseg
             }
         )
     
diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index fd6cf52..dab9e12 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -203,6 +203,12 @@ def cli():
 
     # >> Write
     for result, audio_path in results:
+        # Remove the pandas DataFrames from each segment so that
+        # the result can be serialized with json
+        for seg in result["segments"]:
+            seg.pop("word-segments", None)
+            seg.pop("char-segments", None)
+
         writer(result, audio_path, writer_args)
 
 if __name__ == "__main__":