From 076ff96eb20f560f95a22eca7e5f4bcd01747070 Mon Sep 17 00:00:00 2001
From: CaraDuf <91517923+Ca-ressemble-a-du-fake@users.noreply.github.com>
Date: Wed, 7 Jun 2023 05:49:49 +0200
Subject: [PATCH 1/3] Add Audacity export

This exports the transcript to a text file that can be directly imported into Audacity as a label file. This is useful to quickly check the transcript-audio alignment.
---
 whisperx/utils.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/whisperx/utils.py b/whisperx/utils.py
index d042bb7..ff17cce 100644
--- a/whisperx/utils.py
+++ b/whisperx/utils.py
@@ -365,6 +365,28 @@ class WriteTSV(ResultWriter):
             print(round(1000 * segment["end"]), file=file, end="\t")
             print(segment["text"].strip().replace("\t", " "), file=file, flush=True)
 
+class WriteAudacity(ResultWriter):
+    """
+    Write a transcript to a text file that Audacity can import as labels.
+    The extension used is "aud" to distinguish it from the txt file produced by WriteTXT.
+    Note that the output is only a label file, not an Audacity project!
+
+    Please note: Audacity uses seconds in timestamps, not milliseconds, so start/end are written as-is.
+    No header line is written, because none is expected.
+
+    If a segment has a "speaker" key, it is prepended to the text between double square brackets [[]].
+    """
+
+    extension: str = "aud"    
+
+    def write_result(self, result: dict, file: TextIO, options: dict):
+        ARROW = "	"  # despite the name, this is a literal TAB: the column separator
+        for segment in result["segments"]:  # one output line per segment: start, end, label
+            print(segment["start"], file=file, end=ARROW)
+            print(segment["end"], file=file, end=ARROW)
+            print( ( ("[[" + segment["speaker"] + "]]") if "speaker" in segment else "") + segment["text"].strip().replace("\t", " "), file=file, flush=True)
+
+            
 
 class WriteJSON(ResultWriter):
     extension: str = "json"
@@ -377,6 +399,7 @@ def get_writer(
     output_format: str, output_dir: str
 ) -> Callable[[dict, TextIO, dict], None]:
     writers = {
+        "aud": WriteAudacity,
         "txt": WriteTXT,
         "vtt": WriteVTT,
         "srt": WriteSRT,
@@ -399,4 +422,4 @@ def interpolate_nans(x, method='nearest'):
     if x.notnull().sum() > 1:
         return x.interpolate(method=method).ffill().bfill()
     else:
-        return x.ffill().bfill()
\ No newline at end of file
+        return x.ffill().bfill()

From b13778fefd71c955eb93bd9c4d3bb9c850acd5db Mon Sep 17 00:00:00 2001
From: Max Bain <36994049+m-bain@users.noreply.github.com>
Date: Wed, 7 Jun 2023 11:47:49 +0100
Subject: [PATCH 2/3] make aud optional

---
 whisperx/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/whisperx/utils.py b/whisperx/utils.py
index ff17cce..36c7543 100644
--- a/whisperx/utils.py
+++ b/whisperx/utils.py
@@ -399,13 +399,15 @@ def get_writer(
     output_format: str, output_dir: str
 ) -> Callable[[dict, TextIO, dict], None]:
     writers = {
-        "aud": WriteAudacity,
         "txt": WriteTXT,
         "vtt": WriteVTT,
         "srt": WriteSRT,
         "tsv": WriteTSV,
         "json": WriteJSON,
     }
+    optional_writers = {
+        "aud": WriteAudacity,
+    }
 
     if output_format == "all":
         all_writers = [writer(output_dir) for writer in writers.values()]
@@ -416,6 +418,8 @@ def get_writer(
 
         return write_all
 
+    if output_format in optional_writers:
+        return optional_writers[output_format](output_dir)
     return writers[output_format](output_dir)
 
 def interpolate_nans(x, method='nearest'):

From d39c1b2319a8140911be7f37b8176f300a89e3da Mon Sep 17 00:00:00 2001
From: Max Bain <36994049+m-bain@users.noreply.github.com>
Date: Wed, 7 Jun 2023 11:48:49 +0100
Subject: [PATCH 3/3] add "aud" to output_format

---
 whisperx/transcribe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index 3bb1a36..1855178 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -26,7 +26,7 @@ def cli():
     parser.add_argument("--compute_type", default="float16", type=str, choices=["float16", "float32", "int8"], help="compute type for computation")
 
     parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
-    parser.add_argument("--output_format", "-f", type=str, default="all", choices=["all", "srt", "vtt", "txt", "tsv", "json"], help="format of the output file; if not specified, all available formats will be produced")
+    parser.add_argument("--output_format", "-f", type=str, default="all", choices=["all", "srt", "vtt", "txt", "tsv", "json", "aud"], help="format of the output file; if not specified, all available formats will be produced")
     parser.add_argument("--verbose", type=str2bool, default=True, help="whether to print out the progress and debug messages")
 
     parser.add_argument("--task", type=str, default="transcribe", choices=["transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -210,4 +210,4 @@ def cli():
         writer(result, audio_path, writer_args)
 
 if __name__ == "__main__":
-    cli()
\ No newline at end of file
+    cli()