mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
Add Audacity export
This exports the transcript to a text file that can be imported directly into Audacity as a label file. This is useful for quickly checking the alignment between the transcript and the audio.
This commit is contained in:
@ -365,6 +365,28 @@ class WriteTSV(ResultWriter):
|
|||||||
print(round(1000 * segment["end"]), file=file, end="\t")
|
print(round(1000 * segment["end"]), file=file, end="\t")
|
||||||
print(segment["text"].strip().replace("\t", " "), file=file, flush=True)
|
print(segment["text"].strip().replace("\t", " "), file=file, flush=True)
|
||||||
|
|
||||||
|
class WriteAudacity(ResultWriter):
    """
    Write a transcript to a text file that Audacity can import as labels.

    The extension used is "aud" to distinguish the output from the txt file
    produced by WriteTXT. Note that this is not an Audacity project, only a
    label file.

    Each segment is written as one line with three tab-separated fields:
    start time, end time and label text. Audacity uses seconds (not
    milliseconds) in timestamps, and no header line is expected.

    If a segment has a speaker, it is prepended to the text between double
    square brackets ([[speaker]]).
    """

    extension: str = "aud"

    def write_result(self, result: dict, file: TextIO, options: dict):
        """Write every segment of ``result`` to ``file`` as a label line.

        Args:
            result: Transcription result; ``result["segments"]`` is iterated
                and each segment must provide "start", "end" and "text".
                An optional "speaker" entry is prepended to the label text.
            file: Open text stream that receives the label lines.
            options: Unused by this writer.
        """
        # Audacity label files are tab-delimited. Use the explicit escape
        # rather than a literal whitespace character inside the string so the
        # delimiter cannot be silently turned into a space by an editor.
        separator = "\t"
        for segment in result["segments"]:
            if "speaker" in segment:
                speaker_tag = "[[" + segment["speaker"] + "]]"
            else:
                speaker_tag = ""
            # A tab inside the text would be read as an extra field, so it is
            # replaced with a plain space.
            label = speaker_tag + segment["text"].strip().replace("\t", " ")
            print(
                segment["start"],
                segment["end"],
                label,
                sep=separator,
                file=file,
                flush=True,
            )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WriteJSON(ResultWriter):
|
class WriteJSON(ResultWriter):
|
||||||
extension: str = "json"
|
extension: str = "json"
|
||||||
@ -377,6 +399,7 @@ def get_writer(
|
|||||||
output_format: str, output_dir: str
|
output_format: str, output_dir: str
|
||||||
) -> Callable[[dict, TextIO, dict], None]:
|
) -> Callable[[dict, TextIO, dict], None]:
|
||||||
writers = {
|
writers = {
|
||||||
|
"aud": WriteAudacity,
|
||||||
"txt": WriteTXT,
|
"txt": WriteTXT,
|
||||||
"vtt": WriteVTT,
|
"vtt": WriteVTT,
|
||||||
"srt": WriteSRT,
|
"srt": WriteSRT,
|
||||||
|
Reference in New Issue
Block a user