Add Audacity export

This exports the transcript to a text file that can be imported directly into Audacity as a label file. This is useful for quickly checking the transcript-audio alignment.
This commit is contained in:
CaraDuf
2023-06-07 05:49:49 +02:00
committed by GitHub
parent 0c84c26d92
commit 076ff96eb2

View File

@ -365,6 +365,28 @@ class WriteTSV(ResultWriter):
print(round(1000 * segment["end"]), file=file, end="\t")
print(segment["text"].strip().replace("\t", " "), file=file, flush=True)
class WriteAudacity(ResultWriter):
    """
    Write a transcript to a text file that Audacity can import as labels.

    Each segment is written as one line with three tab-separated fields:
    start time, end time and label text.

    The extension used is "aud" to distinguish it from the txt file produced
    by WriteTXT. Yet this is not an Audacity project but only a label file!

    Please note: Audacity uses seconds in timestamps, not ms, so the segment
    times are written as-is. No header line is written.

    If a segment has a "speaker" key, the speaker is prepended to the text
    between double square brackets, e.g. ``[[SPEAKER_00]]Hello``.
    """

    extension: str = "aud"

    def write_result(self, result: dict, file: TextIO, options: dict):
        """
        Write every segment of ``result`` to ``file`` as an Audacity label row.

        Parameters
        ----------
        result : dict
            Must contain a "segments" list; each segment is read for its
            "start", "end" and "text" keys and, optionally, "speaker".
        file : TextIO
            Open text stream receiving the rows.
        options : dict
            Not used by this writer.
        """
        # The field delimiter must be a real TAB. Spell it as an escape so it
        # cannot be silently turned into spaces by an editor or formatter.
        separator = "\t"
        for segment in result["segments"]:
            speaker = f"[[{segment['speaker']}]]" if "speaker" in segment else ""
            # A tab inside the label would be read as an extra column, so
            # replace tabs in the whole label (speaker tag included).
            label = (speaker + segment["text"].strip()).replace(separator, " ")
            print(
                segment["start"],
                segment["end"],
                label,
                sep=separator,
                file=file,
                flush=True,
            )
class WriteJSON(ResultWriter):
extension: str = "json"
@ -377,6 +399,7 @@ def get_writer(
output_format: str, output_dir: str
) -> Callable[[dict, TextIO, dict], None]:
writers = {
"aud": WriteAudacity,
"txt": WriteTXT,
"vtt": WriteVTT,
"srt": WriteSRT,
@ -399,4 +422,4 @@ def interpolate_nans(x, method='nearest'):
if x.notnull().sum() > 1:
return x.interpolate(method=method).ffill().bfill()
else:
return x.ffill().bfill()
return x.ffill().bfill()