chore(writer): Join words without spaces for ja, zh

fix #248, fix #310
This commit is contained in:
陳鈞
2023-08-26 06:48:35 +08:00
parent adf455a97c
commit f505702dc7
2 changed files with 6 additions and 1 deletions

View File

@@ -213,6 +213,7 @@ def cli():
results.append((result, input_audio_path))
# >> Write
for result, audio_path in results:
result["language"] = align_language
writer(result, audio_path, writer_args)
if __name__ == "__main__":

View File

@@ -123,6 +123,7 @@ TO_LANGUAGE_CODE = {
"castilian": "es",
}
LANGUAGES_WITHOUT_SPACES = ["ja", "zh"]
system_encoding = sys.getdefaultencoding()
@@ -283,7 +284,10 @@ class SubtitlesWriter(ResultWriter):
sstart, ssend, speaker = _[0]
subtitle_start = self.format_timestamp(sstart)
subtitle_end = self.format_timestamp(ssend)
subtitle_text = " ".join([word["word"] for word in subtitle])
if result["language"] in LANGUAGES_WITHOUT_SPACES:
subtitle_text = "".join([word["word"] for word in subtitle])
else:
subtitle_text = " ".join([word["word"] for word in subtitle])
has_timing = any(["start" in word for word in subtitle])
# add [$SPEAKER_ID]: to each subtitle if speaker is available