mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
missing word timestamps
This commit is contained in:
@ -330,8 +330,8 @@ def align(
|
||||
aligned_segments = []
|
||||
|
||||
prev_t2 = 0
|
||||
sdx = 0
|
||||
for segment in transcript:
|
||||
aligned_subsegments = []
|
||||
while True:
|
||||
segment_align_success = False
|
||||
|
||||
@ -505,7 +505,7 @@ def align(
|
||||
else:
|
||||
word_level = None
|
||||
|
||||
aligned_segments.append(
|
||||
aligned_subsegments.append(
|
||||
{
|
||||
"text": segment["seg-text"][sub_seg_idx],
|
||||
"start": seg_start_actual,
|
||||
@ -515,10 +515,7 @@ def align(
|
||||
}
|
||||
)
|
||||
if "language" in segment:
|
||||
aligned_segments[-1]["language"] = segment["language"]
|
||||
|
||||
print(f"[{format_timestamp(aligned_segments[-1]['start'])} --> {format_timestamp(aligned_segments[-1]['end'])}] {aligned_segments[-1]['text']}")
|
||||
|
||||
aligned_subsegments[-1]["language"] = segment["language"]
|
||||
|
||||
char_level = {
|
||||
"start": [],
|
||||
@ -555,12 +552,14 @@ def align(
|
||||
# reset prev_t2 due to drifting issues
|
||||
if not segment_align_success:
|
||||
prev_t2 = 0
|
||||
|
||||
start = interpolate_nans(pd.DataFrame(aligned_subsegments)["start"], method=interpolate_method)
|
||||
end = interpolate_nans(pd.DataFrame(aligned_subsegments)["end"], method=interpolate_method)
|
||||
for idx, seg in enumerate(aligned_subsegments):
|
||||
seg['start'] = start.iloc[idx]
|
||||
seg['end'] = end.iloc[idx]
|
||||
|
||||
# shift segment index by amount of sub-segments
|
||||
if "seg-text" in segment:
|
||||
sdx += len(segment["seg-text"])
|
||||
else:
|
||||
sdx += 1
|
||||
aligned_segments += aligned_subsegments
|
||||
|
||||
# create word level segments for .srt
|
||||
word_seg = []
|
||||
|
@ -4,7 +4,7 @@ from typing import Callable, TextIO, Iterator, Tuple
|
||||
import pandas as pd
|
||||
|
||||
def exact_div(x, y):
|
||||
***ert x % y == 0
|
||||
assert x % y == 0
|
||||
return x // y
|
||||
|
||||
|
||||
@ -30,7 +30,7 @@ def compression_ratio(text) -> float:
|
||||
|
||||
|
||||
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = '.'):
|
||||
***ert seconds >= 0, "non-negative timestamp expected"
|
||||
assert seconds >= 0, "non-negative timestamp expected"
|
||||
milliseconds = round(seconds * 1000.0)
|
||||
|
||||
hours = milliseconds // 3_600_000
|
||||
@ -105,11 +105,11 @@ def write_ass(transcript: Iterator[dict],
|
||||
strip=True, **kwargs):
|
||||
"""
|
||||
Credit: https://github.com/jianfch/stable-ts/blob/ff79549bd01f764427879f07ecd626c46a9a430a/stable_whisper/text_output.py
|
||||
Generate Advanced SubStation Alpha (***) file from results to
|
||||
Generate Advanced SubStation Alpha (ass) file from results to
|
||||
display both phrase-level & word-level timestamp simultaneously by:
|
||||
-using segment-level timestamps display phrases as usual
|
||||
-using word-level timestamps change formats (e.g. color/underline) of the word in the displayed segment
|
||||
Note: *** file is used in the same way as srt, vtt, etc.
|
||||
Note: ass file is used in the same way as srt, vtt, etc.
|
||||
Parameters
|
||||
----------
|
||||
transcript: dict
|
||||
@ -125,14 +125,14 @@ def write_ass(transcript: Iterator[dict],
|
||||
whether to underline a word at its corresponding timestamp
|
||||
prefmt: str
|
||||
used to specify format for word-level timestamps (must be use with 'suffmt' and overrides 'color'&'underline')
|
||||
appears as such in the .*** file:
|
||||
appears as such in the .ass file:
|
||||
Hi, {<prefmt>}how{<suffmt>} are you?
|
||||
reference [Appendix A: Style override codes] in http://www.tcax.org/docs/***-specs.htm
|
||||
reference [Appendix A: Style override codes] in http://www.tcax.org/docs/ass-specs.htm
|
||||
suffmt: str
|
||||
used to specify format for word-level timestamps (must be use with 'prefmt' and overrides 'color'&'underline')
|
||||
appears as such in the .*** file:
|
||||
appears as such in the .ass file:
|
||||
Hi, {<prefmt>}how{<suffmt>} are you?
|
||||
reference [Appendix A: Style override codes] in http://www.tcax.org/docs/***-specs.htm
|
||||
reference [Appendix A: Style override codes] in http://www.tcax.org/docs/ass-specs.htm
|
||||
font: str
|
||||
word font (default: Arial)
|
||||
font_size: int
|
||||
@ -165,13 +165,13 @@ def write_ass(transcript: Iterator[dict],
|
||||
|
||||
styles = f'Style: {",".join(map(str, fmt_style_dict.values()))}'
|
||||
|
||||
***_str = f'[Script Info]\nScriptType: v4.00+\nPlayResX: 384\nPlayResY: 288\nScaledBorderAndShadow: yes\n\n' \
|
||||
ass_str = f'[Script Info]\nScriptType: v4.00+\nPlayResX: 384\nPlayResY: 288\nScaledBorderAndShadow: yes\n\n' \
|
||||
f'[V4+ Styles]\n{fmts}\n{styles}\n\n' \
|
||||
f'[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n'
|
||||
|
||||
if prefmt or suffmt:
|
||||
if suffmt:
|
||||
***ert prefmt, 'prefmt must be used along with suffmt'
|
||||
assert prefmt, 'prefmt must be used along with suffmt'
|
||||
else:
|
||||
suffmt = r'\r'
|
||||
else:
|
||||
@ -201,9 +201,9 @@ def write_ass(transcript: Iterator[dict],
|
||||
elif resolution == "char":
|
||||
resolution_key = "char-segments"
|
||||
else:
|
||||
raise ValueError(".*** resolution should be 'word' or 'char', not ", resolution)
|
||||
raise ValueError(".ass resolution should be 'word' or 'char', not ", resolution)
|
||||
|
||||
***_arr = []
|
||||
ass_arr = []
|
||||
|
||||
for segment in transcript:
|
||||
if resolution_key in segment:
|
||||
@ -231,7 +231,7 @@ def write_ass(transcript: Iterator[dict],
|
||||
"idx_1": -1
|
||||
}
|
||||
|
||||
***_arr.append(filler_ts)
|
||||
ass_arr.append(filler_ts)
|
||||
# highlight current word
|
||||
f_word_ts = {
|
||||
"chars": speaker_str + segment['text'],
|
||||
@ -240,12 +240,12 @@ def write_ass(transcript: Iterator[dict],
|
||||
"idx_0": idx_0 + len(speaker_str),
|
||||
"idx_1": idx_1 + len(speaker_str)
|
||||
}
|
||||
***_arr.append(f_word_ts)
|
||||
ass_arr.append(f_word_ts)
|
||||
prev = crow['end']
|
||||
|
||||
***_str += '\n'.join(map(lambda x: dialogue(**x), ***_arr))
|
||||
ass_str += '\n'.join(map(lambda x: dialogue(**x), ass_arr))
|
||||
|
||||
file.write(***_str)
|
||||
file.write(ass_str)
|
||||
|
||||
def interpolate_nans(x, method='nearest'):
|
||||
if x.notnull().sum() > 1:
|
||||
|
Reference in New Issue
Block a user