mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
v2 lets goo
This commit is contained in:
@ -372,10 +372,7 @@ def align(
|
||||
for sdx, srow in segments_arr.iterrows():
|
||||
|
||||
seg_idx = int(srow["segment-idx"])
|
||||
try:
|
||||
sub_start = int(srow["subsegment-idx-start"])
|
||||
except:
|
||||
import pdb; pdb.set_trace()
|
||||
sub_start = int(srow["subsegment-idx-start"])
|
||||
sub_end = int(srow["subsegment-idx-end"])
|
||||
|
||||
seg = transcript[seg_idx]
|
||||
|
@ -301,18 +301,4 @@ def merge_chunks(segments, chunk_size):
|
||||
"end": curr_end,
|
||||
"segments": seg_idxs,
|
||||
})
|
||||
return merged_segments
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import pandas as pd
|
||||
input_fp = "tt298650_sync.wav"
|
||||
df = pd.read_csv(f"/work/maxbain/tmp/{input_fp}.sad", sep=" ", header=None)
|
||||
print(len(df))
|
||||
N = 0.15
|
||||
g = df[0].sub(df[1].shift())
|
||||
input_base = input_fp.split('.')[0]
|
||||
df = df.groupby(g.gt(N).cumsum()).agg({0:'min', 1:'max'})
|
||||
df.to_csv(f"/work/maxbain/tmp/{input_base}.lab", header=None, index=False, sep=" ")
|
||||
print(df)
|
||||
import pdb; pdb.set_trace()
|
||||
return merged_segments
|
Reference in New Issue
Block a user