mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
v2 lets goo
This commit is contained in:
@ -372,10 +372,7 @@ def align(
|
|||||||
for sdx, srow in segments_arr.iterrows():
|
for sdx, srow in segments_arr.iterrows():
|
||||||
|
|
||||||
seg_idx = int(srow["segment-idx"])
|
seg_idx = int(srow["segment-idx"])
|
||||||
try:
|
|
||||||
sub_start = int(srow["subsegment-idx-start"])
|
sub_start = int(srow["subsegment-idx-start"])
|
||||||
except:
|
|
||||||
import pdb; pdb.set_trace()
|
|
||||||
sub_end = int(srow["subsegment-idx-end"])
|
sub_end = int(srow["subsegment-idx-end"])
|
||||||
|
|
||||||
seg = transcript[seg_idx]
|
seg = transcript[seg_idx]
|
||||||
|
@ -302,17 +302,3 @@ def merge_chunks(segments, chunk_size):
|
|||||||
"segments": seg_idxs,
|
"segments": seg_idxs,
|
||||||
})
|
})
|
||||||
return merged_segments
|
return merged_segments
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
import pandas as pd
|
|
||||||
input_fp = "tt298650_sync.wav"
|
|
||||||
df = pd.read_csv(f"/work/maxbain/tmp/{input_fp}.sad", sep=" ", header=None)
|
|
||||||
print(len(df))
|
|
||||||
N = 0.15
|
|
||||||
g = df[0].sub(df[1].shift())
|
|
||||||
input_base = input_fp.split('.')[0]
|
|
||||||
df = df.groupby(g.gt(N).cumsum()).agg({0:'min', 1:'max'})
|
|
||||||
df.to_csv(f"/work/maxbain/tmp/{input_base}.lab", header=None, index=False, sep=" ")
|
|
||||||
print(df)
|
|
||||||
import pdb; pdb.set_trace()
|
|
Reference in New Issue
Block a user