mirror of
https://github.com/m-bain/whisperX.git
synced 2025-07-01 18:17:27 -04:00
feat: add SegmentData type for temporary processing during alignment
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
from typing import TypedDict, Optional, List
|
||||
from typing import TypedDict, Optional, List, Tuple
|
||||
|
||||
|
||||
class SingleWordSegment(TypedDict):
|
||||
@ -30,6 +30,17 @@ class SingleSegment(TypedDict):
|
||||
text: str
|
||||
|
||||
|
||||
class SegmentData(TypedDict):
|
||||
"""
|
||||
Temporary processing data used during alignment.
|
||||
Contains cleaned and preprocessed data for each segment.
|
||||
"""
|
||||
clean_char: List[str] # Cleaned characters that exist in model dictionary
|
||||
clean_cdx: List[int] # Original indices of cleaned characters
|
||||
clean_wdx: List[int] # Indices of words containing valid characters
|
||||
sentence_spans: List[Tuple[int, int]] # Start and end indices of sentences
|
||||
|
||||
|
||||
class SingleAlignedSegment(TypedDict):
|
||||
"""
|
||||
A single segment (up to multiple sentences) of a speech with word alignment.
|
||||
|
Reference in New Issue
Block a user