feat: add SegmentData type for temporary processing during alignment

This commit is contained in:
Barabazs
2025-01-13 09:27:33 +01:00
parent 024bc8481b
commit 2f93e029c7
2 changed files with 21 additions and 3 deletions

View File

@@ -1,4 +1,4 @@
from typing import TypedDict, Optional, List
from typing import TypedDict, Optional, List, Tuple
class SingleWordSegment(TypedDict):
@@ -30,6 +30,17 @@ class SingleSegment(TypedDict):
text: str
class SegmentData(TypedDict):
    """Scratch data kept for one segment while alignment is running.

    Bundles the cleaned, preprocessed view of a segment. It is temporary
    processing data used during alignment.
    """

    # Characters kept after cleaning, i.e. those that exist in the model
    # dictionary.
    clean_char: List[str]
    # Original index of each cleaned character.
    clean_cdx: List[int]
    # Indices of the words that contain valid characters.
    clean_wdx: List[int]
    # One (start, end) index pair per sentence.
    sentence_spans: List[Tuple[int, int]]
class SingleAlignedSegment(TypedDict):
"""
A single segment (up to multiple sentences) of a speech with word alignment.