From d7f1d16f1927bfaf6b2b62de1d68daae470f2721 Mon Sep 17 00:00:00 2001 From: Max Bain Date: Mon, 5 Jun 2023 15:44:17 +0100 Subject: [PATCH] suppress numerals change logic --- whisperx/asr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/whisperx/asr.py b/whisperx/asr.py index 501b21d..09454c9 100644 --- a/whisperx/asr.py +++ b/whisperx/asr.py @@ -17,7 +17,8 @@ def find_numeral_symbol_tokens(tokenizer): numeral_symbol_tokens = [] for i in range(tokenizer.eot): token = tokenizer.decode([i]).removeprefix(" ") - if all(c in "0123456789@#%&*+=_$:-.,?!" for c in token): + has_numeral_symbol = any(c in "0123456789%$£" for c in token) + if has_numeral_symbol: numeral_symbol_tokens.append(i) return numeral_symbol_tokens