init commit

2025-07-01 18:17:27 -04:00 · 2022-12-14 18:59:12 +00:00
parent 9791862c45
commit 9f6fa61160
38 changed files with 105726 additions and 2 deletions
--- a/tests/jfk.flac
+++ b/tests/jfk.flac
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@ -0,0 +1,19 @@
+import os.path
+
+import numpy as np
+
+from whisper.audio import load_audio, log_mel_spectrogram, SAMPLE_RATE
+
+
+def test_audio():
+    audio_path = os.path.join(os.path.dirname(__file__), "jfk.flac")
+    audio = load_audio(audio_path)
+    assert audio.ndim == 1
+    assert SAMPLE_RATE * 10 < audio.shape[0] < SAMPLE_RATE * 12
+    assert 0 < audio.std() < 1
+
+    mel_from_audio = log_mel_spectrogram(audio)
+    mel_from_file = log_mel_spectrogram(audio_path)
+
+    assert np.allclose(mel_from_audio, mel_from_file)
+    assert mel_from_audio.max() - mel_from_audio.min() <= 2.0
--- a/tests/test_normalizer.py
+++ b/tests/test_normalizer.py
@ -0,0 +1,92 @@
+import pytest
+
+from whisper.normalizers import EnglishTextNormalizer
+from whisper.normalizers.english import EnglishNumberNormalizer, EnglishSpellingNormalizer
+
+
+@pytest.mark.parametrize("std", [EnglishNumberNormalizer(), EnglishTextNormalizer()])
+def test_number_normalizer(std):
+    assert std("two") == "2"
+    assert std("thirty one") == "31"
+    assert std("five twenty four") == "524"
+    assert std("nineteen ninety nine") == "1999"
+    assert std("twenty nineteen") == "2019"
+
+    assert std("two point five million") == "2500000"
+    assert std("four point two billions") == "4200000000s"
+    assert std("200 thousand") == "200000"
+    assert std("200 thousand dollars") == "$200000"
+    assert std("$20 million") == "$20000000"
+    assert std("€52.4 million") == "€52400000"
+    assert std("£77 thousands") == "£77000s"
+
+    assert std("two double o eight") == "2008"
+
+    assert std("three thousand twenty nine") == "3029"
+    assert std("forty three thousand two hundred sixty") == "43260"
+    assert std("forty three thousand two hundred and sixty") == "43260"
+
+    assert std("nineteen fifties") == "1950s"
+    assert std("thirty first") == "31st"
+    assert std("thirty three thousand and three hundred and thirty third") == "33333rd"
+
+    assert std("three billion") == "3000000000"
+    assert std("millions") == "1000000s"
+
+    assert std("july third twenty twenty") == "july 3rd 2020"
+    assert std("august twenty sixth twenty twenty one") == "august 26th 2021"
+    assert std("3 14") == "3 14"
+    assert std("3.14") == "3.14"
+    assert std("3 point 2") == "3.2"
+    assert std("3 point 14") == "3.14"
+    assert std("fourteen point 4") == "14.4"
+    assert std("two point two five dollars") == "$2.25"
+    assert std("two hundred million dollars") == "$200000000"
+    assert std("$20.1 million") == "$20100000"
+
+    assert std("ninety percent") == "90%"
+    assert std("seventy six per cent") == "76%"
+
+    assert std("double oh seven") == "007"
+    assert std("double zero seven") == "007"
+    assert std("nine one one") == "911"
+    assert std("nine double one") == "911"
+    assert std("one triple oh one") == "10001"
+
+    assert std("two thousandth") == "2000th"
+    assert std("thirty two thousandth") == "32000th"
+
+    assert std("minus 500") == "-500"
+    assert std("positive twenty thousand") == "+20000"
+
+    assert std("two dollars and seventy cents") == "$2.70"
+    assert std("3 cents") == "¢3"
+    assert std("$0.36") == "¢36"
+    assert std("three euros and sixty five cents") == "€3.65"
+
+    assert std("three and a half million") == "3500000"
+    assert std("forty eight and a half dollars") == "$48.5"
+    assert std("b747") == "b 747"
+    assert std("10 th") == "10th"
+    assert std("10th") == "10th"
+
+
+def test_spelling_normalizer():
+    std = EnglishSpellingNormalizer()
+
+    assert std("mobilisation") == "mobilization"
+    assert std("cancelation") == "cancellation"
+
+
+def test_text_normalizer():
+    std = EnglishTextNormalizer()
+    assert std("Let's") == "let us"
+    assert std("he's like") == "he is like"
+    assert std("she's been like") == "she has been like"
+    assert std("10km") == "10 km"
+    assert std("RC232") == "rc 232"
+
+    assert (
+        std("Mr. Park visited Assoc. Prof. Kim Jr.")
+        == "mister park visited associate professor kim junior"
+    )
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@ -0,0 +1,14 @@
+from whisper.tokenizer import get_tokenizer
+
+
+def test_tokenizer():
+    gpt2_tokenizer = get_tokenizer(multilingual=False)
+    multilingual_tokenizer = get_tokenizer(multilingual=True)
+
+    text = "다람쥐 헌 쳇바퀴에 타고파"
+    gpt2_tokens = gpt2_tokenizer.encode(text)
+    multilingual_tokens = multilingual_tokenizer.encode(text)
+
+    assert gpt2_tokenizer.decode(gpt2_tokens) == text
+    assert multilingual_tokenizer.decode(multilingual_tokens) == text
+    assert len(gpt2_tokens) > len(multilingual_tokens)
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@ -0,0 +1,20 @@
+import os
+
+import pytest
+
+import whisper
+
+
+@pytest.mark.parametrize('model_name', whisper.available_models())
+def test_transcribe(model_name: str):
+    model = whisper.load_model(model_name).cuda()
+    audio_path = os.path.join(os.path.dirname(__file__), "jfk.flac")
+
+    language = "en" if model_name.endswith(".en") else None
+    result = model.transcribe(audio_path, language=language, temperature=0.0)
+    assert result["language"] == "en"
+
+    transcription = result["text"].lower()
+    assert "my fellow americans" in transcription
+    assert "your country" in transcription
+    assert "do for you" in transcription