# wordfreq/tests/test_numbers.py
#
# Repository web-view metadata captured with this file (not test code):
# 64 lines
# 2.1 KiB
# Python
# Raw Permalink Normal View History

from pytest import approx
from wordfreq import word_frequency
from wordfreq.numbers import digit_freq, smash_numbers
def test_number_smashing():
    """
    Check `smash_numbers` on a lone digit, a decimal, and a five-digit string.

    The single digit is expected back unchanged; in the other two inputs
    every digit is expected to become "0" while the "." stays in place.
    """
    expected_by_input = {
        "1": "1",
        "3.14": "0.00",
        "24601": "00000",
    }
    for text, smashed in expected_by_input.items():
        assert smash_numbers(text) == smashed
def test_decimals():
    """
    Compare frequencies of decimal numbers in two languages.

    Language code "el" is checked with "." as the decimal separator and
    "de" with ",". In both, the number starting with 3 must be more
    frequent than the one starting with 4, and changing only the last
    digit must leave the frequency exactly the same.
    """
    cases = (
        ("el", "3.14", "4.14", "3.15"),
        ("de", "3,14", "4,14", "3,15"),
    )
    for lang, base, other_lead, other_tail in cases:
        assert word_frequency(base, lang) > word_frequency(other_lead, lang)
        assert word_frequency(base, lang) == word_frequency(other_tail, lang)
# 2022-03-11 00:12:45 +00:00  (timestamp residue from the repository web view; not test code)
def test_eastern_arabic():
    """
    Compare frequencies of two-digit numbers written in Eastern Arabic
    digits, looked up with language code "ar".
    """
    # Two numbers that differ only in their final digit must tie exactly.
    first_freq = word_frequency("٥٤", "ar")
    sibling_freq = word_frequency("٥٣", "ar")
    assert first_freq == sibling_freq

    # A number with a different leading digit must be strictly more frequent.
    other_lead_freq = word_frequency("٤٣", "ar")
    baseline_freq = word_frequency("٥٤", "ar")
    assert other_lead_freq > baseline_freq
def test_year_distribution():
    """
    In English, "2010" must be more frequent than both "1010" and "3010".
    """
    for other_year in ("1010", "3010"):
        assert word_frequency("2010", "en") > word_frequency(other_year, "en")
def test_boundaries():
    """
    At each digit-count boundary, the all-nines number must be more frequent
    in English than the power of ten that follows it.
    """
    boundary_pairs = (
        ("9", "10"),
        ("99", "100"),
        ("999", "1000"),
        ("9999", "10000"),
    )
    for all_nines, next_power in boundary_pairs:
        assert word_frequency(all_nines, "en") > word_frequency(next_power, "en")
def test_multiple_words():
    """
    The frequency of a token containing digits must be approximately twice
    the frequency of a text that repeats that same token two times.
    """
    single_freq = word_frequency("2015b", "en")
    repeated_freq = word_frequency("2015b 2015b", "en")
    doubled = 2 * repeated_freq
    assert single_freq == approx(doubled)
def test_distribution():
    """
    Spot-check relative English frequencies of several digit strings.
    """
    # Two five-digit numbers: the first must be strictly more frequent.
    freq_24601 = word_frequency("24601", "en")
    freq_90210 = word_frequency("90210", "en")
    assert freq_24601 > freq_90210

    # A bare digit must beat its zero-padded three-character form.
    freq_bare = word_frequency("7", "en")
    freq_padded = word_frequency("007", "en")
    assert freq_bare > freq_padded

    # These two three-digit numbers must have exactly equal frequency.
    freq_404 = word_frequency("404", "en")
    freq_418 = word_frequency("418", "en")
    assert freq_404 == freq_418
def test_3digit_sum():
    """
    Test that the probability distribution given you have a 3-digit sequence
    adds up to approximately 1.
    """
    # Covers every zero-padded 3-digit string from "000" through "999".
    three_digit_sum = sum(digit_freq(f"{num:03d}") for num in range(0, 1000))
    assert three_digit_sum == approx(1.0)
def test_4digit_sum():
    """
    Sum `digit_freq` over every zero-padded 4-digit string, "0000" through
    "9999", and check that the total is above 0.999 and strictly below 1.0.
    """
    running_total = 0
    for num in range(0, 10000):
        running_total += digit_freq(f"{num:04d}")
    assert 0.999 < running_total < 1.0