Mirror of https://github.com/rspeer/wordfreq.git, synced 2024-12-23 09:21:37 +00:00
estimate the freq distribution of numbers
This commit is contained in:
parent 4e373750e8
commit bf05b1b1dc
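For context, the test changes in this commit exercise wordfreq's public API. A minimal usage sketch follows; the function names and expected values are taken from the updated assertions in the diff below (test_gender_neutral_at and test_languages), not verified independently.

# Sketch only: expected outputs mirror the updated tests in this commit.
from wordfreq import word_frequency, lossy_tokenize

# The digit "2" has a nonzero frequency in every available wordlist.
assert word_frequency("2", "en") > 0

# Per the updated test_gender_neutral_at assertion, lossy_tokenize() keeps the
# digits in "22@" rather than collapsing them to "00@".
assert lossy_tokenize("el distrito 22@ de Barcelona", "es") == [
    "el",
    "distrito",
    "22@",
    "de",
    "barcelona",
]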
244  poetry.lock (generated)
@@ -61,7 +61,7 @@ uvloop = ["uvloop (>=0.15.2)"]

[[package]]
name = "click"
version = "8.0.3"
version = "8.0.4"
description = "Composable command line interface toolkit"
category = "dev"
optional = false
@@ -103,17 +103,14 @@ pyflakes = ">=2.4.0,<2.5.0"

[[package]]
name = "ftfy"
version = "6.0.3"
description = "Fixes some problems with Unicode text after the fact"
version = "6.1.1"
description = "Fixes mojibake and other problems with Unicode, after the fact"
category = "main"
optional = false
python-versions = ">=3.6"
python-versions = ">=3.7,<4"

[package.dependencies]
wcwidth = "*"

[package.extras]
docs = ["furo", "sphinx"]
wcwidth = ">=0.2.5"

[[package]]
name = "importlib-metadata"
@@ -149,7 +146,7 @@ python-versions = "*"

[[package]]
name = "ipython"
version = "7.31.1"
version = "7.32.0"
description = "IPython: Productive Interactive Computing"
category = "dev"
optional = false
@@ -242,7 +239,7 @@ python-versions = "*"

[[package]]
name = "mecab-python3"
version = "1.0.4"
version = "1.0.5"
description = "Python wrapper for the MeCab morphological analyzer for Japanese"
category = "dev"
optional = false
@@ -338,7 +335,7 @@ python-versions = "*"

[[package]]
name = "platformdirs"
version = "2.5.0"
version = "2.5.1"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
@@ -365,7 +362,7 @@ testing = ["pytest", "pytest-benchmark"]

[[package]]
name = "prompt-toolkit"
version = "3.0.27"
version = "3.0.28"
description = "Library for building powerful interactive command lines in Python"
category = "dev"
optional = false
@@ -449,11 +446,11 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm

[[package]]
name = "regex"
version = "2022.1.18"
version = "2022.3.2"
description = "Alternative regular expression module, to replace re."
category = "main"
optional = false
python-versions = "*"
python-versions = ">=3.6"

[[package]]
name = "toml"
@@ -492,7 +489,7 @@ python-versions = ">=3.6"

[[package]]
name = "types-setuptools"
version = "57.4.9"
version = "57.4.10"
description = "Typing stubs for setuptools"
category = "dev"
optional = false
@@ -500,7 +497,7 @@ python-versions = "*"

[[package]]
name = "typing-extensions"
version = "4.0.1"
version = "4.1.1"
description = "Backported and Experimental Type Hints for Python 3.6+"
category = "main"
optional = false
@@ -529,7 +526,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
[metadata]
lock-version = "1.1"
python-versions = "^3.7"
content-hash = "a3b1a9c3b80e338764f1907a77e31f59d6e1e231092b7813182e09e55d7c2f45"
content-hash = "8507a13e0c8c79c30e911cc5f32bdc35284304246ae50531917df6197d7dcab8"

[metadata.files]
appnope = [
@ -574,8 +571,8 @@ black = [
|
||||
{file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"},
|
||||
]
|
||||
click = [
|
||||
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"},
|
||||
{file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"},
|
||||
{file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
|
||||
{file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
|
||||
]
|
||||
colorama = [
|
||||
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
|
||||
@ -590,7 +587,8 @@ flake8 = [
|
||||
{file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
|
||||
]
|
||||
ftfy = [
|
||||
{file = "ftfy-6.0.3.tar.gz", hash = "sha256:ba71121a9c8d7790d3e833c6c1021143f3e5c4118293ec3afb5d43ed9ca8e72b"},
|
||||
{file = "ftfy-6.1.1-py3-none-any.whl", hash = "sha256:0ffd33fce16b54cccaec78d6ec73d95ad370e5df5a25255c8966a6147bd667ca"},
|
||||
{file = "ftfy-6.1.1.tar.gz", hash = "sha256:bfc2019f84fcd851419152320a6375604a0f1459c281b5b199b2cd0d2e727f8f"},
|
||||
]
|
||||
importlib-metadata = [
|
||||
{file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"},
|
||||
@ -604,8 +602,8 @@ ipadic = [
|
||||
{file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"},
|
||||
]
|
||||
ipython = [
|
||||
{file = "ipython-7.31.1-py3-none-any.whl", hash = "sha256:55df3e0bd0f94e715abd968bedd89d4e8a7bce4bf498fb123fed4f5398fea874"},
|
||||
{file = "ipython-7.31.1.tar.gz", hash = "sha256:b5548ec5329a4bcf054a5deed5099b0f9622eb9ea51aaa7104d215fece201d8c"},
|
||||
{file = "ipython-7.32.0-py3-none-any.whl", hash = "sha256:86df2cf291c6c70b5be6a7b608650420e89180c8ec74f376a34e2dc15c3400e7"},
|
||||
{file = "ipython-7.32.0.tar.gz", hash = "sha256:468abefc45c15419e3c8e8c0a6a5c115b2127bafa34d7c641b1d443658793909"},
|
||||
]
|
||||
jedi = [
|
||||
{file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"},
|
||||
@ -630,23 +628,27 @@ mecab-ko-dic = [
|
||||
{file = "mecab-ko-dic-1.0.0.tar.gz", hash = "sha256:3ba22858736e02e8a0e92f2a7f099528c733ae47701b29d12c75e982a85d1f11"},
|
||||
]
|
||||
mecab-python3 = [
|
||||
{file = "mecab-python3-1.0.4.tar.gz", hash = "sha256:b150ad5fe4260539b4ef184657e552ef81307fbbe60ae1f258bc814549ea90f8"},
|
||||
{file = "mecab_python3-1.0.4-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3c7e87c65160e5e4edb08cb80dbce50f4e711c53f45063321aab72ab2566ffe4"},
|
||||
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2fbed960ef82f4192b31efd88af1f3c24cd1692b62720ed70d7e314a50f581e"},
|
||||
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cb6eb6cc47e3937a2edfaa9595dc2d165ed9f025e3a53bd0a5033a12fa6bcdcf"},
|
||||
{file = "mecab_python3-1.0.4-cp36-cp36m-win_amd64.whl", hash = "sha256:b149b51f0f62c9512d219c9e79c6db2eb66e70863a97eb412d8fc3ba7a25f351"},
|
||||
{file = "mecab_python3-1.0.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:c1606b35df0136b3e9dc7add2e69d2c1151e69fd5675c0cde62d0b017b2319e7"},
|
||||
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53b0b899ef03f364bfd7fa28f260ee1e893e4f47ff90a141a522709b892f0a4e"},
|
||||
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:52a789c708f8b89044236201eb03c7fe5517fad5210a9de2230c7d99a2a8c760"},
|
||||
{file = "mecab_python3-1.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6ca73c0dec72038290faa6de17d57d771535eb47c22346e170dffcb82d696bb"},
|
||||
{file = "mecab_python3-1.0.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:18e14dfe3d8c66cfa1c9f49e3bc8ac480b79a433ec9e5b5d2c1fb73f36ec7c3e"},
|
||||
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:221256b84be0ee29dc8fa450210236b40707b9d63cfc70de5102d2531622d062"},
|
||||
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:de39b82f44d97fc0fd636644ad14c9662f51afcd73775379d5a8b1eb20ee85a6"},
|
||||
{file = "mecab_python3-1.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:96d9e8c098401fb4b5bd32258f4952f3b22cdb30ab291f5ff82eae1d0941cbed"},
|
||||
{file = "mecab_python3-1.0.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:dcd62ebf2eecde1263119b92ff5379a046bb8231cb999fafda00f0925dfcb67e"},
|
||||
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178b632b717e3249054a7ad4c0fbc60ce8493d357afa7673d535ffa11e45eaba"},
|
||||
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fbfad60261ad3b9390b8615528fc013302a3e8febba220f799216c1a1154ee7e"},
|
||||
{file = "mecab_python3-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:445b4f5ee5674d85f6de2726ec28991801844ff71eb096129da5f5ba077d5a87"},
|
||||
{file = "mecab-python3-1.0.5.tar.gz", hash = "sha256:e703d78c88a671abb8170351644850015d9bbfab31530a3b40d12481a6779a11"},
|
||||
{file = "mecab_python3-1.0.5-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8a64bd228704ed9b24da5cbd6c4e325ef22310227153ef481f9037183351aa10"},
|
||||
{file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf083884116fa05ca0394c4c8d62013a4954fbac414c33a1931906ddf0f3585a"},
|
||||
{file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fe020df27b249f43df3d38b84473d226e36d6d4a31f951cedbddabfcc450e36"},
|
||||
{file = "mecab_python3-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:644f781de083311fcf81f7d55f21a756ceef7ebae7c111bd50a2c9d0855c1927"},
|
||||
{file = "mecab_python3-1.0.5-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:4309a91f0d5b66d3f0e8c9ba5a4d3cf7dbac1334269338704599820e051d1d7f"},
|
||||
{file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be2d1cd2ecd1f04b91eb0e26c906f21b50b8526e977f7f01f3901f9a6306944"},
|
||||
{file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:644bbde31ab1244ff18fb1dcac1e5fee8121f8b27a5c3e041c01ebc301df9266"},
|
||||
{file = "mecab_python3-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:401a2d1608b6503cb755d7d864ad74b64a7a4346309235f84577de807bb29050"},
|
||||
{file = "mecab_python3-1.0.5-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:5f91d5d8a9ac0ea7351e5e2423df98dd463b02013e006b18096cd365de37b2a9"},
|
||||
{file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8ce0151b973f4ca15e651619264442011568ebe48c6fce51d55e64f7e5c2e1"},
|
||||
{file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e381df4c55f3ec5bccbb5625c65c54ecf982c215574d1102aff2803ac1a24cd"},
|
||||
{file = "mecab_python3-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8eaaa78227f470c4cf1d6c2a87b92889041f317517fbe65e635b86ea0c84a194"},
|
||||
{file = "mecab_python3-1.0.5-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:dd8601565dd1331ee5cd67bcc45f713cebc14b730ee2e956ed120a0ec6e4fd8a"},
|
||||
{file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76a40f717f9592bd12edc7bcf1fa869f4c8058e5d0b80d4cc6c301435afb1f96"},
|
||||
{file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f299d6ef96495371f5a622a7004a205e303dabba1fc3a7f9a07e741e315ed2b"},
|
||||
{file = "mecab_python3-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4cdb07edbbd508d9b98ac9529e0ff0b89d93e50a6beeb7b8b946439594bf5e01"},
|
||||
{file = "mecab_python3-1.0.5-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:eb412a25e485e33d7ab69262b58f7365b727f8c447e4c9c1c56b5fd91414ecd2"},
|
||||
{file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e8ac11ef4440418312dd4f1f200f7957fdc0148bb49dc049264c5d07bed527"},
|
||||
{file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae1c126cf4982035794042280998066c8b6d26eb89136731078d9105a7070c13"},
|
||||
{file = "mecab_python3-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:34a196c6a410e57f975ee077d075ac994b94bb6930b04e207e59e7c7521ecb58"},
|
||||
]
|
||||
msgpack = [
|
||||
{file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96acc674bb9c9be63fa8b6dabc3248fdc575c4adc005c440ad02f87ca7edd079"},
|
||||
@ -731,16 +733,16 @@ pickleshare = [
|
||||
{file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"},
|
||||
]
|
||||
platformdirs = [
|
||||
{file = "platformdirs-2.5.0-py3-none-any.whl", hash = "sha256:30671902352e97b1eafd74ade8e4a694782bd3471685e78c32d0fdfd3aa7e7bb"},
|
||||
{file = "platformdirs-2.5.0.tar.gz", hash = "sha256:8ec11dfba28ecc0715eb5fb0147a87b1bf325f349f3da9aab2cd6b50b96b692b"},
|
||||
{file = "platformdirs-2.5.1-py3-none-any.whl", hash = "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"},
|
||||
{file = "platformdirs-2.5.1.tar.gz", hash = "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d"},
|
||||
]
|
||||
pluggy = [
|
||||
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
||||
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
||||
]
|
||||
prompt-toolkit = [
|
||||
{file = "prompt_toolkit-3.0.27-py3-none-any.whl", hash = "sha256:cb7dae7d2c59188c85a1d6c944fad19aded6a26bd9c8ae115a4e1c20eb90b713"},
|
||||
{file = "prompt_toolkit-3.0.27.tar.gz", hash = "sha256:f2b6a8067a4fb959d3677d1ed764cc4e63e0f6f565b9a4fc7edc2b18bf80217b"},
|
||||
{file = "prompt_toolkit-3.0.28-py3-none-any.whl", hash = "sha256:30129d870dcb0b3b6a53efdc9d0a83ea96162ffd28ffe077e94215b233dc670c"},
|
||||
{file = "prompt_toolkit-3.0.28.tar.gz", hash = "sha256:9f1cd16b1e86c2968f2519d7fb31dd9d669916f515612c269d14e9ed52b51650"},
|
||||
]
|
||||
ptyprocess = [
|
||||
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
|
||||
@ -771,80 +773,80 @@ pytest = [
|
||||
{file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
|
||||
]
|
||||
regex = [
|
||||
{file = "regex-2022.1.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:34316bf693b1d2d29c087ee7e4bb10cdfa39da5f9c50fa15b07489b4ab93a1b5"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a0b9f6a1a15d494b35f25ed07abda03209fa76c33564c09c9e81d34f4b919d7"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f99112aed4fb7cee00c7f77e8b964a9b10f69488cdff626ffd797d02e2e4484f"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a2bf98ac92f58777c0fafc772bf0493e67fcf677302e0c0a630ee517a43b949"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8618d9213a863c468a865e9d2ec50221015f7abf52221bc927152ef26c484b4c"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b52cc45e71657bc4743a5606d9023459de929b2a198d545868e11898ba1c3f59"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e12949e5071c20ec49ef00c75121ed2b076972132fc1913ddf5f76cae8d10b4"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b02e3e72665cd02afafb933453b0c9f6c59ff6e3708bd28d0d8580450e7e88af"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:abfcb0ef78df0ee9df4ea81f03beea41849340ce33a4c4bd4dbb99e23ec781b6"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6213713ac743b190ecbf3f316d6e41d099e774812d470422b3a0f137ea635832"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:61ebbcd208d78658b09e19c78920f1ad38936a0aa0f9c459c46c197d11c580a0"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:b013f759cd69cb0a62de954d6d2096d648bc210034b79b1881406b07ed0a83f9"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9187500d83fd0cef4669385cbb0961e227a41c0c9bc39219044e35810793edf7"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-win32.whl", hash = "sha256:94c623c331a48a5ccc7d25271399aff29729fa202c737ae3b4b28b89d2b0976d"},
|
||||
{file = "regex-2022.1.18-cp310-cp310-win_amd64.whl", hash = "sha256:1a171eaac36a08964d023eeff740b18a415f79aeb212169080c170ec42dd5184"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:49810f907dfe6de8da5da7d2b238d343e6add62f01a15d03e2195afc180059ed"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d2f5c3f7057530afd7b739ed42eb04f1011203bc5e4663e1e1d01bb50f813e3"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85ffd6b1cb0dfb037ede50ff3bef80d9bf7fa60515d192403af6745524524f3b"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba37f11e1d020969e8a779c06b4af866ffb6b854d7229db63c5fdddfceaa917f"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e27ea1ebe4a561db75a880ac659ff439dec7f55588212e71700bb1ddd5af9"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:37978254d9d00cda01acc1997513f786b6b971e57b778fbe7c20e30ae81a97f3"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54a1eb9fd38f2779e973d2f8958fd575b532fe26013405d1afb9ee2374e7ab8"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:768632fd8172ae03852e3245f11c8a425d95f65ff444ce46b3e673ae5b057b74"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:de2923886b5d3214be951bc2ce3f6b8ac0d6dfd4a0d0e2a4d2e5523d8046fdfb"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:1333b3ce73269f986b1fa4d5d395643810074dc2de5b9d262eb258daf37dc98f"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:d19a34f8a3429bd536996ad53597b805c10352a8561d8382e05830df389d2b43"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d2f355a951f60f0843f2368b39970e4667517e54e86b1508e76f92b44811a8a"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-win32.whl", hash = "sha256:2245441445099411b528379dee83e56eadf449db924648e5feb9b747473f42e3"},
|
||||
{file = "regex-2022.1.18-cp36-cp36m-win_amd64.whl", hash = "sha256:25716aa70a0d153cd844fe861d4f3315a6ccafce22b39d8aadbf7fcadff2b633"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7e070d3aef50ac3856f2ef5ec7214798453da878bb5e5a16c16a61edf1817cc3"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22709d701e7037e64dae2a04855021b62efd64a66c3ceed99dfd684bfef09e38"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9099bf89078675c372339011ccfc9ec310310bf6c292b413c013eb90ffdcafc"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04611cc0f627fc4a50bc4a9a2e6178a974c6a6a4aa9c1cca921635d2c47b9c87"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:552a39987ac6655dad4bf6f17dd2b55c7b0c6e949d933b8846d2e312ee80005a"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e031899cb2bc92c0cf4d45389eff5b078d1936860a1be3aa8c94fa25fb46ed8"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2dacb3dae6b8cc579637a7b72f008bff50a94cde5e36e432352f4ca57b9e54c4"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e5c31d70a478b0ca22a9d2d76d520ae996214019d39ed7dd93af872c7f301e52"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb804c7d0bfbd7e3f33924ff49757de9106c44e27979e2492819c16972ec0da2"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:36b2d700a27e168fa96272b42d28c7ac3ff72030c67b32f37c05616ebd22a202"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:16f81025bb3556eccb0681d7946e2b35ff254f9f888cff7d2120e8826330315c"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:da80047524eac2acf7c04c18ac7a7da05a9136241f642dd2ed94269ef0d0a45a"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-win32.whl", hash = "sha256:6ca45359d7a21644793de0e29de497ef7f1ae7268e346c4faf87b421fea364e6"},
|
||||
{file = "regex-2022.1.18-cp37-cp37m-win_amd64.whl", hash = "sha256:38289f1690a7e27aacd049e420769b996826f3728756859420eeee21cc857118"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6014038f52b4b2ac1fa41a58d439a8a00f015b5c0735a0cd4b09afe344c94899"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0b5d6f9aed3153487252d00a18e53f19b7f52a1651bc1d0c4b5844bc286dfa52"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d24b03daf7415f78abc2d25a208f234e2c585e5e6f92f0204d2ab7b9ab48e3"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf594cc7cc9d528338d66674c10a5b25e3cde7dd75c3e96784df8f371d77a298"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd914db437ec25bfa410f8aa0aa2f3ba87cdfc04d9919d608d02330947afaeab"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b6840b6448203228a9d8464a7a0d99aa8fa9f027ef95fe230579abaf8a6ee1"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11772be1eb1748e0e197a40ffb82fb8fd0d6914cd147d841d9703e2bef24d288"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a602bdc8607c99eb5b391592d58c92618dcd1537fdd87df1813f03fed49957a6"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7e26eac9e52e8ce86f915fd33380f1b6896a2b51994e40bb094841e5003429b4"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:519c0b3a6fbb68afaa0febf0d28f6c4b0a1074aefc484802ecb9709faf181607"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3c7ea86b9ca83e30fa4d4cd0eaf01db3ebcc7b2726a25990966627e39577d729"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:51f02ca184518702975b56affde6c573ebad4e411599005ce4468b1014b4786c"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:385ccf6d011b97768a640e9d4de25412204fbe8d6b9ae39ff115d4ff03f6fe5d"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-win32.whl", hash = "sha256:1f8c0ae0a0de4e19fddaaff036f508db175f6f03db318c80bbc239a1def62d02"},
|
||||
{file = "regex-2022.1.18-cp38-cp38-win_amd64.whl", hash = "sha256:760c54ad1b8a9b81951030a7e8e7c3ec0964c1cb9fee585a03ff53d9e531bb8e"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:93c20777a72cae8620203ac11c4010365706062aa13aaedd1a21bb07adbb9d5d"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6aa427c55a0abec450bca10b64446331b5ca8f79b648531138f357569705bc4a"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38baee6bdb7fe1b110b6b3aaa555e6e872d322206b7245aa39572d3fc991ee4"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:752e7ddfb743344d447367baa85bccd3629c2c3940f70506eb5f01abce98ee68"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8acef4d8a4353f6678fd1035422a937c2170de58a2b29f7da045d5249e934101"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c73d2166e4b210b73d1429c4f1ca97cea9cc090e5302df2a7a0a96ce55373f1c"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24c89346734a4e4d60ecf9b27cac4c1fee3431a413f7aa00be7c4d7bbacc2c4d"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:596f5ae2eeddb79b595583c2e0285312b2783b0ec759930c272dbf02f851ff75"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ecfe51abf7f045e0b9cdde71ca9e153d11238679ef7b5da6c82093874adf3338"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1d6301f5288e9bdca65fab3de6b7de17362c5016d6bf8ee4ba4cbe833b2eda0f"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:93cce7d422a0093cfb3606beae38a8e47a25232eea0f292c878af580a9dc7605"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cf0db26a1f76aa6b3aa314a74b8facd586b7a5457d05b64f8082a62c9c49582a"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:defa0652696ff0ba48c8aff5a1fac1eef1ca6ac9c660b047fc8e7623c4eb5093"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-win32.whl", hash = "sha256:6db1b52c6f2c04fafc8da17ea506608e6be7086715dab498570c3e55e4f8fbd1"},
|
||||
{file = "regex-2022.1.18-cp39-cp39-win_amd64.whl", hash = "sha256:ebaeb93f90c0903233b11ce913a7cb8f6ee069158406e056f884854c737d2442"},
|
||||
{file = "regex-2022.1.18.tar.gz", hash = "sha256:97f32dc03a8054a4c4a5ab5d761ed4861e828b2c200febd4e46857069a483916"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-win32.whl", hash = "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834"},
|
||||
{file = "regex-2022.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-win32.whl", hash = "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863"},
|
||||
{file = "regex-2022.3.2-cp36-cp36m-win_amd64.whl", hash = "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-win32.whl", hash = "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f"},
|
||||
{file = "regex-2022.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-win32.whl", hash = "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0"},
|
||||
{file = "regex-2022.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-win32.whl", hash = "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1"},
|
||||
{file = "regex-2022.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7"},
|
||||
{file = "regex-2022.3.2.tar.gz", hash = "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b"},
|
||||
]
|
||||
toml = [
|
||||
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
|
||||
@ -885,12 +887,12 @@ typed-ast = [
|
||||
{file = "typed_ast-1.5.2.tar.gz", hash = "sha256:525a2d4088e70a9f75b08b3f87a51acc9cde640e19cc523c7e41aa355564ae27"},
|
||||
]
|
||||
types-setuptools = [
|
||||
{file = "types-setuptools-57.4.9.tar.gz", hash = "sha256:536ef74744f8e1e4be4fc719887f886e74e4cf3c792b4a06984320be4df450b5"},
|
||||
{file = "types_setuptools-57.4.9-py3-none-any.whl", hash = "sha256:948dc6863373750e2cd0b223a84f1fb608414cde5e55cf38ea657b93aeb411d2"},
|
||||
{file = "types-setuptools-57.4.10.tar.gz", hash = "sha256:9a13513679c640f6616e2d9ab50d431c99ca8ae9848a97243f887c80fd5cf294"},
|
||||
{file = "types_setuptools-57.4.10-py3-none-any.whl", hash = "sha256:ddc98da82c12e1208012d65276641a132d3aadc78ecfff68fd3e17d85933a3c1"},
|
||||
]
|
||||
typing-extensions = [
|
||||
{file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"},
|
||||
{file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"},
|
||||
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
|
||||
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
|
||||
]
|
||||
wcwidth = [
|
||||
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
|
||||
|
@@ -4,6 +4,7 @@ version = "2.6.0"
description = "Look up the frequencies of words in many languages, based on many sources of data."
authors = ["Robyn Speer <rspeer@arborelia.net>"]
license = "MIT"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.7"

@@ -3,17 +3,21 @@ from wordfreq import tokenize, word_frequency

def test_apostrophes():
    # Test that we handle apostrophes in French reasonably.
    assert tokenize("qu'un", 'fr') == ['qu', 'un']
    assert tokenize("qu'un", 'fr', include_punctuation=True) == ["qu'", "un"]
    assert tokenize("langues d'oïl", 'fr') == ['langues', "d", 'oïl']
    assert tokenize("langues d'oïl", 'fr', include_punctuation=True) == ['langues', "d'", 'oïl']
    assert tokenize("l'heure", 'fr') == ['l', 'heure']
    assert tokenize("l'ànima", 'ca') == ['l', 'ànima']
    assert tokenize("l'anima", 'it') == ['l', 'anima']
    assert tokenize("l'heure", 'fr', include_punctuation=True) == ["l'", 'heure']
    assert tokenize("L'Hôpital", 'fr', include_punctuation=True) == ["l'", 'hôpital']
    assert tokenize("aujourd'hui", 'fr') == ["aujourd'hui"]
    assert tokenize("This isn't French", 'en') == ['this', "isn't", 'french']
    assert tokenize("qu'un", "fr") == ["qu", "un"]
    assert tokenize("qu'un", "fr", include_punctuation=True) == ["qu'", "un"]
    assert tokenize("langues d'oïl", "fr") == ["langues", "d", "oïl"]
    assert tokenize("langues d'oïl", "fr", include_punctuation=True) == [
        "langues",
        "d'",
        "oïl",
    ]
    assert tokenize("l'heure", "fr") == ["l", "heure"]
    assert tokenize("l'ànima", "ca") == ["l", "ànima"]
    assert tokenize("l'anima", "it") == ["l", "anima"]
    assert tokenize("l'heure", "fr", include_punctuation=True) == ["l'", "heure"]
    assert tokenize("L'Hôpital", "fr", include_punctuation=True) == ["l'", "hôpital"]
    assert tokenize("aujourd'hui", "fr") == ["aujourd'hui"]
    assert tokenize("This isn't French", "en") == ["this", "isn't", "french"]

    # This next behavior is not ideal -- we would prefer "dell'" to be handled
    # the same as "l'" -- but this is the most consistent result we can get without
@@ -21,26 +25,28 @@ def test_apostrophes():
    #
    # Versions of regex from 2019 and earlier would give ['dell', 'anima'], which
    # is better but inconsistent.
    assert tokenize("dell'anima", 'it') == ["dell'anima"]
    assert tokenize("dell'anima", "it") == ["dell'anima"]

    # Versions of regex from 2019 and earlier would give ['hawai', 'i'], and that's
    # an example of why we don't want the apostrophe-vowel fix to apply everywhere.
    assert tokenize("hawai'i", 'en') == ["hawai'i"]
    assert tokenize("hawai'i", "en") == ["hawai'i"]


def test_catastrophes():
    # More apostrophes, but this time they're in Catalan, and there's other
    # mid-word punctuation going on too.
    assert tokenize("M'acabo d'instal·lar.", 'ca') == ['m', 'acabo', 'd', 'instal·lar']
    assert (
        tokenize("M'acabo d'instal·lar.", 'ca', include_punctuation=True) ==
        ["m'", 'acabo', "d'", 'instal·lar', '.']
    )
    assert tokenize("M'acabo d'instal·lar.", "ca") == ["m", "acabo", "d", "instal·lar"]
    assert tokenize("M'acabo d'instal·lar.", "ca", include_punctuation=True) == [
        "m'",
        "acabo",
        "d'",
        "instal·lar",
        ".",
    ]


def test_alternate_codes():
    # Try over-long language codes for French and Catalan
    assert tokenize("qu'un", 'fra') == ['qu', 'un']
    assert tokenize("qu'un", 'fre') == ['qu', 'un']
    assert tokenize("M'acabo d'instal·lar.", 'cat') == ['m', 'acabo', 'd', 'instal·lar']

    assert tokenize("qu'un", "fra") == ["qu", "un"]
    assert tokenize("qu'un", "fre") == ["qu", "un"]
    assert tokenize("M'acabo d'instal·lar.", "cat") == ["m", "acabo", "d", "instal·lar"]
@@ -14,12 +14,12 @@ def test_gender_neutral_at():
        "tod@s",
        "l@s",
        "trabajador@s",
        "migrantes"
        "migrantes",
    ]

    text = "el distrito 22@ de Barcelona"
    assert tokenize(text, 'es') == ["el", "distrito", "22@", "de", "barcelona"]
    assert lossy_tokenize(text, 'es') == ["el", "distrito", "00@", "de", "barcelona"]
    assert tokenize(text, "es") == ["el", "distrito", "22@", "de", "barcelona"]
    assert lossy_tokenize(text, "es") == ["el", "distrito", "22@", "de", "barcelona"]

    # It also appears in Portuguese
    text = "direitos e deveres para @s membr@s da comunidade virtual"
@@ -32,7 +32,7 @@ def test_gender_neutral_at():
        "membr@s",
        "da",
        "comunidade",
        "virtual"
        "virtual",
    ]

    # Because this is part of our tokenization, the language code doesn't
@@ -43,10 +43,10 @@ def test_gender_neutral_at():

def test_at_in_corpus():
    # We have a word frequency for "l@s"
    assert word_frequency('l@s', 'es') > 0
    assert word_frequency("l@s", "es") > 0

    # It's not just treated as a word break
    assert word_frequency('l@s', 'es') < word_frequency('l s', 'es')
    assert word_frequency("l@s", "es") < word_frequency("l s", "es")


def test_punctuation_at():
@@ -65,7 +65,7 @@ def test_punctuation_at():
        "ao",
        "lado",
        "do",
        "nick"
        "nick",
    ]

    assert tokenize(text, "pt", include_punctuation=True) == [
@@ -83,7 +83,7 @@ def test_punctuation_at():
        "ao",
        "lado",
        "do",
        "nick"
        "nick",
    ]

    # If the @ is not at the end of the word or part of the word ending '@s',
@@ -98,12 +98,9 @@ def test_punctuation_at():
        "la",
        "línea",
        "all:all",
        "all"
        "all",
    ]

    # Make sure not to catch e-mail addresses
    text = "info@something.example"
    assert tokenize(text, "en") == [
        "info",
        "something.example"
    ]
    assert tokenize(text, "en") == ["info", "something.example"]
@@ -9,92 +9,112 @@ def test_tokens():
    # (He was the Chinese Wikipedia's featured article of the day when I
    # wrote this test.)

    hobart = '加勒特·霍巴特' # Garret Hobart, or "jiā lè tè huò bā tè".
    hobart = "加勒特·霍巴特" # Garret Hobart, or "jiā lè tè huò bā tè".

    # He was the sixth American vice president to die in office.
    fact_simplified = '他是历史上第六位在任期内去世的美国副总统。'
    fact_traditional = '他是歷史上第六位在任期內去世的美國副總統。'
    fact_simplified = "他是历史上第六位在任期内去世的美国副总统。"
    fact_traditional = "他是歷史上第六位在任期內去世的美國副總統。"

    # His name breaks into five pieces, with the only piece staying together
    # being the one that means 'Bart'. The dot is not included as a token.
    assert tokenize(hobart, 'zh') == ['加', '勒', '特', '霍', '巴特']
    assert tokenize(hobart, "zh") == ["加", "勒", "特", "霍", "巴特"]

    assert tokenize(fact_simplified, 'zh') == [
    assert tokenize(fact_simplified, "zh") == [
        # he / is / history / in / #6 / counter for people
        '他', '是', '历史', '上', '第六', '位',
        "他",
        "是",
        "历史",
        "上",
        "第六",
        "位",
        # during / term of office / in / die
        '在', '任期', '内', '去世',
        "在",
        "任期",
        "内",
        "去世",
        # of / U.S. / deputy / president
        '的', '美国', '副', '总统'
        "的",
        "美国",
        "副",
        "总统",
    ]

    # Jieba's original tokenizer knows a lot of names, it seems.
    assert tokenize(hobart, 'zh', external_wordlist=True) == ['加勒特', '霍巴特']
    assert tokenize(hobart, "zh", external_wordlist=True) == ["加勒特", "霍巴特"]

    # We get almost the same tokens from the sentence using Jieba's own
    # wordlist, but it tokenizes "in history" as two words and
    # "sixth person" as one.
    assert tokenize(fact_simplified, 'zh', external_wordlist=True) == [
    assert tokenize(fact_simplified, "zh", external_wordlist=True) == [
        # he / is / history / in / sixth person
        '他', '是', '历史', '上', '第六位',
        "他",
        "是",
        "历史",
        "上",
        "第六位",
        # during / term of office / in / die
        '在', '任期', '内', '去世',
        "在",
        "任期",
        "内",
        "去世",
        # of / U.S. / deputy / president
        '的', '美国', '副', '总统'
        "的",
        "美国",
        "副",
        "总统",
    ]

    # Check that Traditional Chinese works at all
    assert word_frequency(fact_traditional, 'zh') > 0
    assert word_frequency(fact_traditional, "zh") > 0

    # You get the same token lengths if you look it up in Traditional Chinese,
    # but the words are different
    simp_tokens = tokenize(fact_simplified, 'zh', include_punctuation=True)
    trad_tokens = tokenize(fact_traditional, 'zh', include_punctuation=True)
    assert ''.join(simp_tokens) == fact_simplified
    assert ''.join(trad_tokens) == fact_traditional
    simp_tokens = tokenize(fact_simplified, "zh", include_punctuation=True)
    trad_tokens = tokenize(fact_traditional, "zh", include_punctuation=True)
    assert "".join(simp_tokens) == fact_simplified
    assert "".join(trad_tokens) == fact_traditional
    simp_lengths = [len(token) for token in simp_tokens]
    trad_lengths = [len(token) for token in trad_tokens]
    assert simp_lengths == trad_lengths


def test_combination():
    xiexie_freq = word_frequency('谢谢', 'zh') # "Thanks"
    assert word_frequency('谢谢谢谢', 'zh') == pytest.approx(xiexie_freq / 20, rel=0.01)
    xiexie_freq = word_frequency("谢谢", "zh") # "Thanks"
    assert word_frequency("谢谢谢谢", "zh") == pytest.approx(xiexie_freq / 20, rel=0.01)


def test_alternate_codes():
    # Tokenization of Chinese works when you use other language codes
    # that are not equal to 'zh'.
    tokens = ['谢谢', '谢谢']
    tokens = ["谢谢", "谢谢"]

    # Code with a region attached
    assert tokenize('谢谢谢谢', 'zh-CN') == tokens
    assert tokenize("谢谢谢谢", "zh-CN") == tokens

    # Over-long codes for Chinese
    assert tokenize('谢谢谢谢', 'chi') == tokens
    assert tokenize('谢谢谢谢', 'zho') == tokens
    assert tokenize("谢谢谢谢", "chi") == tokens
    assert tokenize("谢谢谢谢", "zho") == tokens

    # Separate codes for Mandarin and Cantonese
    assert tokenize('谢谢谢谢', 'cmn') == tokens
    assert tokenize('谢谢谢谢', 'yue') == tokens
    assert tokenize("谢谢谢谢", "cmn") == tokens
    assert tokenize("谢谢谢谢", "yue") == tokens


def test_unreasonably_long():
    # This crashed earlier versions of wordfreq due to an overflow in
    # exponentiation. We've now changed the sequence of operations so it
    # will underflow instead.
    lots_of_ls = 'l' * 800
    assert word_frequency(lots_of_ls, 'zh') == 0.
    assert zipf_frequency(lots_of_ls, 'zh') == 0.
    lots_of_ls = "l" * 800
    assert word_frequency(lots_of_ls, "zh") == 0.0
    assert zipf_frequency(lots_of_ls, "zh") == 0.0


def test_hyphens():
    # An edge case of Chinese tokenization that changed sometime around
    # jieba 0.42.

    tok = tokenize('--------', 'zh', include_punctuation=True)
    assert tok == ['-'] * 8

    tok = tokenize('--------', 'zh', include_punctuation=True, external_wordlist=True)
    assert tok == ['--------']
    tok = tokenize("--------", "zh", include_punctuation=True)
    assert tok == ["-"] * 8

    tok = tokenize("--------", "zh", include_punctuation=True, external_wordlist=True)
    assert tok == ["--------"]
@ -1,16 +1,22 @@
|
||||
from wordfreq import (
|
||||
word_frequency, available_languages, cB_to_freq,
|
||||
top_n_list, random_words, random_ascii_words, tokenize, lossy_tokenize
|
||||
word_frequency,
|
||||
available_languages,
|
||||
cB_to_freq,
|
||||
top_n_list,
|
||||
random_words,
|
||||
random_ascii_words,
|
||||
tokenize,
|
||||
lossy_tokenize,
|
||||
)
|
||||
import pytest
|
||||
|
||||
|
||||
def test_freq_examples():
|
||||
# Stopwords are most common in the correct language
|
||||
assert word_frequency('the', 'en') > word_frequency('de', 'en')
|
||||
assert word_frequency('de', 'es') > word_frequency('the', 'es')
|
||||
assert word_frequency("the", "en") > word_frequency("de", "en")
|
||||
assert word_frequency("de", "es") > word_frequency("the", "es")
|
||||
# We get word frequencies from the 'large' list when available
|
||||
assert word_frequency('infrequency', 'en') > 0.
assert word_frequency("infrequency", "en") > 0.0


def test_languages():
@ -20,33 +26,33 @@ def test_languages():
assert len(avail) >= 34

# 'small' covers the same languages, but with some different lists
avail_small = available_languages('small')
avail_small = available_languages("small")
assert len(avail_small) == len(avail)
assert avail_small != avail

# 'combined' is the same as 'small'
avail_old_name = available_languages('combined')
avail_old_name = available_languages("combined")
assert avail_old_name == avail_small

# 'large' covers fewer languages
avail_large = available_languages('large')
avail_large = available_languages("large")
assert len(avail_large) >= 14
assert len(avail) > len(avail_large)

# Look up the digit '2' in the main word list for each language
for lang in avail:
assert word_frequency('2', lang) > 0
assert word_frequency("2", lang) > 0

# Make up a weirdly verbose language code and make sure
# we still get it
new_lang_code = '%s-001-x-fake-extension' % lang.upper()
assert word_frequency('2', new_lang_code) > 0
new_lang_code = "%s-001-x-fake-ext" % lang.upper()
assert word_frequency("2", new_lang_code) > 0


def test_minimums():
assert word_frequency('esquivalience', 'en') == 0
assert word_frequency('esquivalience', 'en', minimum=1e-6) == 1e-6
assert word_frequency('the', 'en', minimum=1) == 1
assert word_frequency("esquivalience", "en") == 0
assert word_frequency("esquivalience", "en", minimum=1e-6) == 1e-6
assert word_frequency("the", "en", minimum=1) == 1


def test_most_common_words():
@ -59,61 +65,61 @@ def test_most_common_words():
"""
return top_n_list(lang, 1)[0]

assert get_most_common('ar') == 'في'
assert get_most_common('bg') == 'на'
assert get_most_common('bn') == 'না'
assert get_most_common('ca') == 'de'
assert get_most_common('cs') == 'a'
assert get_most_common('da') == 'i'
assert get_most_common('el') == 'και'
assert get_most_common('de') == 'die'
assert get_most_common('en') == 'the'
assert get_most_common('es') == 'de'
assert get_most_common('fi') == 'ja'
assert get_most_common('fil') == 'sa'
assert get_most_common('fr') == 'de'
assert get_most_common('he') == 'את'
assert get_most_common('hi') == 'के'
assert get_most_common('hu') == 'a'
assert get_most_common('id') == 'yang'
assert get_most_common('is') == 'og'
assert get_most_common('it') == 'di'
assert get_most_common('ja') == 'の'
assert get_most_common('ko') == '이'
assert get_most_common('lt') == 'ir'
assert get_most_common('lv') == 'un'
assert get_most_common('mk') == 'на'
assert get_most_common('ms') == 'yang'
assert get_most_common('nb') == 'i'
assert get_most_common('nl') == 'de'
assert get_most_common('pl') == 'w'
assert get_most_common('pt') == 'de'
assert get_most_common('ro') == 'de'
assert get_most_common('ru') == 'в'
assert get_most_common('sh') == 'je'
assert get_most_common('sk') == 'a'
assert get_most_common('sl') == 'je'
assert get_most_common('sv') == 'är'
assert get_most_common('ta') == 'ஒரு'
assert get_most_common('tr') == 've'
assert get_most_common('uk') == 'в'
assert get_most_common('ur') == 'کے'
assert get_most_common('vi') == 'là'
assert get_most_common('zh') == '的'
assert get_most_common("ar") == "في"
assert get_most_common("bg") == "на"
assert get_most_common("bn") == "না"
assert get_most_common("ca") == "de"
assert get_most_common("cs") == "a"
assert get_most_common("da") == "i"
assert get_most_common("el") == "και"
assert get_most_common("de") == "die"
assert get_most_common("en") == "the"
assert get_most_common("es") == "de"
assert get_most_common("fi") == "ja"
assert get_most_common("fil") == "sa"
assert get_most_common("fr") == "de"
assert get_most_common("he") == "את"
assert get_most_common("hi") == "के"
assert get_most_common("hu") == "a"
assert get_most_common("id") == "yang"
assert get_most_common("is") == "og"
assert get_most_common("it") == "di"
assert get_most_common("ja") == "の"
assert get_most_common("ko") == "이"
assert get_most_common("lt") == "ir"
assert get_most_common("lv") == "un"
assert get_most_common("mk") == "на"
assert get_most_common("ms") == "yang"
assert get_most_common("nb") == "i"
assert get_most_common("nl") == "de"
assert get_most_common("pl") == "w"
assert get_most_common("pt") == "de"
assert get_most_common("ro") == "de"
assert get_most_common("ru") == "в"
assert get_most_common("sh") == "je"
assert get_most_common("sk") == "a"
assert get_most_common("sl") == "je"
assert get_most_common("sv") == "är"
assert get_most_common("ta") == "ஒரு"
assert get_most_common("tr") == "ve"
assert get_most_common("uk") == "в"
assert get_most_common("ur") == "کے"
assert get_most_common("vi") == "là"
assert get_most_common("zh") == "的"


def test_language_matching():
freq = word_frequency('的', 'zh')
assert word_frequency('的', 'zh-TW') == freq
assert word_frequency('的', 'zh-CN') == freq
assert word_frequency('的', 'zh-Hant') == freq
assert word_frequency('的', 'zh-Hans') == freq
assert word_frequency('的', 'yue-HK') == freq
assert word_frequency('的', 'cmn') == freq
freq = word_frequency("的", "zh")
assert word_frequency("的", "zh-TW") == freq
assert word_frequency("的", "zh-CN") == freq
assert word_frequency("的", "zh-Hant") == freq
assert word_frequency("的", "zh-Hans") == freq
assert word_frequency("的", "yue-CN") == freq
assert word_frequency("的", "cmn") == freq


def test_cB_conversion():
assert cB_to_freq(0) == 1.
assert cB_to_freq(0) == 1.0
assert cB_to_freq(-100) == pytest.approx(0.1)
assert cB_to_freq(-600) == pytest.approx(1e-6)
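The asserts above pin down wordfreq's centibel scale: 0 cB is a proportion of 1, and every additional -100 cB divides the proportion by 10. A minimal sketch of the conversion, consistent with these asserts but not copied from wordfreq's source:

    # Sketch only: the real cB_to_freq lives in wordfreq and may differ in detail.
    def cb_to_freq_sketch(cB: float) -> float:
        return 10.0 ** (cB / 100.0)

    assert cb_to_freq_sketch(0) == 1.0
    assert abs(cb_to_freq_sketch(-100) - 0.1) < 1e-12
    assert abs(cb_to_freq_sketch(-600) - 1e-6) < 1e-18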

@ -126,101 +132,125 @@ def test_failed_cB_conversion():
def test_tokenization():
# We preserve apostrophes within words, so "can't" is a single word in the
# data
assert (
tokenize("I don't split at apostrophes, you see.", 'en')
== ['i', "don't", 'split', 'at', 'apostrophes', 'you', 'see']
)
assert tokenize("I don't split at apostrophes, you see.", "en") == [
"i",
"don't",
"split",
"at",
"apostrophes",
"you",
"see",
]

assert (
tokenize("I don't split at apostrophes, you see.", 'en', include_punctuation=True)
== ['i', "don't", 'split', 'at', 'apostrophes', ',', 'you', 'see', '.']
)
assert tokenize(
"I don't split at apostrophes, you see.", "en", include_punctuation=True
) == ["i", "don't", "split", "at", "apostrophes", ",", "you", "see", "."]

# Certain punctuation does not inherently split a word.
assert (
tokenize("Anything is possible at zombo.com", 'en')
== ['anything', 'is', 'possible', 'at', 'zombo.com']
)
assert tokenize("Anything is possible at zombo.com", "en") == [
"anything",
"is",
"possible",
"at",
"zombo.com",
]

# Splits occur after symbols, and at splitting punctuation such as hyphens.
assert tokenize('😂test', 'en') == ['😂', 'test']
assert tokenize("flip-flop", 'en') == ['flip', 'flop']
assert (
tokenize('this text has... punctuation :)', 'en', include_punctuation=True)
== ['this', 'text', 'has', '...', 'punctuation', ':)']
)
assert tokenize("😂test", "en") == ["😂", "test"]
assert tokenize("flip-flop", "en") == ["flip", "flop"]
assert tokenize(
"this text has... punctuation :)", "en", include_punctuation=True
) == ["this", "text", "has", "...", "punctuation", ":)"]

# Multi-codepoint emoji sequences such as 'medium-skinned woman with headscarf'
# and 'David Bowie' stay together, because our Unicode segmentation algorithm
# is up to date
assert tokenize('emoji test 🧕🏽', 'en') == ['emoji', 'test', '🧕🏽']
assert (
tokenize("👨🎤 Planet Earth is blue, and there's nothing I can do 🌎🚀", 'en')
== ['👨🎤', 'planet', 'earth', 'is', 'blue', 'and', "there's",
'nothing', 'i', 'can', 'do', '🌎', '🚀']
)
assert tokenize("emoji test 🧕🏽", "en") == ["emoji", "test", "🧕🏽"]
assert tokenize(
"👨🎤 Planet Earth is blue, and there's nothing I can do 🌎🚀", "en"
) == [
"👨🎤",
"planet",
"earth",
"is",
"blue",
"and",
"there's",
"nothing",
"i",
"can",
"do",
"🌎",
"🚀",
]

# Water wave, surfer, flag of California (indicates ridiculously complete support
# for Unicode 10 and Emoji 5.0)
assert tokenize("Surf's up 🌊🏄🏴'",'en') == ["surf's", "up", "🌊", "🏄", "🏴"]
assert tokenize("Surf's up 🌊🏄🏴'", "en") == ["surf's", "up", "🌊", "🏄", "🏴"]


def test_casefolding():
assert tokenize('WEISS', 'de') == ['weiss']
assert tokenize('weiß', 'de') == ['weiss']
assert tokenize('İstanbul', 'tr') == ['istanbul']
assert tokenize('SIKISINCA', 'tr') == ['sıkısınca']
assert tokenize("WEISS", "de") == ["weiss"]
assert tokenize("weiß", "de") == ["weiss"]
assert tokenize("İstanbul", "tr") == ["istanbul"]
assert tokenize("SIKISINCA", "tr") == ["sıkısınca"]


def test_number_smashing():
assert tokenize('"715 - CRΣΣKS" by Bon Iver', 'en') == ['715', 'crσσks', 'by', 'bon', 'iver']
assert lossy_tokenize('"715 - CRΣΣKS" by Bon Iver', 'en') == ['000', 'crσσks', 'by', 'bon', 'iver']
assert (
lossy_tokenize('"715 - CRΣΣKS" by Bon Iver', 'en', include_punctuation=True)
== ['"', '000', '-', 'crσσks', '"', 'by', 'bon', 'iver']
)
assert lossy_tokenize('1', 'en') == ['1']
assert lossy_tokenize('3.14', 'en') == ['0.00']
assert lossy_tokenize('24601', 'en') == ['00000']
assert word_frequency('24601', 'en') == word_frequency('90210', 'en')
def test_normalization():
assert tokenize('"715 - CRΣΣKS" by Bon Iver', "en") == [
"715",
"crσσks",
"by",
"bon",
"iver",
]
assert lossy_tokenize('"715 - CRΣΣKS" by Bon Iver', "en") == [
"715",
"crσσks",
"by",
"bon",
"iver",
]


def test_uncurl_quotes():
assert lossy_tokenize("let’s", 'en') == ["let's"]
assert word_frequency("let’s", 'en') == word_frequency("let's", 'en')
assert lossy_tokenize("let’s", "en") == ["let's"]
assert word_frequency("let’s", "en") == word_frequency("let's", "en")


def test_phrase_freq():
ff = word_frequency("flip-flop", 'en')
ff = word_frequency("flip-flop", "en")
assert ff > 0
phrase_freq = 1.0 / word_frequency('flip', 'en') + 1.0 / word_frequency('flop', 'en')
phrase_freq = 1.0 / word_frequency("flip", "en") + 1.0 / word_frequency(
"flop", "en"
)
assert 1.0 / ff == pytest.approx(phrase_freq, rel=0.01)
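test_phrase_freq encodes the rule wordfreq uses for multi-token phrases: the reciprocal of the phrase frequency is the sum of the reciprocals of the token frequencies. A tiny worked sketch with invented numbers (these are not the real frequencies of 'flip' and 'flop'):

    # Invented token frequencies, for illustration only.
    f_flip = 1e-5
    f_flop = 2e-5
    # Reciprocal-sum combination: 1 / f_phrase = 1 / f_flip + 1 / f_flop
    f_phrase = 1.0 / (1.0 / f_flip + 1.0 / f_flop)
    assert abs(f_phrase - 6.6667e-6) < 1e-9  # always smaller than either token's frequency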


def test_not_really_random():
# If your xkcd-style password comes out like this, maybe you shouldn't
# use it
assert random_words(nwords=4, lang='en', bits_per_word=0) == 'the the the the'
assert random_words(nwords=4, lang="en", bits_per_word=0) == "the the the the"

# This not only tests random_ascii_words, it makes sure we didn't end
# up with 'eos' as a very common Japanese word
assert random_ascii_words(nwords=4, lang='ja', bits_per_word=0) == '00 00 00 00'
assert random_ascii_words(nwords=4, lang="ja", bits_per_word=0) == "1 1 1 1"


def test_not_enough_ascii():
with pytest.raises(ValueError):
random_ascii_words(lang='zh', bits_per_word=16)
random_ascii_words(lang="zh", bits_per_word=16)


def test_arabic():
# Remove tatweels
assert tokenize('متــــــــعب', 'ar') == ['متعب']
assert tokenize("متــــــــعب", "ar") == ["متعب"]

# Remove combining marks
assert tokenize('حَرَكَات', 'ar') == ['حركات']
assert tokenize("حَرَكَات", "ar") == ["حركات"]

# An Arabic ligature that is affected by NFKC normalization
assert tokenize('\ufefb', 'ar') == ['\u0644\u0627']
assert tokenize("\ufefb", "ar") == ["\u0644\u0627"]


def test_ideographic_fallback():
@ -228,28 +258,33 @@ def test_ideographic_fallback():
#
# More complex examples like this, involving the multiple scripts of Japanese,
# are in test_japanese.py.
assert tokenize('中国文字', 'en') == ['中国文字']
assert tokenize("中国文字", "en") == ["中国文字"]


def test_other_languages():
# Test that we leave Thai letters stuck together. If we had better Thai support,
# we would actually split this into a three-word phrase.
assert tokenize('การเล่นดนตรี', 'th') == ['การเล่นดนตรี']
assert tokenize('"การเล่นดนตรี" means "playing music"', 'en') == ['การเล่นดนตรี', 'means', 'playing', 'music']
assert tokenize("การเล่นดนตรี", "th") == ["การเล่นดนตรี"]
assert tokenize('"การเล่นดนตรี" means "playing music"', "en") == [
"การเล่นดนตรี",
"means",
"playing",
"music",
]

# Test Khmer, a script similar to Thai
assert tokenize('សូមស្វាគមន៍', 'km') == ['សូមស្វាគមន៍']
assert tokenize("សូមស្វាគមន៍", "km") == ["សូមស្វាគមន៍"]

# Test Hindi -- tokens split where there are spaces, and not where there aren't
assert tokenize('हिन्दी विक्षनरी', 'hi') == ['हिन्दी', 'विक्षनरी']
assert tokenize("हिन्दी विक्षनरी", "hi") == ["हिन्दी", "विक्षनरी"]

# Remove vowel points in Hebrew
assert tokenize('דֻּגְמָה', 'he') == ['דגמה']
assert tokenize("דֻּגְמָה", "he") == ["דגמה"]

# Deal with commas, cedillas, and I's in Turkish
assert tokenize('kișinin', 'tr') == ['kişinin']
assert tokenize('KİȘİNİN', 'tr') == ['kişinin']
assert tokenize("kișinin", "tr") == ["kişinin"]
assert tokenize("KİȘİNİN", "tr") == ["kişinin"]

# Deal with cedillas that should be commas-below in Romanian
assert tokenize('acelaşi', 'ro') == ['același']
assert tokenize('ACELAŞI', 'ro') == ['același']
assert tokenize("acelaşi", "ro") == ["același"]
assert tokenize("ACELAŞI", "ro") == ["același"]

@ -3,7 +3,7 @@ import pytest


def test_tokens():
assert tokenize('おはようございます', 'ja') == ['おはよう', 'ござい', 'ます']
assert tokenize("おはようございます", "ja") == ["おはよう", "ござい", "ます"]


def test_simple_tokenize():
@ -17,13 +17,12 @@ def test_simple_tokenize():
#
# We used to try to infer word boundaries between hiragana and katakana,
# but this leads to edge cases that are unsolvable without a dictionary.
ja_text = 'ひらがなカタカナromaji'
assert simple_tokenize(ja_text) == ['ひらがなカタカナ', 'romaji']

ja_text = "ひらがなカタカナromaji"
assert simple_tokenize(ja_text) == ["ひらがなカタカナ", "romaji"]

# An example that would be multiple tokens if tokenized as 'ja' via MeCab,
# but sticks together in simple_tokenize
assert simple_tokenize('おはようございます') == ['おはようございます']
assert simple_tokenize("おはようございます") == ["おはようございます"]

# Names that use the weird possessive marker ヶ, which is technically a
# katakana even though it's being used like a kanji, stay together as one
@ -43,17 +42,13 @@ def test_simple_tokenize():
assert simple_tokenize("見ヶ〆料") == ["見ヶ〆料"]




def test_combination():
ohayou_freq = word_frequency('おはよう', 'ja')
gozai_freq = word_frequency('ござい', 'ja')
masu_freq = word_frequency('ます', 'ja')
ohayou_freq = word_frequency("おはよう", "ja")
gozai_freq = word_frequency("ござい", "ja")
masu_freq = word_frequency("ます", "ja")

assert word_frequency('おはようおはよう', 'ja') == pytest.approx(ohayou_freq / 2, rel=0.01)

assert (
1.0 / word_frequency('おはようございます', 'ja') ==
pytest.approx(1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq, rel=0.01)
assert word_frequency("おはようおはよう", "ja") == pytest.approx(ohayou_freq / 2, rel=0.01)

assert 1.0 / word_frequency("おはようございます", "ja") == pytest.approx(
1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq, rel=0.01
)


@ -3,16 +3,14 @@ import pytest


def test_tokens():
assert tokenize('감사합니다', 'ko') == ['감사', '합니다']
assert tokenize("감사합니다", "ko") == ["감사", "합니다"]


def test_combination():
gamsa_freq = word_frequency('감사', 'ko')
habnida_freq = word_frequency('합니다', 'ko')
gamsa_freq = word_frequency("감사", "ko")
habnida_freq = word_frequency("합니다", "ko")

assert word_frequency('감사감사', 'ko') == pytest.approx(gamsa_freq / 2, rel=0.01)
assert (
1.0 / word_frequency('감사합니다', 'ko') ==
pytest.approx(1.0 / gamsa_freq + 1.0 / habnida_freq, rel=0.01)
assert word_frequency("감사감사", "ko") == pytest.approx(gamsa_freq / 2, rel=0.01)
assert 1.0 / word_frequency("감사합니다", "ko") == pytest.approx(
1.0 / gamsa_freq + 1.0 / habnida_freq, rel=0.01
)


58
tests/test_numbers.py
Normal file
@ -0,0 +1,58 @@
from wordfreq import word_frequency
from wordfreq.numbers import digit_freq, smash_numbers
from pytest import approx


def test_number_smashing():
assert smash_numbers("1") == "1"
assert smash_numbers("3.14") == "0.00"
assert smash_numbers("24601") == "00000"


def test_decimals():
assert word_frequency("3.14", "el") > word_frequency("4.14", "el")
assert word_frequency("3.14", "el") == word_frequency("3.15", "el")
assert word_frequency("3,14", "de") > word_frequency("4,14", "de")
assert word_frequency("3,14", "de") == word_frequency("3,15", "de")


def test_year_distribution():
assert word_frequency("2010", "en") > word_frequency("1010", "en")
assert word_frequency("2010", "en") > word_frequency("3010", "en")


def test_boundaries():
assert word_frequency("9", "en") > word_frequency("10", "en")
assert word_frequency("99", "en") > word_frequency("100", "en")
assert word_frequency("999", "en") > word_frequency("1000", "en")
assert word_frequency("9999", "en") > word_frequency("10000", "en")


def test_multiple_words():
once = word_frequency("2015b", "en")
twice = word_frequency("2015b 2015b", "en")
assert once == approx(2 * twice)


def test_distribution():
assert word_frequency("24601", "en") > word_frequency("90210", "en")
assert word_frequency("7", "en") > word_frequency("007", "en")
assert word_frequency("404", "en") == word_frequency("418", "en")

def test_3digit_sum():
"""
Test that the probability distribution given you have a 3-digit sequence
adds up to approximately 1.
"""
three_digit_sum = sum(digit_freq(f"{num:03d}") for num in range(0, 1000))
assert three_digit_sum == approx(1.0)

def test_4digit_sum():
"""
Test that the probability distribution given you have a 4-digit sequence
adds up to approximately 1.
"""
four_digit_sum = sum(digit_freq(f"{num:04d}") for num in range(0, 10000))
assert 0.999 < four_digit_sum < 1.0
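One step in these new tests is worth spelling out: test_multiple_words expects "2015b 2015b" to be half as frequent as "2015b" because word_frequency combines tokens by summing reciprocals, so repeating a token doubles the reciprocal. A minimal sketch of that arithmetic (the frequency value is invented):

    # Invented stand-in for word_frequency("2015b", "en"); illustration only.
    f_once = 3e-7
    f_twice = 1.0 / (2 * (1.0 / f_once))  # reciprocal-sum over the repeated token
    assert abs(f_twice - f_once / 2) < 1e-20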

@ -5,14 +5,26 @@ from wordfreq.preprocess import preprocess_text
def test_transliteration():
# "Well, there's a lot of things you do not understand."
# (from somewhere in OpenSubtitles)
assert (
tokenize("Па, има ту много ствари које не схваташ.", 'sr') ==
['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš']
)
assert (
tokenize("Pa, ima tu mnogo stvari koje ne shvataš.", 'sr') ==
['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš']
)
assert tokenize("Па, има ту много ствари које не схваташ.", "sr") == [
"pa",
"ima",
"tu",
"mnogo",
"stvari",
"koje",
"ne",
"shvataš",
]
assert tokenize("Pa, ima tu mnogo stvari koje ne shvataš.", "sr") == [
"pa",
"ima",
"tu",
"mnogo",
"stvari",
"koje",
"ne",
"shvataš",
]

# I don't have examples of complete sentences in Azerbaijani that are
# naturally in Cyrillic, because it turns out everyone writes Azerbaijani
@ -20,14 +32,14 @@ def test_transliteration():
# So here are some individual words.

# 'library' in Azerbaijani Cyrillic
assert preprocess_text('китабхана', 'az') == 'kitabxana'
assert preprocess_text('КИТАБХАНА', 'az') == 'kitabxana'
assert preprocess_text('KİTABXANA', 'az') == 'kitabxana'
assert preprocess_text("китабхана", "az") == "kitabxana"
assert preprocess_text("КИТАБХАНА", "az") == "kitabxana"
assert preprocess_text("KİTABXANA", "az") == "kitabxana"

# 'scream' in Azerbaijani Cyrillic
assert preprocess_text('бағырты', 'az') == 'bağırtı'
assert preprocess_text('БАҒЫРТЫ', 'az') == 'bağırtı'
assert preprocess_text('BAĞIRTI', 'az') == 'bağırtı'
assert preprocess_text("бағырты", "az") == "bağırtı"
assert preprocess_text("БАҒЫРТЫ", "az") == "bağırtı"
assert preprocess_text("BAĞIRTI", "az") == "bağırtı"


def test_actually_russian():
@ -38,13 +50,12 @@ def test_actually_russian():
# We make sure to handle this case so we don't end up with a mixed-script
# word like "pacanы".

assert tokenize("сто из ста, пацаны!", 'sr') == ['sto', 'iz', 'sta', 'pacany']
assert tokenize("культуры", 'sr') == ["kul'tury"]
assert tokenize("сто из ста, пацаны!", "sr") == ["sto", "iz", "sta", "pacany"]
assert tokenize("культуры", "sr") == ["kul'tury"]


def test_alternate_codes():
# Try language codes for Serbo-Croatian that have been split, and now
# are canonically mapped to Serbian
assert tokenize("культуры", 'sh') == ["kul'tury"]
assert tokenize("культуры", 'hbs') == ["kul'tury"]

assert tokenize("культуры", "sh") == ["kul'tury"]
assert tokenize("культуры", "hbs") == ["kul'tury"]

@ -13,7 +13,7 @@ import warnings

from .tokens import tokenize, simple_tokenize, lossy_tokenize
from .language_info import get_language_info
from .numbers import digit_freq
from .numbers import digit_freq, has_digit_sequence, smash_numbers

logger = logging.getLogger(__name__)

@ -234,7 +234,7 @@ _wf_cache: Dict[Tuple[str, str, str, float], float] = {}

def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float:
tokens = lossy_tokenize(word, lang)
dfreq = digit_freq(word)

if not tokens:
return minimum

@ -245,13 +245,20 @@ def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float:
freqs = get_frequency_dict(lang, wordlist)
one_over_result = 0.0
for token in tokens:
if token not in freqs:
smashed = smash_numbers(token)
if smashed not in freqs:
# If any word is missing, just return the default value
return minimum
# spread the frequency of digits over all digit combinations
freq = freqs[token]
freq = freqs[smashed]
if smashed != token:
# If there is a digit sequence in the token, the digits are
# internally replaced by 0s to aggregate their probabilities
# together. We then assign a specific frequency to the digit
# sequence using the `digit_freq` distribution.
freq *= digit_freq(token)
one_over_result += 1.0 / freq

# Combine the frequencies of tokens we looked up.
freq = 1.0 / one_over_result

if get_language_info(lang)["tokenizer"] == "jieba":
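To make the new control flow concrete, here is a standalone sketch of the digit path through _word_frequency, using an invented wordlist entry in place of get_frequency_dict:

    # Sketch only: the aggregate frequency for "00000" below is made up.
    from wordfreq.numbers import digit_freq, smash_numbers

    fake_freqs = {"00000": 2e-5}
    token = "24601"
    smashed = smash_numbers(token)      # "00000"
    freq = fake_freqs[smashed]
    if smashed != token:
        freq *= digit_freq(token)       # this token's share of the 5-digit mass
    one_over_result = 1.0 / freq        # then combined with the other tokens' reciprocals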
@ -334,13 +341,18 @@ def top_n_list(
Return a frequency list of length `n` in descending order of frequency.
This list contains words from `wordlist`, of the given language.
If `ascii_only`, then only ascii words are considered.

The frequency list will not contain multi-digit sequences, because we
estimate the frequencies of those using the functions in `numbers.py`,
not using a wordlist that contains all of them.
"""
results = []
for word in iter_wordlist(lang, wordlist):
if (not ascii_only) or max(word) <= "~":
results.append(word)
if len(results) >= n:
break
if not has_digit_sequence(word):
results.append(word)
if len(results) >= n:
break
return results
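A quick way to see the effect of the has_digit_sequence filter, assuming a build of wordfreq that includes this commit:

    # Sketch: no top-list entry should contain a multi-digit sequence any more.
    from wordfreq import top_n_list
    from wordfreq.numbers import has_digit_sequence

    assert not any(has_digit_sequence(word) for word in top_n_list("en", 1000))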


@ -1,4 +1,4 @@
from .preprocess import MULTI_DIGIT_RE
import regex

# Frequencies of leading digits, according to Benford's law, sort of.
# Benford's law doesn't describe numbers with leading zeroes, because "007"
@ -11,23 +11,37 @@ DIGIT_FREQS = [0.009, 0.300, 0.175, 0.124, 0.096, 0.078, 0.066, 0.057, 0.050, 0.
#
# We do this with a piecewise exponential function whose peak is a plateau covering
# the years 2019 to 2039.
#
# YEAR_LOG_PEAK is chosen by experimentation to make this probability add up to about
# .994. Here, that represents P(token represents a year) | P(token is 4 digits).
# The other .006 represents P(token does not represent a year) | P(token is 4 digits).

YEAR_LOG_PEAK = -1.875
NOT_YEAR_PROB = 0.006
# Determined by experimentation: makes the probabilities of all years add up to 90%.
# The other 10% goes to NOT_YEAR_PROB. tests/test_numbers.py confirms that this
# probability distribution adds up to 1.
YEAR_LOG_PEAK = -1.9185
NOT_YEAR_PROB = 0.1
REFERENCE_YEAR = 2019
PLATEAU_WIDTH = 20

DIGIT_RE = regex.compile(r"\d")
MULTI_DIGIT_RE = regex.compile(r"\d[\d.,]+")
PURE_DIGIT_RE = regex.compile(r"\d+")

def benford_freq(text: str) -> float:
"""
Estimate the frequency of a digit sequence according to Benford's law.
"""
first_digit = int(text[0])
return DIGIT_FREQS[first_digit] / 10 ** (len(text) - 1)
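Two of the new test assertions follow directly from this formula: "404" and "418" get the same estimate, because only the leading digit and the number of digits matter, and every extra digit divides the estimate by another factor of 10. A worked number, using the DIGIT_FREQS entry for a leading 4 shown in the hunk header above:

    # benford_freq("404") = DIGIT_FREQS[4] / 10 ** (3 - 1) = 0.096 / 100
    assert abs(0.096 / 10 ** (3 - 1) - 0.00096) < 1e-12
    # "418" has the same leading digit and length, so it gets exactly the same value.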


def year_freq(text: str) -> float:
"""
Estimate the relative frequency of a particular 4-digit sequence representing
a year.

For example, suppose text == "1985". We're estimating the probability that a
randomly-selected token from a large corpus will be "1985" and refer to the
year, _given_ that it is 4 digits. Tokens that are not 4 digits are not involved
in the probability distribution.
"""
year = int(text)

# Fitting a line to the curve seen at
@ -60,13 +74,38 @@ def year_freq(text: str) -> float:


def digit_freq(text: str) -> float:
"""
Get the relative frequency of a string of digits, using our estimates.
"""
freq = 1.0
for match in MULTI_DIGIT_RE.findall(text):
if len(match) == 4:
freq *= year_freq(match)
else:
freq *= benford_freq(match)
for submatch in PURE_DIGIT_RE.findall(match):
if len(submatch) == 4:
freq *= year_freq(submatch)
else:
freq *= benford_freq(submatch)
return freq
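A worked example of how the rewritten loop treats decimals, which is what test_decimals exercises: for "3.14", MULTI_DIGIT_RE matches the whole "3.14", and PURE_DIGIT_RE then splits it into the runs "3" and "14", each scored by Benford's law.

    # digit_freq("3.14") = benford_freq("3") * benford_freq("14")
    #                    = DIGIT_FREQS[3] * (DIGIT_FREQS[1] / 10)
    assert abs(0.124 * (0.300 / 10) - 0.00372) < 1e-12
    # "3.15" scores the same ("15" also starts with 1 and has two digits),
    # while "4.14" scores lower because DIGIT_FREQS[4] < DIGIT_FREQS[3].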


print(sum(digit_freq("%04d" % year) for year in range(0, 10000)))
def has_digit_sequence(text: str) -> bool:
"""
Returns True iff the text has a digit sequence that will be normalized out
and handled with `digit_freq`.
"""
return bool(MULTI_DIGIT_RE.match(text))


def _sub_zeroes(match: regex.Match) -> str:
"""
Given a regex match, return what it matched with digits replaced by
zeroes.
"""
return DIGIT_RE.sub("0", match.group(0))


def smash_numbers(text: str) -> str:
"""
Replace sequences of multiple digits with zeroes, so we don't need to
distinguish the frequencies of thousands of numbers.
"""
return MULTI_DIGIT_RE.sub(_sub_zeroes, text)
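Putting _sub_zeroes and smash_numbers together on a small mixed string, consistent with the smash_numbers asserts in tests/test_numbers.py:

    # Each multi-digit run is zeroed out; single digits and other text are untouched,
    # because MULTI_DIGIT_RE needs a digit followed by at least one more digit, '.' or ','.
    from wordfreq.numbers import smash_numbers

    assert smash_numbers("24601 copies of 3.14, take 1") == "00000 copies of 0.00, take 1"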

@ -7,10 +7,6 @@ from langcodes import Language

MARK_RE = regex.compile(r"[\p{Mn}\N{ARABIC TATWEEL}]", regex.V1)

DIGIT_RE = regex.compile(r"\d")
MULTI_DIGIT_RE = regex.compile(r"\d[\d.,]+")


def preprocess_text(text: str, language: Language) -> str:
"""
This function applies pre-processing steps that convert forms of words
@ -251,19 +247,3 @@ def cedillas_to_commas(text: str) -> str:
"\N{LATIN SMALL LETTER T WITH CEDILLA}",
"\N{LATIN SMALL LETTER T WITH COMMA BELOW}",
)


def _sub_zeroes(match: regex.Match) -> str:
"""
Given a regex match, return what it matched with digits replaced by
zeroes.
"""
return DIGIT_RE.sub("0", match.group(0))


def smash_numbers(text: str) -> str:
"""
Replace sequences of multiple digits with zeroes, so we don't need to
distinguish the frequencies of thousands of numbers.
"""
return MULTI_DIGIT_RE.sub(_sub_zeroes, text)

@ -10,7 +10,7 @@ from .language_info import (
SPACELESS_SCRIPTS,
EXTRA_JAPANESE_CHARACTERS,
)
from .preprocess import preprocess_text, smash_numbers
from .preprocess import preprocess_text

# Placeholders for CJK functions that we'll import on demand
_mecab_tokenize = None
@ -309,13 +309,6 @@ def lossy_tokenize(

In particular:

- Any sequence of 2 or more adjacent digits, possibly with intervening
punctuation such as a decimal point, will replace each digit with '0'
so that frequencies for numbers don't have to be counted separately.

This is similar to but not quite identical to the word2vec Google News
data, which replaces digits with '#' in tokens with more than one digit.

- In Chinese, unless Traditional Chinese is specifically requested using
'zh-Hant', all characters will be converted to Simplified Chinese.

@ -334,4 +327,4 @@ def lossy_tokenize(

tokens = [_simplify_chinese(token) for token in tokens]

return [uncurl_quotes(smash_numbers(token)) for token in tokens]
return [uncurl_quotes(token) for token in tokens]
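Read together with the _word_frequency changes above, the net effect of this last hunk is that lossy_tokenize no longer zeroes out digit sequences itself; going by this diff alone:

    # Sketch of the behavior change (inferred from this diff, not run against a release):
    # before: lossy_tokenize("24601", "en") == ["00000"]
    # after:  lossy_tokenize("24601", "en") == ["24601"]
    # The number handling now happens inside word_frequency(), via smash_numbers()
    # and digit_freq().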