estimate the freq distribution of numbers

Elia Robyn Lake 2022-03-10 18:33:42 -05:00
parent 4e373750e8
commit bf05b1b1dc
14 changed files with 552 additions and 405 deletions
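
The implementation behind the commit title lives in files not shown in this excerpt, so as background only: "estimating the frequency distribution of numbers" here means taking the frequency recorded for a smashed digit pattern such as "0000" and spreading it across concrete numbers. Below is a minimal, purely illustrative sketch of that idea using a Benford-style first-digit distribution; the function names and the exact model are assumptions, not code from wordfreq.

    # Illustrative only: distribute a digit-pattern's frequency over concrete numbers.
    # Benford-style weight for the first digit, uniform over the remaining digits.
    import math

    def first_digit_weight(d: int) -> float:
        return math.log10(1 + 1 / d)  # weights for d = 1..9 sum to 1

    def number_frequency(pattern_freq: float, number: str) -> float:
        first = int(number[0])
        if first == 0:
            return 0.0  # leading zeros are out of scope for this sketch
        rest = 10 ** (len(number) - 1)
        return pattern_freq * first_digit_weight(first) / rest

    # e.g. the share of the 4-digit bin's frequency assigned to "2022"
    print(number_frequency(1e-4, "2022"))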

poetry.lock (generated)

@@ -61,7 +61,7 @@ uvloop = ["uvloop (>=0.15.2)"]
 [[package]]
 name = "click"
-version = "8.0.3"
+version = "8.0.4"
 description = "Composable command line interface toolkit"
 category = "dev"
 optional = false
@@ -103,17 +103,14 @@ pyflakes = ">=2.4.0,<2.5.0"
 [[package]]
 name = "ftfy"
-version = "6.0.3"
-description = "Fixes some problems with Unicode text after the fact"
+version = "6.1.1"
+description = "Fixes mojibake and other problems with Unicode, after the fact"
 category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7,<4"
 
 [package.dependencies]
-wcwidth = "*"
+wcwidth = ">=0.2.5"
 
-[package.extras]
-docs = ["furo", "sphinx"]
 
 [[package]]
 name = "importlib-metadata"
@@ -149,7 +146,7 @@ python-versions = "*"
 [[package]]
 name = "ipython"
-version = "7.31.1"
+version = "7.32.0"
 description = "IPython: Productive Interactive Computing"
 category = "dev"
 optional = false
@@ -242,7 +239,7 @@ python-versions = "*"
 [[package]]
 name = "mecab-python3"
-version = "1.0.4"
+version = "1.0.5"
 description = "Python wrapper for the MeCab morphological analyzer for Japanese"
 category = "dev"
 optional = false
@@ -338,7 +335,7 @@ python-versions = "*"
 [[package]]
 name = "platformdirs"
-version = "2.5.0"
+version = "2.5.1"
 description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
 category = "dev"
 optional = false
@@ -365,7 +362,7 @@ testing = ["pytest", "pytest-benchmark"]
 [[package]]
 name = "prompt-toolkit"
-version = "3.0.27"
+version = "3.0.28"
 description = "Library for building powerful interactive command lines in Python"
 category = "dev"
 optional = false
@@ -449,11 +446,11 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm
 [[package]]
 name = "regex"
-version = "2022.1.18"
+version = "2022.3.2"
 description = "Alternative regular expression module, to replace re."
 category = "main"
 optional = false
-python-versions = "*"
+python-versions = ">=3.6"
 
 [[package]]
 name = "toml"
@@ -492,7 +489,7 @@ python-versions = ">=3.6"
 [[package]]
 name = "types-setuptools"
-version = "57.4.9"
+version = "57.4.10"
 description = "Typing stubs for setuptools"
 category = "dev"
 optional = false
@@ -500,7 +497,7 @@ python-versions = "*"
 [[package]]
 name = "typing-extensions"
-version = "4.0.1"
+version = "4.1.1"
 description = "Backported and Experimental Type Hints for Python 3.6+"
 category = "main"
 optional = false
@@ -529,7 +526,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7"
-content-hash = "a3b1a9c3b80e338764f1907a77e31f59d6e1e231092b7813182e09e55d7c2f45"
+content-hash = "8507a13e0c8c79c30e911cc5f32bdc35284304246ae50531917df6197d7dcab8"
 
 [metadata.files]
 appnope = [
@ -574,8 +571,8 @@ black = [
{file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"}, {file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"},
] ]
click = [ click = [
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"}, {file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
{file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"}, {file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
] ]
colorama = [ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
@ -590,7 +587,8 @@ flake8 = [
{file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
] ]
ftfy = [ ftfy = [
{file = "ftfy-6.0.3.tar.gz", hash = "sha256:ba71121a9c8d7790d3e833c6c1021143f3e5c4118293ec3afb5d43ed9ca8e72b"}, {file = "ftfy-6.1.1-py3-none-any.whl", hash = "sha256:0ffd33fce16b54cccaec78d6ec73d95ad370e5df5a25255c8966a6147bd667ca"},
{file = "ftfy-6.1.1.tar.gz", hash = "sha256:bfc2019f84fcd851419152320a6375604a0f1459c281b5b199b2cd0d2e727f8f"},
] ]
importlib-metadata = [ importlib-metadata = [
{file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"}, {file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"},
@ -604,8 +602,8 @@ ipadic = [
{file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"}, {file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"},
] ]
ipython = [ ipython = [
{file = "ipython-7.31.1-py3-none-any.whl", hash = "sha256:55df3e0bd0f94e715abd968bedd89d4e8a7bce4bf498fb123fed4f5398fea874"}, {file = "ipython-7.32.0-py3-none-any.whl", hash = "sha256:86df2cf291c6c70b5be6a7b608650420e89180c8ec74f376a34e2dc15c3400e7"},
{file = "ipython-7.31.1.tar.gz", hash = "sha256:b5548ec5329a4bcf054a5deed5099b0f9622eb9ea51aaa7104d215fece201d8c"}, {file = "ipython-7.32.0.tar.gz", hash = "sha256:468abefc45c15419e3c8e8c0a6a5c115b2127bafa34d7c641b1d443658793909"},
] ]
jedi = [ jedi = [
{file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"}, {file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"},
@ -630,23 +628,27 @@ mecab-ko-dic = [
{file = "mecab-ko-dic-1.0.0.tar.gz", hash = "sha256:3ba22858736e02e8a0e92f2a7f099528c733ae47701b29d12c75e982a85d1f11"}, {file = "mecab-ko-dic-1.0.0.tar.gz", hash = "sha256:3ba22858736e02e8a0e92f2a7f099528c733ae47701b29d12c75e982a85d1f11"},
] ]
mecab-python3 = [ mecab-python3 = [
{file = "mecab-python3-1.0.4.tar.gz", hash = "sha256:b150ad5fe4260539b4ef184657e552ef81307fbbe60ae1f258bc814549ea90f8"}, {file = "mecab-python3-1.0.5.tar.gz", hash = "sha256:e703d78c88a671abb8170351644850015d9bbfab31530a3b40d12481a6779a11"},
{file = "mecab_python3-1.0.4-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3c7e87c65160e5e4edb08cb80dbce50f4e711c53f45063321aab72ab2566ffe4"}, {file = "mecab_python3-1.0.5-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8a64bd228704ed9b24da5cbd6c4e325ef22310227153ef481f9037183351aa10"},
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2fbed960ef82f4192b31efd88af1f3c24cd1692b62720ed70d7e314a50f581e"}, {file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf083884116fa05ca0394c4c8d62013a4954fbac414c33a1931906ddf0f3585a"},
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cb6eb6cc47e3937a2edfaa9595dc2d165ed9f025e3a53bd0a5033a12fa6bcdcf"}, {file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fe020df27b249f43df3d38b84473d226e36d6d4a31f951cedbddabfcc450e36"},
{file = "mecab_python3-1.0.4-cp36-cp36m-win_amd64.whl", hash = "sha256:b149b51f0f62c9512d219c9e79c6db2eb66e70863a97eb412d8fc3ba7a25f351"}, {file = "mecab_python3-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:644f781de083311fcf81f7d55f21a756ceef7ebae7c111bd50a2c9d0855c1927"},
{file = "mecab_python3-1.0.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:c1606b35df0136b3e9dc7add2e69d2c1151e69fd5675c0cde62d0b017b2319e7"}, {file = "mecab_python3-1.0.5-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:4309a91f0d5b66d3f0e8c9ba5a4d3cf7dbac1334269338704599820e051d1d7f"},
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53b0b899ef03f364bfd7fa28f260ee1e893e4f47ff90a141a522709b892f0a4e"}, {file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be2d1cd2ecd1f04b91eb0e26c906f21b50b8526e977f7f01f3901f9a6306944"},
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:52a789c708f8b89044236201eb03c7fe5517fad5210a9de2230c7d99a2a8c760"}, {file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:644bbde31ab1244ff18fb1dcac1e5fee8121f8b27a5c3e041c01ebc301df9266"},
{file = "mecab_python3-1.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6ca73c0dec72038290faa6de17d57d771535eb47c22346e170dffcb82d696bb"}, {file = "mecab_python3-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:401a2d1608b6503cb755d7d864ad74b64a7a4346309235f84577de807bb29050"},
{file = "mecab_python3-1.0.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:18e14dfe3d8c66cfa1c9f49e3bc8ac480b79a433ec9e5b5d2c1fb73f36ec7c3e"}, {file = "mecab_python3-1.0.5-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:5f91d5d8a9ac0ea7351e5e2423df98dd463b02013e006b18096cd365de37b2a9"},
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:221256b84be0ee29dc8fa450210236b40707b9d63cfc70de5102d2531622d062"}, {file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8ce0151b973f4ca15e651619264442011568ebe48c6fce51d55e64f7e5c2e1"},
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:de39b82f44d97fc0fd636644ad14c9662f51afcd73775379d5a8b1eb20ee85a6"}, {file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e381df4c55f3ec5bccbb5625c65c54ecf982c215574d1102aff2803ac1a24cd"},
{file = "mecab_python3-1.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:96d9e8c098401fb4b5bd32258f4952f3b22cdb30ab291f5ff82eae1d0941cbed"}, {file = "mecab_python3-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8eaaa78227f470c4cf1d6c2a87b92889041f317517fbe65e635b86ea0c84a194"},
{file = "mecab_python3-1.0.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:dcd62ebf2eecde1263119b92ff5379a046bb8231cb999fafda00f0925dfcb67e"}, {file = "mecab_python3-1.0.5-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:dd8601565dd1331ee5cd67bcc45f713cebc14b730ee2e956ed120a0ec6e4fd8a"},
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178b632b717e3249054a7ad4c0fbc60ce8493d357afa7673d535ffa11e45eaba"}, {file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76a40f717f9592bd12edc7bcf1fa869f4c8058e5d0b80d4cc6c301435afb1f96"},
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fbfad60261ad3b9390b8615528fc013302a3e8febba220f799216c1a1154ee7e"}, {file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f299d6ef96495371f5a622a7004a205e303dabba1fc3a7f9a07e741e315ed2b"},
{file = "mecab_python3-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:445b4f5ee5674d85f6de2726ec28991801844ff71eb096129da5f5ba077d5a87"}, {file = "mecab_python3-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4cdb07edbbd508d9b98ac9529e0ff0b89d93e50a6beeb7b8b946439594bf5e01"},
{file = "mecab_python3-1.0.5-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:eb412a25e485e33d7ab69262b58f7365b727f8c447e4c9c1c56b5fd91414ecd2"},
{file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e8ac11ef4440418312dd4f1f200f7957fdc0148bb49dc049264c5d07bed527"},
{file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae1c126cf4982035794042280998066c8b6d26eb89136731078d9105a7070c13"},
{file = "mecab_python3-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:34a196c6a410e57f975ee077d075ac994b94bb6930b04e207e59e7c7521ecb58"},
] ]
msgpack = [ msgpack = [
{file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96acc674bb9c9be63fa8b6dabc3248fdc575c4adc005c440ad02f87ca7edd079"}, {file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96acc674bb9c9be63fa8b6dabc3248fdc575c4adc005c440ad02f87ca7edd079"},
@ -731,16 +733,16 @@ pickleshare = [
{file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"},
] ]
platformdirs = [ platformdirs = [
{file = "platformdirs-2.5.0-py3-none-any.whl", hash = "sha256:30671902352e97b1eafd74ade8e4a694782bd3471685e78c32d0fdfd3aa7e7bb"}, {file = "platformdirs-2.5.1-py3-none-any.whl", hash = "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"},
{file = "platformdirs-2.5.0.tar.gz", hash = "sha256:8ec11dfba28ecc0715eb5fb0147a87b1bf325f349f3da9aab2cd6b50b96b692b"}, {file = "platformdirs-2.5.1.tar.gz", hash = "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d"},
] ]
pluggy = [ pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
] ]
prompt-toolkit = [ prompt-toolkit = [
{file = "prompt_toolkit-3.0.27-py3-none-any.whl", hash = "sha256:cb7dae7d2c59188c85a1d6c944fad19aded6a26bd9c8ae115a4e1c20eb90b713"}, {file = "prompt_toolkit-3.0.28-py3-none-any.whl", hash = "sha256:30129d870dcb0b3b6a53efdc9d0a83ea96162ffd28ffe077e94215b233dc670c"},
{file = "prompt_toolkit-3.0.27.tar.gz", hash = "sha256:f2b6a8067a4fb959d3677d1ed764cc4e63e0f6f565b9a4fc7edc2b18bf80217b"}, {file = "prompt_toolkit-3.0.28.tar.gz", hash = "sha256:9f1cd16b1e86c2968f2519d7fb31dd9d669916f515612c269d14e9ed52b51650"},
] ]
ptyprocess = [ ptyprocess = [
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
@ -771,80 +773,80 @@ pytest = [
{file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
] ]
regex = [ regex = [
{file = "regex-2022.1.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:34316bf693b1d2d29c087ee7e4bb10cdfa39da5f9c50fa15b07489b4ab93a1b5"}, {file = "regex-2022.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7"},
{file = "regex-2022.1.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a0b9f6a1a15d494b35f25ed07abda03209fa76c33564c09c9e81d34f4b919d7"}, {file = "regex-2022.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f99112aed4fb7cee00c7f77e8b964a9b10f69488cdff626ffd797d02e2e4484f"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a2bf98ac92f58777c0fafc772bf0493e67fcf677302e0c0a630ee517a43b949"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8618d9213a863c468a865e9d2ec50221015f7abf52221bc927152ef26c484b4c"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b52cc45e71657bc4743a5606d9023459de929b2a198d545868e11898ba1c3f59"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e12949e5071c20ec49ef00c75121ed2b076972132fc1913ddf5f76cae8d10b4"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c"},
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b02e3e72665cd02afafb933453b0c9f6c59ff6e3708bd28d0d8580450e7e88af"}, {file = "regex-2022.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f"},
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:abfcb0ef78df0ee9df4ea81f03beea41849340ce33a4c4bd4dbb99e23ec781b6"}, {file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d"},
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6213713ac743b190ecbf3f316d6e41d099e774812d470422b3a0f137ea635832"}, {file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729"},
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:61ebbcd208d78658b09e19c78920f1ad38936a0aa0f9c459c46c197d11c580a0"}, {file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2"},
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:b013f759cd69cb0a62de954d6d2096d648bc210034b79b1881406b07ed0a83f9"}, {file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8"},
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9187500d83fd0cef4669385cbb0961e227a41c0c9bc39219044e35810793edf7"}, {file = "regex-2022.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd"},
{file = "regex-2022.1.18-cp310-cp310-win32.whl", hash = "sha256:94c623c331a48a5ccc7d25271399aff29729fa202c737ae3b4b28b89d2b0976d"}, {file = "regex-2022.3.2-cp310-cp310-win32.whl", hash = "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834"},
{file = "regex-2022.1.18-cp310-cp310-win_amd64.whl", hash = "sha256:1a171eaac36a08964d023eeff740b18a415f79aeb212169080c170ec42dd5184"}, {file = "regex-2022.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7"},
{file = "regex-2022.1.18-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:49810f907dfe6de8da5da7d2b238d343e6add62f01a15d03e2195afc180059ed"}, {file = "regex-2022.3.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d2f5c3f7057530afd7b739ed42eb04f1011203bc5e4663e1e1d01bb50f813e3"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85ffd6b1cb0dfb037ede50ff3bef80d9bf7fa60515d192403af6745524524f3b"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba37f11e1d020969e8a779c06b4af866ffb6b854d7229db63c5fdddfceaa917f"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e27ea1ebe4a561db75a880ac659ff439dec7f55588212e71700bb1ddd5af9"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:37978254d9d00cda01acc1997513f786b6b971e57b778fbe7c20e30ae81a97f3"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204"},
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54a1eb9fd38f2779e973d2f8958fd575b532fe26013405d1afb9ee2374e7ab8"}, {file = "regex-2022.3.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7"},
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:768632fd8172ae03852e3245f11c8a425d95f65ff444ce46b3e673ae5b057b74"}, {file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30"},
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:de2923886b5d3214be951bc2ce3f6b8ac0d6dfd4a0d0e2a4d2e5523d8046fdfb"}, {file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0"},
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:1333b3ce73269f986b1fa4d5d395643810074dc2de5b9d262eb258daf37dc98f"}, {file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede"},
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:d19a34f8a3429bd536996ad53597b805c10352a8561d8382e05830df389d2b43"}, {file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4"},
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d2f355a951f60f0843f2368b39970e4667517e54e86b1508e76f92b44811a8a"}, {file = "regex-2022.3.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29"},
{file = "regex-2022.1.18-cp36-cp36m-win32.whl", hash = "sha256:2245441445099411b528379dee83e56eadf449db924648e5feb9b747473f42e3"}, {file = "regex-2022.3.2-cp36-cp36m-win32.whl", hash = "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863"},
{file = "regex-2022.1.18-cp36-cp36m-win_amd64.whl", hash = "sha256:25716aa70a0d153cd844fe861d4f3315a6ccafce22b39d8aadbf7fcadff2b633"}, {file = "regex-2022.3.2-cp36-cp36m-win_amd64.whl", hash = "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b"},
{file = "regex-2022.1.18-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7e070d3aef50ac3856f2ef5ec7214798453da878bb5e5a16c16a61edf1817cc3"}, {file = "regex-2022.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22709d701e7037e64dae2a04855021b62efd64a66c3ceed99dfd684bfef09e38"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9099bf89078675c372339011ccfc9ec310310bf6c292b413c013eb90ffdcafc"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04611cc0f627fc4a50bc4a9a2e6178a974c6a6a4aa9c1cca921635d2c47b9c87"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:552a39987ac6655dad4bf6f17dd2b55c7b0c6e949d933b8846d2e312ee80005a"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e031899cb2bc92c0cf4d45389eff5b078d1936860a1be3aa8c94fa25fb46ed8"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1"},
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2dacb3dae6b8cc579637a7b72f008bff50a94cde5e36e432352f4ca57b9e54c4"}, {file = "regex-2022.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3"},
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e5c31d70a478b0ca22a9d2d76d520ae996214019d39ed7dd93af872c7f301e52"}, {file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231"},
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb804c7d0bfbd7e3f33924ff49757de9106c44e27979e2492819c16972ec0da2"}, {file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f"},
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:36b2d700a27e168fa96272b42d28c7ac3ff72030c67b32f37c05616ebd22a202"}, {file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960"},
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:16f81025bb3556eccb0681d7946e2b35ff254f9f888cff7d2120e8826330315c"}, {file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737"},
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:da80047524eac2acf7c04c18ac7a7da05a9136241f642dd2ed94269ef0d0a45a"}, {file = "regex-2022.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec"},
{file = "regex-2022.1.18-cp37-cp37m-win32.whl", hash = "sha256:6ca45359d7a21644793de0e29de497ef7f1ae7268e346c4faf87b421fea364e6"}, {file = "regex-2022.3.2-cp37-cp37m-win32.whl", hash = "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f"},
{file = "regex-2022.1.18-cp37-cp37m-win_amd64.whl", hash = "sha256:38289f1690a7e27aacd049e420769b996826f3728756859420eeee21cc857118"}, {file = "regex-2022.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0"},
{file = "regex-2022.1.18-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6014038f52b4b2ac1fa41a58d439a8a00f015b5c0735a0cd4b09afe344c94899"}, {file = "regex-2022.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640"},
{file = "regex-2022.1.18-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0b5d6f9aed3153487252d00a18e53f19b7f52a1651bc1d0c4b5844bc286dfa52"}, {file = "regex-2022.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d24b03daf7415f78abc2d25a208f234e2c585e5e6f92f0204d2ab7b9ab48e3"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf594cc7cc9d528338d66674c10a5b25e3cde7dd75c3e96784df8f371d77a298"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd914db437ec25bfa410f8aa0aa2f3ba87cdfc04d9919d608d02330947afaeab"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b6840b6448203228a9d8464a7a0d99aa8fa9f027ef95fe230579abaf8a6ee1"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11772be1eb1748e0e197a40ffb82fb8fd0d6914cd147d841d9703e2bef24d288"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66"},
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a602bdc8607c99eb5b391592d58c92618dcd1537fdd87df1813f03fed49957a6"}, {file = "regex-2022.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178"},
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7e26eac9e52e8ce86f915fd33380f1b6896a2b51994e40bb094841e5003429b4"}, {file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571"},
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:519c0b3a6fbb68afaa0febf0d28f6c4b0a1074aefc484802ecb9709faf181607"}, {file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07"},
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3c7ea86b9ca83e30fa4d4cd0eaf01db3ebcc7b2726a25990966627e39577d729"}, {file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f"},
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:51f02ca184518702975b56affde6c573ebad4e411599005ce4468b1014b4786c"}, {file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9"},
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:385ccf6d011b97768a640e9d4de25412204fbe8d6b9ae39ff115d4ff03f6fe5d"}, {file = "regex-2022.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056"},
{file = "regex-2022.1.18-cp38-cp38-win32.whl", hash = "sha256:1f8c0ae0a0de4e19fddaaff036f508db175f6f03db318c80bbc239a1def62d02"}, {file = "regex-2022.3.2-cp38-cp38-win32.whl", hash = "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0"},
{file = "regex-2022.1.18-cp38-cp38-win_amd64.whl", hash = "sha256:760c54ad1b8a9b81951030a7e8e7c3ec0964c1cb9fee585a03ff53d9e531bb8e"}, {file = "regex-2022.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47"},
{file = "regex-2022.1.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:93c20777a72cae8620203ac11c4010365706062aa13aaedd1a21bb07adbb9d5d"}, {file = "regex-2022.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3"},
{file = "regex-2022.1.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6aa427c55a0abec450bca10b64446331b5ca8f79b648531138f357569705bc4a"}, {file = "regex-2022.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38baee6bdb7fe1b110b6b3aaa555e6e872d322206b7245aa39572d3fc991ee4"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:752e7ddfb743344d447367baa85bccd3629c2c3940f70506eb5f01abce98ee68"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8acef4d8a4353f6678fd1035422a937c2170de58a2b29f7da045d5249e934101"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c73d2166e4b210b73d1429c4f1ca97cea9cc090e5302df2a7a0a96ce55373f1c"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24c89346734a4e4d60ecf9b27cac4c1fee3431a413f7aa00be7c4d7bbacc2c4d"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee"},
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:596f5ae2eeddb79b595583c2e0285312b2783b0ec759930c272dbf02f851ff75"}, {file = "regex-2022.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b"},
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ecfe51abf7f045e0b9cdde71ca9e153d11238679ef7b5da6c82093874adf3338"}, {file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c"},
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1d6301f5288e9bdca65fab3de6b7de17362c5016d6bf8ee4ba4cbe833b2eda0f"}, {file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5"},
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:93cce7d422a0093cfb3606beae38a8e47a25232eea0f292c878af580a9dc7605"}, {file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4"},
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cf0db26a1f76aa6b3aa314a74b8facd586b7a5457d05b64f8082a62c9c49582a"}, {file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447"},
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:defa0652696ff0ba48c8aff5a1fac1eef1ca6ac9c660b047fc8e7623c4eb5093"}, {file = "regex-2022.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015"},
{file = "regex-2022.1.18-cp39-cp39-win32.whl", hash = "sha256:6db1b52c6f2c04fafc8da17ea506608e6be7086715dab498570c3e55e4f8fbd1"}, {file = "regex-2022.3.2-cp39-cp39-win32.whl", hash = "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1"},
{file = "regex-2022.1.18-cp39-cp39-win_amd64.whl", hash = "sha256:ebaeb93f90c0903233b11ce913a7cb8f6ee069158406e056f884854c737d2442"}, {file = "regex-2022.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7"},
{file = "regex-2022.1.18.tar.gz", hash = "sha256:97f32dc03a8054a4c4a5ab5d761ed4861e828b2c200febd4e46857069a483916"}, {file = "regex-2022.3.2.tar.gz", hash = "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b"},
] ]
toml = [ toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
@ -885,12 +887,12 @@ typed-ast = [
{file = "typed_ast-1.5.2.tar.gz", hash = "sha256:525a2d4088e70a9f75b08b3f87a51acc9cde640e19cc523c7e41aa355564ae27"}, {file = "typed_ast-1.5.2.tar.gz", hash = "sha256:525a2d4088e70a9f75b08b3f87a51acc9cde640e19cc523c7e41aa355564ae27"},
] ]
types-setuptools = [ types-setuptools = [
{file = "types-setuptools-57.4.9.tar.gz", hash = "sha256:536ef74744f8e1e4be4fc719887f886e74e4cf3c792b4a06984320be4df450b5"}, {file = "types-setuptools-57.4.10.tar.gz", hash = "sha256:9a13513679c640f6616e2d9ab50d431c99ca8ae9848a97243f887c80fd5cf294"},
{file = "types_setuptools-57.4.9-py3-none-any.whl", hash = "sha256:948dc6863373750e2cd0b223a84f1fb608414cde5e55cf38ea657b93aeb411d2"}, {file = "types_setuptools-57.4.10-py3-none-any.whl", hash = "sha256:ddc98da82c12e1208012d65276641a132d3aadc78ecfff68fd3e17d85933a3c1"},
] ]
typing-extensions = [ typing-extensions = [
{file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"}, {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"}, {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
] ]
wcwidth = [ wcwidth = [
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},


@@ -4,6 +4,7 @@ version = "2.6.0"
 description = "Look up the frequencies of words in many languages, based on many sources of data."
 authors = ["Robyn Speer <rspeer@arborelia.net>"]
 license = "MIT"
+readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.7"


@ -3,17 +3,21 @@ from wordfreq import tokenize, word_frequency
def test_apostrophes(): def test_apostrophes():
# Test that we handle apostrophes in French reasonably. # Test that we handle apostrophes in French reasonably.
assert tokenize("qu'un", 'fr') == ['qu', 'un'] assert tokenize("qu'un", "fr") == ["qu", "un"]
assert tokenize("qu'un", 'fr', include_punctuation=True) == ["qu'", "un"] assert tokenize("qu'un", "fr", include_punctuation=True) == ["qu'", "un"]
assert tokenize("langues d'oïl", 'fr') == ['langues', "d", 'oïl'] assert tokenize("langues d'oïl", "fr") == ["langues", "d", "oïl"]
assert tokenize("langues d'oïl", 'fr', include_punctuation=True) == ['langues', "d'", 'oïl'] assert tokenize("langues d'oïl", "fr", include_punctuation=True) == [
assert tokenize("l'heure", 'fr') == ['l', 'heure'] "langues",
assert tokenize("l'ànima", 'ca') == ['l', 'ànima'] "d'",
assert tokenize("l'anima", 'it') == ['l', 'anima'] "oïl",
assert tokenize("l'heure", 'fr', include_punctuation=True) == ["l'", 'heure'] ]
assert tokenize("L'Hôpital", 'fr', include_punctuation=True) == ["l'", 'hôpital'] assert tokenize("l'heure", "fr") == ["l", "heure"]
assert tokenize("aujourd'hui", 'fr') == ["aujourd'hui"] assert tokenize("l'ànima", "ca") == ["l", "ànima"]
assert tokenize("This isn't French", 'en') == ['this', "isn't", 'french'] assert tokenize("l'anima", "it") == ["l", "anima"]
assert tokenize("l'heure", "fr", include_punctuation=True) == ["l'", "heure"]
assert tokenize("L'Hôpital", "fr", include_punctuation=True) == ["l'", "hôpital"]
assert tokenize("aujourd'hui", "fr") == ["aujourd'hui"]
assert tokenize("This isn't French", "en") == ["this", "isn't", "french"]
# This next behavior is not ideal -- we would prefer "dell'" to be handled # This next behavior is not ideal -- we would prefer "dell'" to be handled
# the same as "l'" -- but this is the most consistent result we can get without # the same as "l'" -- but this is the most consistent result we can get without
@ -21,26 +25,28 @@ def test_apostrophes():
# #
# Versions of regex from 2019 and earlier would give ['dell', 'anima'], which # Versions of regex from 2019 and earlier would give ['dell', 'anima'], which
# is better but inconsistent. # is better but inconsistent.
assert tokenize("dell'anima", 'it') == ["dell'anima"] assert tokenize("dell'anima", "it") == ["dell'anima"]
# Versions of regex from 2019 and earlier would give ['hawai', 'i'], and that's # Versions of regex from 2019 and earlier would give ['hawai', 'i'], and that's
# an example of why we don't want the apostrophe-vowel fix to apply everywhere. # an example of why we don't want the apostrophe-vowel fix to apply everywhere.
assert tokenize("hawai'i", 'en') == ["hawai'i"] assert tokenize("hawai'i", "en") == ["hawai'i"]
def test_catastrophes(): def test_catastrophes():
# More apostrophes, but this time they're in Catalan, and there's other # More apostrophes, but this time they're in Catalan, and there's other
# mid-word punctuation going on too. # mid-word punctuation going on too.
assert tokenize("M'acabo d'instal·lar.", 'ca') == ['m', 'acabo', 'd', 'instal·lar'] assert tokenize("M'acabo d'instal·lar.", "ca") == ["m", "acabo", "d", "instal·lar"]
assert ( assert tokenize("M'acabo d'instal·lar.", "ca", include_punctuation=True) == [
tokenize("M'acabo d'instal·lar.", 'ca', include_punctuation=True) == "m'",
["m'", 'acabo', "d'", 'instal·lar', '.'] "acabo",
) "d'",
"instal·lar",
".",
]
def test_alternate_codes(): def test_alternate_codes():
# Try over-long language codes for French and Catalan # Try over-long language codes for French and Catalan
assert tokenize("qu'un", 'fra') == ['qu', 'un'] assert tokenize("qu'un", "fra") == ["qu", "un"]
assert tokenize("qu'un", 'fre') == ['qu', 'un'] assert tokenize("qu'un", "fre") == ["qu", "un"]
assert tokenize("M'acabo d'instal·lar.", 'cat') == ['m', 'acabo', 'd', 'instal·lar'] assert tokenize("M'acabo d'instal·lar.", "cat") == ["m", "acabo", "d", "instal·lar"]


@@ -14,12 +14,12 @@ def test_gender_neutral_at():
         "tod@s",
         "l@s",
         "trabajador@s",
-        "migrantes"
+        "migrantes",
     ]
 
     text = "el distrito 22@ de Barcelona"
-    assert tokenize(text, 'es') == ["el", "distrito", "22@", "de", "barcelona"]
-    assert lossy_tokenize(text, 'es') == ["el", "distrito", "00@", "de", "barcelona"]
+    assert tokenize(text, "es") == ["el", "distrito", "22@", "de", "barcelona"]
+    assert lossy_tokenize(text, "es") == ["el", "distrito", "22@", "de", "barcelona"]
 
     # It also appears in Portuguese
     text = "direitos e deveres para @s membr@s da comunidade virtual"
@ -32,7 +32,7 @@ def test_gender_neutral_at():
"membr@s", "membr@s",
"da", "da",
"comunidade", "comunidade",
"virtual" "virtual",
] ]
# Because this is part of our tokenization, the language code doesn't # Because this is part of our tokenization, the language code doesn't
@ -43,10 +43,10 @@ def test_gender_neutral_at():
def test_at_in_corpus(): def test_at_in_corpus():
# We have a word frequency for "l@s" # We have a word frequency for "l@s"
assert word_frequency('l@s', 'es') > 0 assert word_frequency("l@s", "es") > 0
# It's not just treated as a word break # It's not just treated as a word break
assert word_frequency('l@s', 'es') < word_frequency('l s', 'es') assert word_frequency("l@s", "es") < word_frequency("l s", "es")
def test_punctuation_at(): def test_punctuation_at():
@ -65,7 +65,7 @@ def test_punctuation_at():
"ao", "ao",
"lado", "lado",
"do", "do",
"nick" "nick",
] ]
assert tokenize(text, "pt", include_punctuation=True) == [ assert tokenize(text, "pt", include_punctuation=True) == [
@ -83,7 +83,7 @@ def test_punctuation_at():
"ao", "ao",
"lado", "lado",
"do", "do",
"nick" "nick",
] ]
# If the @ is not at the end of the word or part of the word ending '@s', # If the @ is not at the end of the word or part of the word ending '@s',
@ -98,12 +98,9 @@ def test_punctuation_at():
"la", "la",
"línea", "línea",
"all:all", "all:all",
"all" "all",
] ]
# Make sure not to catch e-mail addresses # Make sure not to catch e-mail addresses
text = "info@something.example" text = "info@something.example"
assert tokenize(text, "en") == [ assert tokenize(text, "en") == ["info", "something.example"]
"info",
"something.example"
]
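
The one behavioral change in this test file is in its first hunk: lossy_tokenize is now expected to keep "22@" instead of smashing it to "00@", which fits the commit title's move toward handling numbers at frequency-estimation time (my inference, not stated in this diff). A usage sketch of the two tokenizers, with expected outputs taken from the assertions above (wordfreq must be installed):

    # The expected outputs below are the ones asserted in the updated test.
    from wordfreq import tokenize, lossy_tokenize

    text = "el distrito 22@ de Barcelona"
    print(tokenize(text, "es"))        # ['el', 'distrito', '22@', 'de', 'barcelona']
    print(lossy_tokenize(text, "es"))  # now expected to give the same tokens, keeping "22@"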


@ -9,92 +9,112 @@ def test_tokens():
# (He was the Chinese Wikipedia's featured article of the day when I # (He was the Chinese Wikipedia's featured article of the day when I
# wrote this test.) # wrote this test.)
hobart = '加勒特·霍巴特' # Garret Hobart, or "jiā lè tè huò bā tè". hobart = "加勒特·霍巴特" # Garret Hobart, or "jiā lè tè huò bā tè".
# He was the sixth American vice president to die in office. # He was the sixth American vice president to die in office.
fact_simplified = '他是历史上第六位在任期内去世的美国副总统。' fact_simplified = "他是历史上第六位在任期内去世的美国副总统。"
fact_traditional = '他是歷史上第六位在任期內去世的美國副總統。' fact_traditional = "他是歷史上第六位在任期內去世的美國副總統。"
# His name breaks into five pieces, with the only piece staying together # His name breaks into five pieces, with the only piece staying together
# being the one that means 'Bart'. The dot is not included as a token. # being the one that means 'Bart'. The dot is not included as a token.
assert tokenize(hobart, 'zh') == ['', '', '', '', '巴特'] assert tokenize(hobart, "zh") == ["", "", "", "", "巴特"]
assert tokenize(fact_simplified, 'zh') == [ assert tokenize(fact_simplified, "zh") == [
# he / is / history / in / #6 / counter for people # he / is / history / in / #6 / counter for people
'', '', '历史', '', '第六', '', "",
"",
"历史",
"",
"第六",
"",
# during / term of office / in / die # during / term of office / in / die
'', '任期', '', '去世', "",
"任期",
"",
"去世",
# of / U.S. / deputy / president # of / U.S. / deputy / president
'', '美国', '', '总统' "",
"美国",
"",
"总统",
] ]
# Jieba's original tokenizer knows a lot of names, it seems. # Jieba's original tokenizer knows a lot of names, it seems.
assert tokenize(hobart, 'zh', external_wordlist=True) == ['加勒特', '霍巴特'] assert tokenize(hobart, "zh", external_wordlist=True) == ["加勒特", "霍巴特"]
# We get almost the same tokens from the sentence using Jieba's own # We get almost the same tokens from the sentence using Jieba's own
# wordlist, but it tokenizes "in history" as two words and # wordlist, but it tokenizes "in history" as two words and
# "sixth person" as one. # "sixth person" as one.
assert tokenize(fact_simplified, 'zh', external_wordlist=True) == [ assert tokenize(fact_simplified, "zh", external_wordlist=True) == [
# he / is / history / in / sixth person # he / is / history / in / sixth person
'', '', '历史', '', '第六位', "",
"",
"历史",
"",
"第六位",
# during / term of office / in / die # during / term of office / in / die
'', '任期', '', '去世', "",
"任期",
"",
"去世",
# of / U.S. / deputy / president # of / U.S. / deputy / president
'', '美国', '', '总统' "",
"美国",
"",
"总统",
] ]
# Check that Traditional Chinese works at all # Check that Traditional Chinese works at all
assert word_frequency(fact_traditional, 'zh') > 0 assert word_frequency(fact_traditional, "zh") > 0
# You get the same token lengths if you look it up in Traditional Chinese, # You get the same token lengths if you look it up in Traditional Chinese,
# but the words are different # but the words are different
simp_tokens = tokenize(fact_simplified, 'zh', include_punctuation=True) simp_tokens = tokenize(fact_simplified, "zh", include_punctuation=True)
trad_tokens = tokenize(fact_traditional, 'zh', include_punctuation=True) trad_tokens = tokenize(fact_traditional, "zh", include_punctuation=True)
assert ''.join(simp_tokens) == fact_simplified assert "".join(simp_tokens) == fact_simplified
assert ''.join(trad_tokens) == fact_traditional assert "".join(trad_tokens) == fact_traditional
simp_lengths = [len(token) for token in simp_tokens] simp_lengths = [len(token) for token in simp_tokens]
trad_lengths = [len(token) for token in trad_tokens] trad_lengths = [len(token) for token in trad_tokens]
assert simp_lengths == trad_lengths assert simp_lengths == trad_lengths
def test_combination(): def test_combination():
xiexie_freq = word_frequency('谢谢', 'zh') # "Thanks" xiexie_freq = word_frequency("谢谢", "zh") # "Thanks"
assert word_frequency('谢谢谢谢', 'zh') == pytest.approx(xiexie_freq / 20, rel=0.01) assert word_frequency("谢谢谢谢", "zh") == pytest.approx(xiexie_freq / 20, rel=0.01)
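
For context on the factor of 20 asserted in test_combination above: as I understand wordfreq's phrase-frequency heuristic (an assumption on my part; the formula is not shown in this diff), token frequencies combine as the reciprocal of summed reciprocals, and each inferred token boundary costs roughly another factor of 10, so two equal-frequency tokens give (f / 2) / 10 = f / 20. A small sketch with a hypothetical constant name:

    # Hedged sketch of the combination rule the test above appears to rely on.
    # INFERRED_BOUNDARY_PENALTY is a hypothetical name, not wordfreq's.
    INFERRED_BOUNDARY_PENALTY = 10.0

    def combine_token_freqs(freqs):
        # reciprocal of the summed reciprocals, then one penalty per extra token
        combined = 1.0 / sum(1.0 / f for f in freqs)
        return combined / (INFERRED_BOUNDARY_PENALTY ** (len(freqs) - 1))

    f = 1e-4  # stand-in for word_frequency("谢谢", "zh")
    assert abs(combine_token_freqs([f, f]) - f / 20) < 1e-18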
def test_alternate_codes(): def test_alternate_codes():
# Tokenization of Chinese works when you use other language codes # Tokenization of Chinese works when you use other language codes
# that are not equal to 'zh'. # that are not equal to 'zh'.
tokens = ['谢谢', '谢谢'] tokens = ["谢谢", "谢谢"]
# Code with a region attached # Code with a region attached
assert tokenize('谢谢谢谢', 'zh-CN') == tokens assert tokenize("谢谢谢谢", "zh-CN") == tokens
# Over-long codes for Chinese # Over-long codes for Chinese
assert tokenize('谢谢谢谢', 'chi') == tokens assert tokenize("谢谢谢谢", "chi") == tokens
assert tokenize('谢谢谢谢', 'zho') == tokens assert tokenize("谢谢谢谢", "zho") == tokens
# Separate codes for Mandarin and Cantonese # Separate codes for Mandarin and Cantonese
assert tokenize('谢谢谢谢', 'cmn') == tokens assert tokenize("谢谢谢谢", "cmn") == tokens
assert tokenize('谢谢谢谢', 'yue') == tokens assert tokenize("谢谢谢谢", "yue") == tokens
def test_unreasonably_long(): def test_unreasonably_long():
# This crashed earlier versions of wordfreq due to an overflow in # This crashed earlier versions of wordfreq due to an overflow in
# exponentiation. We've now changed the sequence of operations so it # exponentiation. We've now changed the sequence of operations so it
# will underflow instead. # will underflow instead.
lots_of_ls = 'l' * 800 lots_of_ls = "l" * 800
assert word_frequency(lots_of_ls, 'zh') == 0. assert word_frequency(lots_of_ls, "zh") == 0.0
assert zipf_frequency(lots_of_ls, 'zh') == 0. assert zipf_frequency(lots_of_ls, "zh") == 0.0
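
The overflow/underflow comment above is about CPython float behavior: exponentiation that overflows raises OverflowError, while underflow silently returns 0.0, which is why the frequency of an 800-character "word" can come out as exactly 0 instead of crashing. A quick illustration, independent of wordfreq:

    # Overflowing float exponentiation raises; underflow quietly returns 0.0.
    try:
        10.0 ** 800
    except OverflowError:
        pass  # this is the failure mode the old code could hit

    assert 10.0 ** -800 == 0.0  # smaller than the smallest subnormal float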
def test_hyphens(): def test_hyphens():
# An edge case of Chinese tokenization that changed sometime around # An edge case of Chinese tokenization that changed sometime around
# jieba 0.42. # jieba 0.42.
tok = tokenize('--------', 'zh', include_punctuation=True) tok = tokenize("--------", "zh", include_punctuation=True)
assert tok == ['-'] * 8 assert tok == ["-"] * 8
tok = tokenize('--------', 'zh', include_punctuation=True, external_wordlist=True)
assert tok == ['--------']
tok = tokenize("--------", "zh", include_punctuation=True, external_wordlist=True)
assert tok == ["--------"]


@ -1,16 +1,22 @@
from wordfreq import ( from wordfreq import (
word_frequency, available_languages, cB_to_freq, word_frequency,
top_n_list, random_words, random_ascii_words, tokenize, lossy_tokenize available_languages,
cB_to_freq,
top_n_list,
random_words,
random_ascii_words,
tokenize,
lossy_tokenize,
) )
import pytest import pytest
def test_freq_examples(): def test_freq_examples():
# Stopwords are most common in the correct language # Stopwords are most common in the correct language
assert word_frequency('the', 'en') > word_frequency('de', 'en') assert word_frequency("the", "en") > word_frequency("de", "en")
assert word_frequency('de', 'es') > word_frequency('the', 'es') assert word_frequency("de", "es") > word_frequency("the", "es")
# We get word frequencies from the 'large' list when available # We get word frequencies from the 'large' list when available
assert word_frequency('infrequency', 'en') > 0. assert word_frequency("infrequency", "en") > 0.0
def test_languages(): def test_languages():
@@ -20,33 +26,33 @@ def test_languages():
    assert len(avail) >= 34

    # 'small' covers the same languages, but with some different lists
    avail_small = available_languages("small")
    assert len(avail_small) == len(avail)
    assert avail_small != avail

    # 'combined' is the same as 'small'
    avail_old_name = available_languages("combined")
    assert avail_old_name == avail_small

    # 'large' covers fewer languages
    avail_large = available_languages("large")
    assert len(avail_large) >= 14
    assert len(avail) > len(avail_large)

    # Look up the digit '2' in the main word list for each language
    for lang in avail:
        assert word_frequency("2", lang) > 0

        # Make up a weirdly verbose language code and make sure
        # we still get it
        new_lang_code = "%s-001-x-fake-ext" % lang.upper()
        assert word_frequency("2", new_lang_code) > 0


def test_minimums():
    assert word_frequency("esquivalience", "en") == 0
    assert word_frequency("esquivalience", "en", minimum=1e-6) == 1e-6
    assert word_frequency("the", "en", minimum=1) == 1


def test_most_common_words():
@@ -59,61 +65,61 @@ def test_most_common_words():
        """
        return top_n_list(lang, 1)[0]

    assert get_most_common("ar") == "في"
    assert get_most_common("bg") == "на"
    assert get_most_common("bn") == "না"
    assert get_most_common("ca") == "de"
    assert get_most_common("cs") == "a"
    assert get_most_common("da") == "i"
    assert get_most_common("el") == "και"
    assert get_most_common("de") == "die"
    assert get_most_common("en") == "the"
    assert get_most_common("es") == "de"
    assert get_most_common("fi") == "ja"
    assert get_most_common("fil") == "sa"
    assert get_most_common("fr") == "de"
    assert get_most_common("he") == "את"
    assert get_most_common("hi") == "के"
    assert get_most_common("hu") == "a"
    assert get_most_common("id") == "yang"
    assert get_most_common("is") == "og"
    assert get_most_common("it") == "di"
    assert get_most_common("ja") == ""
    assert get_most_common("ko") == ""
    assert get_most_common("lt") == "ir"
    assert get_most_common("lv") == "un"
    assert get_most_common("mk") == "на"
    assert get_most_common("ms") == "yang"
    assert get_most_common("nb") == "i"
    assert get_most_common("nl") == "de"
    assert get_most_common("pl") == "w"
    assert get_most_common("pt") == "de"
    assert get_most_common("ro") == "de"
    assert get_most_common("ru") == "в"
    assert get_most_common("sh") == "je"
    assert get_most_common("sk") == "a"
    assert get_most_common("sl") == "je"
    assert get_most_common("sv") == "är"
    assert get_most_common("ta") == "ஒரு"
    assert get_most_common("tr") == "ve"
    assert get_most_common("uk") == "в"
    assert get_most_common("ur") == "کے"
    assert get_most_common("vi") == ""
    assert get_most_common("zh") == ""


def test_language_matching():
    freq = word_frequency("", "zh")
    assert word_frequency("", "zh-TW") == freq
    assert word_frequency("", "zh-CN") == freq
    assert word_frequency("", "zh-Hant") == freq
    assert word_frequency("", "zh-Hans") == freq
    assert word_frequency("", "yue-CN") == freq
    assert word_frequency("", "cmn") == freq


def test_cB_conversion():
    assert cB_to_freq(0) == 1.0
    assert cB_to_freq(-100) == pytest.approx(0.1)
    assert cB_to_freq(-600) == pytest.approx(1e-6)
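# cB_to_freq's behavior, as pinned down by the assertions above, is a plain
# log-scale conversion: frequencies are stored as centibels, hundredths of a
# decibel-style log10 unit. A minimal sketch of that arithmetic, not
# wordfreq's own implementation:
def cb_to_freq_sketch(cB: float) -> float:
    # 0 cB -> 1.0, -100 cB -> 0.1, -600 cB -> 1e-6
    return 10.0 ** (cB / 100)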
@@ -126,101 +132,125 @@ def test_failed_cB_conversion():
def test_tokenization():
    # We preserve apostrophes within words, so "can't" is a single word in the
    # data
    assert tokenize("I don't split at apostrophes, you see.", "en") == [
        "i",
        "don't",
        "split",
        "at",
        "apostrophes",
        "you",
        "see",
    ]

    assert tokenize(
        "I don't split at apostrophes, you see.", "en", include_punctuation=True
    ) == ["i", "don't", "split", "at", "apostrophes", ",", "you", "see", "."]

    # Certain punctuation does not inherently split a word.
    assert tokenize("Anything is possible at zombo.com", "en") == [
        "anything",
        "is",
        "possible",
        "at",
        "zombo.com",
    ]

    # Splits occur after symbols, and at splitting punctuation such as hyphens.
    assert tokenize("😂test", "en") == ["😂", "test"]
    assert tokenize("flip-flop", "en") == ["flip", "flop"]
    assert tokenize(
        "this text has... punctuation :)", "en", include_punctuation=True
    ) == ["this", "text", "has", "...", "punctuation", ":)"]

    # Multi-codepoint emoji sequences such as 'medium-skinned woman with headscarf'
    # and 'David Bowie' stay together, because our Unicode segmentation algorithm
    # is up to date
    assert tokenize("emoji test 🧕🏽", "en") == ["emoji", "test", "🧕🏽"]
    assert tokenize(
        "👨‍🎤 Planet Earth is blue, and there's nothing I can do 🌎🚀", "en"
    ) == [
        "👨‍🎤",
        "planet",
        "earth",
        "is",
        "blue",
        "and",
        "there's",
        "nothing",
        "i",
        "can",
        "do",
        "🌎",
        "🚀",
    ]

    # Water wave, surfer, flag of California (indicates ridiculously complete support
    # for Unicode 10 and Emoji 5.0)
    assert tokenize("Surf's up 🌊🏄🏴󠁵󠁳󠁣󠁡󠁿'", "en") == ["surf's", "up", "🌊", "🏄", "🏴󠁵󠁳󠁣󠁡󠁿"]


def test_casefolding():
    assert tokenize("WEISS", "de") == ["weiss"]
    assert tokenize("weiß", "de") == ["weiss"]
    assert tokenize("İstanbul", "tr") == ["istanbul"]
    assert tokenize("SIKISINCA", "tr") == ["sıkısınca"]


def test_normalization():
    assert tokenize('"715 - CRΣΣKS" by Bon Iver', "en") == [
        "715",
        "crσσks",
        "by",
        "bon",
        "iver",
    ]
    assert lossy_tokenize('"715 - CRΣΣKS" by Bon Iver', "en") == [
        "715",
        "crσσks",
        "by",
        "bon",
        "iver",
    ]


def test_uncurl_quotes():
    assert lossy_tokenize("let’s", "en") == ["let's"]
    assert word_frequency("let’s", "en") == word_frequency("let's", "en")


def test_phrase_freq():
    ff = word_frequency("flip-flop", "en")
    assert ff > 0
    phrase_freq = 1.0 / word_frequency("flip", "en") + 1.0 / word_frequency(
        "flop", "en"
    )
    assert 1.0 / ff == pytest.approx(phrase_freq, rel=0.01)
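# The combination rule exercised by test_phrase_freq (an illustrative sketch,
# not wordfreq's internal code): a phrase's frequency is the reciprocal of the
# sum of its tokens' reciprocal frequencies -- half the harmonic mean for two
# tokens -- so a phrase always comes out rarer than any single token in it.
def combined_freq_sketch(token_freqs):
    return 1.0 / sum(1.0 / f for f in token_freqs)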
def test_not_really_random():
    # If your xkcd-style password comes out like this, maybe you shouldn't
    # use it
    assert random_words(nwords=4, lang="en", bits_per_word=0) == "the the the the"

    # This not only tests random_ascii_words, it makes sure we didn't end
    # up with 'eos' as a very common Japanese word
    assert random_ascii_words(nwords=4, lang="ja", bits_per_word=0) == "1 1 1 1"


def test_not_enough_ascii():
    with pytest.raises(ValueError):
        random_ascii_words(lang="zh", bits_per_word=16)


def test_arabic():
    # Remove tatweels
    assert tokenize("متــــــــعب", "ar") == ["متعب"]

    # Remove combining marks
    assert tokenize("حَرَكَات", "ar") == ["حركات"]

    # An Arabic ligature that is affected by NFKC normalization
    assert tokenize("\ufefb", "ar") == ["\u0644\u0627"]


def test_ideographic_fallback():
@@ -228,28 +258,33 @@ def test_ideographic_fallback():
    #
    # More complex examples like this, involving the multiple scripts of Japanese,
    # are in test_japanese.py.
    assert tokenize("中国文字", "en") == ["中国文字"]


def test_other_languages():
    # Test that we leave Thai letters stuck together. If we had better Thai support,
    # we would actually split this into a three-word phrase.
    assert tokenize("การเล่นดนตรี", "th") == ["การเล่นดนตรี"]
    assert tokenize('"การเล่นดนตรี" means "playing music"', "en") == [
        "การเล่นดนตรี",
        "means",
        "playing",
        "music",
    ]

    # Test Khmer, a script similar to Thai
    assert tokenize("សូមស្វាគមន៍", "km") == ["សូមស្វាគមន៍"]

    # Test Hindi -- tokens split where there are spaces, and not where there aren't
    assert tokenize("हिन्दी विक्षनरी", "hi") == ["हिन्दी", "विक्षनरी"]

    # Remove vowel points in Hebrew
    assert tokenize("דֻּגְמָה", "he") == ["דגמה"]

    # Deal with commas, cedillas, and I's in Turkish
    assert tokenize("kișinin", "tr") == ["kişinin"]
    assert tokenize("KİȘİNİN", "tr") == ["kişinin"]

    # Deal with cedillas that should be commas-below in Romanian
    assert tokenize("acelaşi", "ro") == ["același"]
    assert tokenize("ACELAŞI", "ro") == ["același"]

View File

@@ -3,7 +3,7 @@ import pytest
def test_tokens():
    assert tokenize("おはようございます", "ja") == ["おはよう", "ござい", "ます"]


def test_simple_tokenize():
@@ -17,13 +17,12 @@ def test_simple_tokenize():
    #
    # We used to try to infer word boundaries between hiragana and katakana,
    # but this leads to edge cases that are unsolvable without a dictionary.
    ja_text = "ひらがなカタカナromaji"
    assert simple_tokenize(ja_text) == ["ひらがなカタカナ", "romaji"]

    # An example that would be multiple tokens if tokenized as 'ja' via MeCab,
    # but sticks together in simple_tokenize
    assert simple_tokenize("おはようございます") == ["おはようございます"]

    # Names that use the weird possessive marker ヶ, which is technically a
    # katakana even though it's being used like a kanji, stay together as one
@@ -43,17 +42,13 @@ def test_simple_tokenize():
    assert simple_tokenize("見ヶ〆料") == ["見ヶ〆料"]


def test_combination():
    ohayou_freq = word_frequency("おはよう", "ja")
    gozai_freq = word_frequency("ござい", "ja")
    masu_freq = word_frequency("ます", "ja")

    assert word_frequency("おはようおはよう", "ja") == pytest.approx(ohayou_freq / 2, rel=0.01)
    assert 1.0 / word_frequency("おはようございます", "ja") == pytest.approx(
        1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq, rel=0.01
    )

View File

@@ -3,16 +3,14 @@ import pytest
def test_tokens():
    assert tokenize("감사합니다", "ko") == ["감사", "합니다"]


def test_combination():
    gamsa_freq = word_frequency("감사", "ko")
    habnida_freq = word_frequency("합니다", "ko")

    assert word_frequency("감사감사", "ko") == pytest.approx(gamsa_freq / 2, rel=0.01)
    assert 1.0 / word_frequency("감사합니다", "ko") == pytest.approx(
        1.0 / gamsa_freq + 1.0 / habnida_freq, rel=0.01
    )

58
tests/test_numbers.py Normal file
View File

@@ -0,0 +1,58 @@
from wordfreq import word_frequency
from wordfreq.numbers import digit_freq, smash_numbers
from pytest import approx


def test_number_smashing():
    assert smash_numbers("1") == "1"
    assert smash_numbers("3.14") == "0.00"
    assert smash_numbers("24601") == "00000"


def test_decimals():
    assert word_frequency("3.14", "el") > word_frequency("4.14", "el")
    assert word_frequency("3.14", "el") == word_frequency("3.15", "el")
    assert word_frequency("3,14", "de") > word_frequency("4,14", "de")
    assert word_frequency("3,14", "de") == word_frequency("3,15", "de")


def test_year_distribution():
    assert word_frequency("2010", "en") > word_frequency("1010", "en")
    assert word_frequency("2010", "en") > word_frequency("3010", "en")


def test_boundaries():
    assert word_frequency("9", "en") > word_frequency("10", "en")
    assert word_frequency("99", "en") > word_frequency("100", "en")
    assert word_frequency("999", "en") > word_frequency("1000", "en")
    assert word_frequency("9999", "en") > word_frequency("10000", "en")


def test_multiple_words():
    once = word_frequency("2015b", "en")
    twice = word_frequency("2015b 2015b", "en")
    assert once == approx(2 * twice)


def test_distribution():
    assert word_frequency("24601", "en") > word_frequency("90210", "en")
    assert word_frequency("7", "en") > word_frequency("007", "en")
    assert word_frequency("404", "en") == word_frequency("418", "en")


def test_3digit_sum():
    """
    Test that the probability distribution given you have a 3-digit sequence
    adds up to approximately 1.
    """
    three_digit_sum = sum(digit_freq(f"{num:03d}") for num in range(0, 1000))
    assert three_digit_sum == approx(1.0)


def test_4digit_sum():
    """
    Test that the probability distribution given you have a 4-digit sequence
    adds up to approximately 1.
    """
    four_digit_sum = sum(digit_freq(f"{num:04d}") for num in range(0, 10000))
    assert 0.999 < four_digit_sum < 1.0
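# The two "sum" tests above can be reproduced directly: digit_freq is meant to
# behave as a probability distribution over digit strings of a given length.
# Illustrative usage, mirroring test_3digit_sum:
from wordfreq.numbers import digit_freq

three_digit_total = sum(digit_freq(f"{n:03d}") for n in range(1000))
print(round(three_digit_total, 3))  # expected to be approximately 1.0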

View File

@@ -5,14 +5,26 @@ from wordfreq.preprocess import preprocess_text
def test_transliteration():
    # "Well, there's a lot of things you do not understand."
    # (from somewhere in OpenSubtitles)
    assert tokenize("Па, има ту много ствари које не схваташ.", "sr") == [
        "pa",
        "ima",
        "tu",
        "mnogo",
        "stvari",
        "koje",
        "ne",
        "shvataš",
    ]
    assert tokenize("Pa, ima tu mnogo stvari koje ne shvataš.", "sr") == [
        "pa",
        "ima",
        "tu",
        "mnogo",
        "stvari",
        "koje",
        "ne",
        "shvataš",
    ]

    # I don't have examples of complete sentences in Azerbaijani that are
    # naturally in Cyrillic, because it turns out everyone writes Azerbaijani
@@ -20,14 +32,14 @@ def test_transliteration():
    # So here are some individual words.

    # 'library' in Azerbaijani Cyrillic
    assert preprocess_text("китабхана", "az") == "kitabxana"
    assert preprocess_text("КИТАБХАНА", "az") == "kitabxana"
    assert preprocess_text("KİTABXANA", "az") == "kitabxana"

    # 'scream' in Azerbaijani Cyrillic
    assert preprocess_text("бағырты", "az") == "bağırtı"
    assert preprocess_text("БАҒЫРТЫ", "az") == "bağırtı"
    assert preprocess_text("BAĞIRTI", "az") == "bağırtı"


def test_actually_russian():
@@ -38,13 +50,12 @@ def test_actually_russian():
    # We make sure to handle this case so we don't end up with a mixed-script
    # word like "pacanы".
    assert tokenize("сто из ста, пацаны!", "sr") == ["sto", "iz", "sta", "pacany"]
    assert tokenize("культуры", "sr") == ["kul'tury"]


def test_alternate_codes():
    # Try language codes for Serbo-Croatian that have been split, and now
    # are canonically mapped to Serbian
    assert tokenize("культуры", "sh") == ["kul'tury"]
    assert tokenize("культуры", "hbs") == ["kul'tury"]

View File

@@ -13,7 +13,7 @@ import warnings
from .tokens import tokenize, simple_tokenize, lossy_tokenize
from .language_info import get_language_info
from .numbers import digit_freq, has_digit_sequence, smash_numbers

logger = logging.getLogger(__name__)

@@ -234,7 +234,7 @@ _wf_cache: Dict[Tuple[str, str, str, float], float] = {}
def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float:
    tokens = lossy_tokenize(word, lang)

    if not tokens:
        return minimum

@@ -245,13 +245,20 @@ def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float:
    freqs = get_frequency_dict(lang, wordlist)
    one_over_result = 0.0

    for token in tokens:
        smashed = smash_numbers(token)
        if smashed not in freqs:
            # If any word is missing, just return the default value
            return minimum
        freq = freqs[smashed]
        if smashed != token:
            # If there is a digit sequence in the token, the digits are
            # internally replaced by 0s to aggregate their probabilities
            # together. We then assign a specific frequency to the digit
            # sequence using the `digit_freq` distribution.
            freq *= digit_freq(token)
        one_over_result += 1.0 / freq

    # Combine the frequencies of tokens we looked up.
    freq = 1.0 / one_over_result

    if get_language_info(lang)["tokenizer"] == "jieba":
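# To restate the lookup above as a standalone sketch (illustrative only, not
# the wordfreq implementation; `freqs` here is a plain dict standing in for a
# language's frequency mapping): a token containing digits is looked up under
# its smashed form, then re-weighted by the digit distribution.
def digit_aware_lookup_sketch(token: str, freqs: dict) -> float:
    from wordfreq.numbers import digit_freq, smash_numbers

    smashed = smash_numbers(token)  # e.g. "2015" -> "0000"
    freq = freqs.get(smashed, 0.0)
    if smashed != token:
        freq *= digit_freq(token)  # apportion the smashed entry's mass to "2015"
    return freq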
@@ -334,10 +341,15 @@ def top_n_list(
    Return a frequency list of length `n` in descending order of frequency.
    This list contains words from `wordlist`, of the given language.
    If `ascii_only`, then only ascii words are considered.

    The frequency list will not contain multi-digit sequences, because we
    estimate the frequencies of those using the functions in `numbers.py`,
    not using a wordlist that contains all of them.
    """
    results = []
    for word in iter_wordlist(lang, wordlist):
        if (not ascii_only) or max(word) <= "~":
            if not has_digit_sequence(word):
                results.append(word)
                if len(results) >= n:
                    break
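# A quick way to see the behavior documented in that docstring (illustrative
# usage; the exact output depends on the installed wordlists): the top of the
# English list no longer contains bare multi-digit tokens such as "2019",
# because their frequency now comes from wordfreq.numbers instead.
from wordfreq import top_n_list

print(top_n_list("en", 20))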

View File

@@ -1,4 +1,4 @@
import regex

# Frequencies of leading digits, according to Benford's law, sort of.
# Benford's law doesn't describe numbers with leading zeroes, because "007"
@@ -11,23 +11,37 @@ DIGIT_FREQS = [0.009, 0.300, 0.175, 0.124, 0.096, 0.078, 0.066, 0.057, 0.050, 0.
#
# We do this with a piecewise exponential function whose peak is a plateau covering
# the years 2019 to 2039.

# Determined by experimentation: makes the probabilities of all years add up to 90%.
# The other 10% goes to NOT_YEAR_PROB. tests/test_numbers.py confirms that this
# probability distribution adds up to 1.
YEAR_LOG_PEAK = -1.9185
NOT_YEAR_PROB = 0.1
REFERENCE_YEAR = 2019
PLATEAU_WIDTH = 20

DIGIT_RE = regex.compile(r"\d")
MULTI_DIGIT_RE = regex.compile(r"\d[\d.,]+")
PURE_DIGIT_RE = regex.compile(r"\d+")


def benford_freq(text: str) -> float:
    """
    Estimate the frequency of a digit sequence according to Benford's law.
    """
    first_digit = int(text[0])
    return DIGIT_FREQS[first_digit] / 10 ** (len(text) - 1)


def year_freq(text: str) -> float:
    """
    Estimate the relative frequency of a particular 4-digit sequence representing
    a year.

    For example, suppose text == "1985". We're estimating the probability that a
    randomly-selected token from a large corpus will be "1985" and refer to the
    year, _given_ that it is 4 digits. Tokens that are not 4 digits are not involved
    in the probability distribution.
    """
    year = int(text)

    # Fitting a line to the curve seen at
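# The rest of year_freq is cut off by this hunk. From the constants and
# comments above, its shape is a plateau at YEAR_LOG_PEAK over the years
# REFERENCE_YEAR through REFERENCE_YEAR + PLATEAU_WIDTH, with exponential
# decay on either side, plus a NOT_YEAR_PROB share for 4-digit strings that
# aren't years. The sketch below only illustrates that shape: the decay
# slopes, and how the non-year share is folded in, are assumptions rather
# than wordfreq's fitted values.
def year_freq_sketch(text: str) -> float:
    year = int(text)
    if year <= REFERENCE_YEAR:
        log_freq = YEAR_LOG_PEAK - 0.008 * (REFERENCE_YEAR - year)  # assumed slope
    elif year <= REFERENCE_YEAR + PLATEAU_WIDTH:
        log_freq = YEAR_LOG_PEAK  # the plateau over the "present" years
    else:
        log_freq = YEAR_LOG_PEAK - 0.2 * (year - REFERENCE_YEAR - PLATEAU_WIDTH)  # assumed slope
    return 10.0 ** log_freq + NOT_YEAR_PROB * benford_freq(text)  # assumed combination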
@@ -60,13 +74,38 @@ def year_freq(text: str) -> float:
def digit_freq(text: str) -> float:
    """
    Get the relative frequency of a string of digits, using our estimates.
    """
    freq = 1.0
    for match in MULTI_DIGIT_RE.findall(text):
        for submatch in PURE_DIGIT_RE.findall(match):
            if len(submatch) == 4:
                freq *= year_freq(submatch)
            else:
                freq *= benford_freq(submatch)
    return freq


def has_digit_sequence(text: str) -> bool:
    """
    Returns True iff the text has a digit sequence that will be normalized out
    and handled with `digit_freq`.
    """
    return bool(MULTI_DIGIT_RE.match(text))


def _sub_zeroes(match: regex.Match) -> str:
    """
    Given a regex match, return what it matched with digits replaced by
    zeroes.
    """
    return DIGIT_RE.sub("0", match.group(0))


def smash_numbers(text: str) -> str:
    """
    Replace sequences of multiple digits with zeroes, so we don't need to
    distinguish the frequencies of thousands of numbers.
    """
    return MULTI_DIGIT_RE.sub(_sub_zeroes, text)
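# Quick usage sketch for the helpers above; the comparisons parallel the
# word_frequency assertions in tests/test_numbers.py, and the comments show
# the expected results.
from wordfreq.numbers import digit_freq, smash_numbers

print(smash_numbers("pi is 3.14, roughly"))       # "pi is 0.00, roughly"
print(digit_freq("24601") > digit_freq("90210"))  # True: a leading 2 is likelier than a leading 9
print(digit_freq("2010") > digit_freq("1010"))    # True: 4-digit strings use the year distribution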

View File

@@ -7,10 +7,6 @@ from langcodes import Language
MARK_RE = regex.compile(r"[\p{Mn}\N{ARABIC TATWEEL}]", regex.V1)


def preprocess_text(text: str, language: Language) -> str:
    """
    This function applies pre-processing steps that convert forms of words
@@ -251,19 +247,3 @@ def cedillas_to_commas(text: str) -> str:
        "\N{LATIN SMALL LETTER T WITH CEDILLA}",
        "\N{LATIN SMALL LETTER T WITH COMMA BELOW}",
    )

View File

@@ -10,7 +10,7 @@ from .language_info import (
    SPACELESS_SCRIPTS,
    EXTRA_JAPANESE_CHARACTERS,
)
from .preprocess import preprocess_text

# Placeholders for CJK functions that we'll import on demand
_mecab_tokenize = None

@@ -309,13 +309,6 @@ def lossy_tokenize(
    In particular:

    - In Chinese, unless Traditional Chinese is specifically requested using
      'zh-Hant', all characters will be converted to Simplified Chinese.

@@ -334,4 +327,4 @@ def lossy_tokenize(
        tokens = [_simplify_chinese(token) for token in tokens]

    return [uncurl_quotes(token) for token in tokens]
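# Net effect of the change to lossy_tokenize (illustrative usage; the second
# comparison parallels tests/test_numbers.py): digit sequences now survive
# tokenization unchanged, and their frequencies are estimated by the digit
# model inside word_frequency rather than by smashed wordlist entries.
from wordfreq import lossy_tokenize, word_frequency

print(lossy_tokenize("the year 2022", "en"))  # ['the', 'year', '2022'] -- no longer smashed to '0000'
print(word_frequency("2010", "en") > word_frequency("1010", "en"))  # True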