From 2be781fd1abf27392bb20073f0822c475044fb7d Mon Sep 17 00:00:00 2001 From: Elia Robyn Lake Date: Tue, 21 Nov 2023 18:07:04 -0500 Subject: [PATCH] v3.1: support py3.12, update formatting, replace pkg_resources with locate --- .gitignore | 2 + mypy.ini | 1 + poetry.lock | 658 ++++++++++++++++++++++++++------------ pyproject.toml | 72 ++++- tests/test_apostrophes.py | 2 +- tests/test_at_sign.py | 2 +- tests/test_chinese.py | 2 +- tests/test_general.py | 45 +-- tests/test_japanese.py | 2 +- tests/test_korean.py | 2 +- tests/test_numbers.py | 2 +- wordfreq/__init__.py | 61 ++-- wordfreq/chinese.py | 31 +- wordfreq/language_info.py | 10 +- wordfreq/mecab.py | 15 +- wordfreq/preprocess.py | 5 +- wordfreq/tokens.py | 46 ++- wordfreq/transliterate.py | 212 +++++++----- wordfreq/util.py | 15 + 19 files changed, 766 insertions(+), 419 deletions(-) create mode 100644 wordfreq/util.py diff --git a/.gitignore b/.gitignore index 55d8d6b..113a380 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ wordfreq-data.tar.gz build.dot .pytest_cache .tox +.vscode +prof diff --git a/mypy.ini b/mypy.ini index df9adb2..4c189bc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,6 @@ [mypy] python_version = 3.7 +files = wordfreq [mypy-ipadic] ignore_missing_imports = True diff --git a/poetry.lock b/poetry.lock index 27eab71..2bbfbc0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. + [[package]] name = "appnope" version = "0.1.3" @@ -5,6 +7,10 @@ description = "Disable App Nap on macOS >= 10.9" category = "dev" optional = false python-versions = "*" +files = [ + {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"}, + {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, +] [[package]] name = "attrs" @@ -13,12 +19,16 @@ description = "Classes Without Boilerplate" category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, +] [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] +dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] +tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", 
"pytest-mypy-plugins"] [[package]] name = "backcall" @@ -27,41 +37,10 @@ description = "Specifications for callback functions passed in to an API" category = "dev" optional = false python-versions = "*" - -[[package]] -name = "black" -version = "22.10.0" -description = "The uncompromising code formatter." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} -typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} -typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +files = [ + {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, + {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, +] [[package]] name = "colorama" @@ -70,6 +49,10 @@ description = "Cross-platform colored terminal text." category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] [[package]] name = "decorator" @@ -78,6 +61,10 @@ description = "Decorators for Humans" category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] [[package]] name = "exceptiongroup" @@ -86,24 +73,14 @@ description = "Backport of PEP 654 (exception groups)" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.0.0rc9-py3-none-any.whl", hash = "sha256:2e3c3fc1538a094aab74fad52d6c33fc94de3dfee3ee01f187c0e0c72aec5337"}, + {file = "exceptiongroup-1.0.0rc9.tar.gz", hash = "sha256:9086a4a21ef9b31c72181c77c040a074ba0889ee56a7b289ff0afb0d97655f96"}, +] [package.extras] test = ["pytest (>=6)"] -[[package]] -name = "flake8" -version = "4.0.1" -description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -importlib-metadata = {version = "<4.3", markers = "python_version < \"3.8\""} -mccabe = ">=0.6.0,<0.7.0" -pycodestyle = ">=2.8.0,<2.9.0" -pyflakes = ">=2.4.0,<2.5.0" - [[package]] name = "ftfy" version = "6.1.1" @@ -111,25 +88,25 @@ description = "Fixes mojibake and other problems with Unicode, after the fact" category = "main" optional = false python-versions = ">=3.7,<4" +files = [ + {file = 
"ftfy-6.1.1-py3-none-any.whl", hash = "sha256:0ffd33fce16b54cccaec78d6ec73d95ad370e5df5a25255c8966a6147bd667ca"}, + {file = "ftfy-6.1.1.tar.gz", hash = "sha256:bfc2019f84fcd851419152320a6375604a0f1459c281b5b199b2cd0d2e727f8f"}, +] [package.dependencies] wcwidth = ">=0.2.5" [[package]] -name = "importlib-metadata" -version = "4.2.0" -description = "Read metadata from Python packages" +name = "gprof2dot" +version = "2022.7.29" +description = "Generate a dot graph from the output of several profilers." category = "dev" optional = false -python-versions = ">=3.6" - -[package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} -zipp = ">=0.5" - -[package.extras] -docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] +python-versions = ">=2.7" +files = [ + {file = "gprof2dot-2022.7.29-py2.py3-none-any.whl", hash = "sha256:f165b3851d3c52ee4915eb1bd6cca571e5759823c2cd0f71a79bda93c2dc85d6"}, + {file = "gprof2dot-2022.7.29.tar.gz", hash = "sha256:45b4d298bd36608fccf9511c3fd88a773f7a1abc04d6cd39445b11ba43133ec5"}, +] [[package]] name = "iniconfig" @@ -138,6 +115,10 @@ description = "iniconfig: brain-dead simple config-ini parsing" category = "dev" optional = false python-versions = "*" +files = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] [[package]] name = "ipadic" @@ -146,6 +127,9 @@ description = "IPAdic packaged for Python" category = "main" optional = false python-versions = "*" +files = [ + {file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"}, +] [[package]] name = "ipython" @@ -154,6 +138,10 @@ description = "IPython: Productive Interactive Computing" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "ipython-7.34.0-py3-none-any.whl", hash = "sha256:c175d2440a1caff76116eb719d40538fbb316e214eda85c5515c303aacbfb23e"}, + {file = "ipython-7.34.0.tar.gz", hash = "sha256:af3bdb46aa292bce5615b1b2ebc76c2080c5f77f54bda2ec72461317273e7cd6"}, +] [package.dependencies] appnope = {version = "*", markers = "sys_platform == \"darwin\""} @@ -166,6 +154,7 @@ pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} pickleshare = "*" prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" pygments = "*" +setuptools = ">=18.5" traitlets = ">=4.2" [package.extras] @@ -174,10 +163,10 @@ doc = ["Sphinx (>=1.3)"] kernel = ["ipykernel"] nbconvert = ["nbconvert"] nbformat = ["nbformat"] -notebook = ["notebook", "ipywidgets"] +notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] -test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.17)"] +test = ["ipykernel", "nbformat", "nose (>=0.10.1)", "numpy (>=1.17)", "pygments", "requests", "testpath"] [[package]] name = "jedi" @@ -186,6 +175,10 @@ description = "An autocompletion tool for Python that can be used for text edito category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "jedi-0.18.1-py2.py3-none-any.whl", hash = 
"sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"}, + {file = "jedi-0.18.1.tar.gz", hash = "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab"}, +] [package.dependencies] parso = ">=0.8.0,<0.9.0" @@ -201,6 +194,9 @@ description = "Chinese Words Segmentation Utilities" category = "main" optional = false python-versions = "*" +files = [ + {file = "jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2"}, +] [[package]] name = "langcodes" @@ -209,10 +205,26 @@ description = "Tools for labeling human languages with IETF language tags" category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"}, + {file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"}, +] [package.extras] data = ["language-data (>=1.1,<2.0)"] +[[package]] +name = "locate" +version = "1.1.1" +description = "Locate the file location of your current running script." +category = "main" +optional = false +python-versions = ">=3.4" +files = [ + {file = "locate-1.1.1-py3-none-any.whl", hash = "sha256:9e5e2f3516639240f4d975c08e95ae6a24ff4dd63d228f927541cdec30105755"}, + {file = "locate-1.1.1.tar.gz", hash = "sha256:432750f5b7e89f8c99942ca7d8722ccd1e7954b20e6a973027fccb6cc00af857"}, +] + [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -220,18 +232,14 @@ description = "Inline Matplotlib backend for Jupyter" category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, + {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"}, +] [package.dependencies] traitlets = "*" -[[package]] -name = "mccabe" -version = "0.6.1" -description = "McCabe checker, plugin for flake8" -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "mecab-ko-dic" version = "1.0.0" @@ -239,6 +247,9 @@ description = "mecab-ko-dic packaged for Python" category = "main" optional = false python-versions = "*" +files = [ + {file = "mecab-ko-dic-1.0.0.tar.gz", hash = "sha256:3ba22858736e02e8a0e92f2a7f099528c733ae47701b29d12c75e982a85d1f11"}, +] [[package]] name = "mecab-python3" @@ -247,6 +258,29 @@ description = "Python wrapper for the MeCab morphological analyzer for Japanese" category = "main" optional = false python-versions = "*" +files = [ + {file = "mecab-python3-1.0.5.tar.gz", hash = "sha256:e703d78c88a671abb8170351644850015d9bbfab31530a3b40d12481a6779a11"}, + {file = "mecab_python3-1.0.5-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8a64bd228704ed9b24da5cbd6c4e325ef22310227153ef481f9037183351aa10"}, + {file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf083884116fa05ca0394c4c8d62013a4954fbac414c33a1931906ddf0f3585a"}, + {file = "mecab_python3-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fe020df27b249f43df3d38b84473d226e36d6d4a31f951cedbddabfcc450e36"}, + {file = "mecab_python3-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:644f781de083311fcf81f7d55f21a756ceef7ebae7c111bd50a2c9d0855c1927"}, + {file = "mecab_python3-1.0.5-cp36-cp36m-macosx_10_14_x86_64.whl", hash = 
"sha256:4309a91f0d5b66d3f0e8c9ba5a4d3cf7dbac1334269338704599820e051d1d7f"}, + {file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be2d1cd2ecd1f04b91eb0e26c906f21b50b8526e977f7f01f3901f9a6306944"}, + {file = "mecab_python3-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:644bbde31ab1244ff18fb1dcac1e5fee8121f8b27a5c3e041c01ebc301df9266"}, + {file = "mecab_python3-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:401a2d1608b6503cb755d7d864ad74b64a7a4346309235f84577de807bb29050"}, + {file = "mecab_python3-1.0.5-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:5f91d5d8a9ac0ea7351e5e2423df98dd463b02013e006b18096cd365de37b2a9"}, + {file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8ce0151b973f4ca15e651619264442011568ebe48c6fce51d55e64f7e5c2e1"}, + {file = "mecab_python3-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e381df4c55f3ec5bccbb5625c65c54ecf982c215574d1102aff2803ac1a24cd"}, + {file = "mecab_python3-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8eaaa78227f470c4cf1d6c2a87b92889041f317517fbe65e635b86ea0c84a194"}, + {file = "mecab_python3-1.0.5-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:dd8601565dd1331ee5cd67bcc45f713cebc14b730ee2e956ed120a0ec6e4fd8a"}, + {file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76a40f717f9592bd12edc7bcf1fa869f4c8058e5d0b80d4cc6c301435afb1f96"}, + {file = "mecab_python3-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f299d6ef96495371f5a622a7004a205e303dabba1fc3a7f9a07e741e315ed2b"}, + {file = "mecab_python3-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4cdb07edbbd508d9b98ac9529e0ff0b89d93e50a6beeb7b8b946439594bf5e01"}, + {file = "mecab_python3-1.0.5-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:eb412a25e485e33d7ab69262b58f7365b727f8c447e4c9c1c56b5fd91414ecd2"}, + {file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e8ac11ef4440418312dd4f1f200f7957fdc0148bb49dc049264c5d07bed527"}, + {file = "mecab_python3-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae1c126cf4982035794042280998066c8b6d26eb89136731078d9105a7070c13"}, + {file = "mecab_python3-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:34a196c6a410e57f975ee077d075ac994b94bb6930b04e207e59e7c7521ecb58"}, +] [package.extras] unidic = ["unidic"] @@ -254,37 +288,129 @@ unidic-lite = ["unidic-lite"] [[package]] name = "msgpack" -version = "1.0.4" +version = "1.0.7" description = "MessagePack serializer" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.8" +files = [ + {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:04ad6069c86e531682f9e1e71b71c1c3937d6014a7c3e9edd2aa81ad58842862"}, + {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cca1b62fe70d761a282496b96a5e51c44c213e410a964bdffe0928e611368329"}, + {file = "msgpack-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e50ebce52f41370707f1e21a59514e3375e3edd6e1832f5e5235237db933c98b"}, + {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b4f35de6a304b5533c238bee86b670b75b03d31b7797929caa7a624b5dda6"}, + {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:28efb066cde83c479dfe5a48141a53bc7e5f13f785b92ddde336c716663039ee"}, + {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cb14ce54d9b857be9591ac364cb08dc2d6a5c4318c1182cb1d02274029d590d"}, + {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b573a43ef7c368ba4ea06050a957c2a7550f729c31f11dd616d2ac4aba99888d"}, + {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ccf9a39706b604d884d2cb1e27fe973bc55f2890c52f38df742bc1d79ab9f5e1"}, + {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb70766519500281815dfd7a87d3a178acf7ce95390544b8c90587d76b227681"}, + {file = "msgpack-1.0.7-cp310-cp310-win32.whl", hash = "sha256:b610ff0f24e9f11c9ae653c67ff8cc03c075131401b3e5ef4b82570d1728f8a9"}, + {file = "msgpack-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:a40821a89dc373d6427e2b44b572efc36a2778d3f543299e2f24eb1a5de65415"}, + {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:576eb384292b139821c41995523654ad82d1916da6a60cff129c715a6223ea84"}, + {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:730076207cb816138cf1af7f7237b208340a2c5e749707457d70705715c93b93"}, + {file = "msgpack-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:85765fdf4b27eb5086f05ac0491090fc76f4f2b28e09d9350c31aac25a5aaff8"}, + {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3476fae43db72bd11f29a5147ae2f3cb22e2f1a91d575ef130d2bf49afd21c46"}, + {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d4c80667de2e36970ebf74f42d1088cc9ee7ef5f4e8c35eee1b40eafd33ca5b"}, + {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b0bf0effb196ed76b7ad883848143427a73c355ae8e569fa538365064188b8e"}, + {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f9a7c509542db4eceed3dcf21ee5267ab565a83555c9b88a8109dcecc4709002"}, + {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:84b0daf226913133f899ea9b30618722d45feffa67e4fe867b0b5ae83a34060c"}, + {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ec79ff6159dffcc30853b2ad612ed572af86c92b5168aa3fc01a67b0fa40665e"}, + {file = "msgpack-1.0.7-cp311-cp311-win32.whl", hash = "sha256:3e7bf4442b310ff154b7bb9d81eb2c016b7d597e364f97d72b1acc3817a0fdc1"}, + {file = "msgpack-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:3f0c8c6dfa6605ab8ff0611995ee30d4f9fcff89966cf562733b4008a3d60d82"}, + {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f0936e08e0003f66bfd97e74ee530427707297b0d0361247e9b4f59ab78ddc8b"}, + {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98bbd754a422a0b123c66a4c341de0474cad4a5c10c164ceed6ea090f3563db4"}, + {file = "msgpack-1.0.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b291f0ee7961a597cbbcc77709374087fa2a9afe7bdb6a40dbbd9b127e79afee"}, + {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebbbba226f0a108a7366bf4b59bf0f30a12fd5e75100c630267d94d7f0ad20e5"}, + {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e2d69948e4132813b8d1131f29f9101bc2c915f26089a6d632001a5c1349672"}, + {file = 
"msgpack-1.0.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdf38ba2d393c7911ae989c3bbba510ebbcdf4ecbdbfec36272abe350c454075"}, + {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:993584fc821c58d5993521bfdcd31a4adf025c7d745bbd4d12ccfecf695af5ba"}, + {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:52700dc63a4676669b341ba33520f4d6e43d3ca58d422e22ba66d1736b0a6e4c"}, + {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e45ae4927759289c30ccba8d9fdce62bb414977ba158286b5ddaf8df2cddb5c5"}, + {file = "msgpack-1.0.7-cp312-cp312-win32.whl", hash = "sha256:27dcd6f46a21c18fa5e5deed92a43d4554e3df8d8ca5a47bf0615d6a5f39dbc9"}, + {file = "msgpack-1.0.7-cp312-cp312-win_amd64.whl", hash = "sha256:7687e22a31e976a0e7fc99c2f4d11ca45eff652a81eb8c8085e9609298916dcf"}, + {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b6ccc0c85916998d788b295765ea0e9cb9aac7e4a8ed71d12e7d8ac31c23c95"}, + {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:235a31ec7db685f5c82233bddf9858748b89b8119bf4538d514536c485c15fe0"}, + {file = "msgpack-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cab3db8bab4b7e635c1c97270d7a4b2a90c070b33cbc00c99ef3f9be03d3e1f7"}, + {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bfdd914e55e0d2c9e1526de210f6fe8ffe9705f2b1dfcc4aecc92a4cb4b533d"}, + {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36e17c4592231a7dbd2ed09027823ab295d2791b3b1efb2aee874b10548b7524"}, + {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38949d30b11ae5f95c3c91917ee7a6b239f5ec276f271f28638dec9156f82cfc"}, + {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ff1d0899f104f3921d94579a5638847f783c9b04f2d5f229392ca77fba5b82fc"}, + {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dc43f1ec66eb8440567186ae2f8c447d91e0372d793dfe8c222aec857b81a8cf"}, + {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dd632777ff3beaaf629f1ab4396caf7ba0bdd075d948a69460d13d44357aca4c"}, + {file = "msgpack-1.0.7-cp38-cp38-win32.whl", hash = "sha256:4e71bc4416de195d6e9b4ee93ad3f2f6b2ce11d042b4d7a7ee00bbe0358bd0c2"}, + {file = "msgpack-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:8f5b234f567cf76ee489502ceb7165c2a5cecec081db2b37e35332b537f8157c"}, + {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfef2bb6ef068827bbd021017a107194956918ab43ce4d6dc945ffa13efbc25f"}, + {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:484ae3240666ad34cfa31eea7b8c6cd2f1fdaae21d73ce2974211df099a95d81"}, + {file = "msgpack-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3967e4ad1aa9da62fd53e346ed17d7b2e922cba5ab93bdd46febcac39be636fc"}, + {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dd178c4c80706546702c59529ffc005681bd6dc2ea234c450661b205445a34d"}, + {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ffbc252eb0d229aeb2f9ad051200668fc3a9aaa8994e49f0cb2ffe2b7867e7"}, + {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:822ea70dc4018c7e6223f13affd1c5c30c0f5c12ac1f96cd8e9949acddb48a61"}, + {file = 
"msgpack-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:384d779f0d6f1b110eae74cb0659d9aa6ff35aaf547b3955abf2ab4c901c4819"}, + {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f64e376cd20d3f030190e8c32e1c64582eba56ac6dc7d5b0b49a9d44021b52fd"}, + {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ed82f5a7af3697b1c4786053736f24a0efd0a1b8a130d4c7bfee4b9ded0f08f"}, + {file = "msgpack-1.0.7-cp39-cp39-win32.whl", hash = "sha256:f26a07a6e877c76a88e3cecac8531908d980d3d5067ff69213653649ec0f60ad"}, + {file = "msgpack-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:1dc93e8e4653bdb5910aed79f11e165c85732067614f180f70534f056da97db3"}, + {file = "msgpack-1.0.7.tar.gz", hash = "sha256:572efc93db7a4d27e404501975ca6d2d9775705c2d922390d878fcf768d92c87"}, +] [[package]] name = "mypy" -version = "0.931" +version = "1.7.0" description = "Optional static typing for Python" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" +files = [ + {file = "mypy-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5da84d7bf257fd8f66b4f759a904fd2c5a765f70d8b52dde62b521972a0a2357"}, + {file = "mypy-1.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a3637c03f4025f6405737570d6cbfa4f1400eb3c649317634d273687a09ffc2f"}, + {file = "mypy-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b633f188fc5ae1b6edca39dae566974d7ef4e9aaaae00bc36efe1f855e5173ac"}, + {file = "mypy-1.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d6ed9a3997b90c6f891138e3f83fb8f475c74db4ccaa942a1c7bf99e83a989a1"}, + {file = "mypy-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:1fe46e96ae319df21359c8db77e1aecac8e5949da4773c0274c0ef3d8d1268a9"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:df67fbeb666ee8828f675fee724cc2cbd2e4828cc3df56703e02fe6a421b7401"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a79cdc12a02eb526d808a32a934c6fe6df07b05f3573d210e41808020aed8b5d"}, + {file = "mypy-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f65f385a6f43211effe8c682e8ec3f55d79391f70a201575def73d08db68ead1"}, + {file = "mypy-1.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e81ffd120ee24959b449b647c4b2fbfcf8acf3465e082b8d58fd6c4c2b27e46"}, + {file = "mypy-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:f29386804c3577c83d76520abf18cfcd7d68264c7e431c5907d250ab502658ee"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:87c076c174e2c7ef8ab416c4e252d94c08cd4980a10967754f91571070bf5fbe"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6cb8d5f6d0fcd9e708bb190b224089e45902cacef6f6915481806b0c77f7786d"}, + {file = "mypy-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93e76c2256aa50d9c82a88e2f569232e9862c9982095f6d54e13509f01222fc"}, + {file = "mypy-1.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cddee95dea7990e2215576fae95f6b78a8c12f4c089d7e4367564704e99118d3"}, + {file = "mypy-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:d01921dbd691c4061a3e2ecdbfbfad029410c5c2b1ee88946bf45c62c6c91210"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:185cff9b9a7fec1f9f7d8352dff8a4c713b2e3eea9c6c4b5ff7f0edf46b91e41"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7b1e399c47b18feb6f8ad4a3eef3813e28c1e871ea7d4ea5d444b2ac03c418"}, + {file = 
"mypy-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc9fe455ad58a20ec68599139ed1113b21f977b536a91b42bef3ffed5cce7391"}, + {file = "mypy-1.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d0fa29919d2e720c8dbaf07d5578f93d7b313c3e9954c8ec05b6d83da592e5d9"}, + {file = "mypy-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b53655a295c1ed1af9e96b462a736bf083adba7b314ae775563e3fb4e6795f5"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1b06b4b109e342f7dccc9efda965fc3970a604db70f8560ddfdee7ef19afb05"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf7a2f0a6907f231d5e41adba1a82d7d88cf1f61a70335889412dec99feeb0f8"}, + {file = "mypy-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551d4a0cdcbd1d2cccdcc7cb516bb4ae888794929f5b040bb51aae1846062901"}, + {file = "mypy-1.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55d28d7963bef00c330cb6461db80b0b72afe2f3c4e2963c99517cf06454e665"}, + {file = "mypy-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:870bd1ffc8a5862e593185a4c169804f2744112b4a7c55b93eb50f48e7a77010"}, + {file = "mypy-1.7.0-py3-none-any.whl", hash = "sha256:96650d9a4c651bc2a4991cf46f100973f656d69edc7faf91844e87fe627f7e96"}, + {file = "mypy-1.7.0.tar.gz", hash = "sha256:1e280b5697202efa698372d2f39e9a6713a0395a756b1c6bd48995f8d72690dc"}, +] [package.dependencies] -mypy-extensions = ">=0.4.3" -tomli = ">=1.1.0" -typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} -typing-extensions = ">=3.10" +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] -python2 = ["typed-ast (>=1.4.0,<2)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] [[package]] name = "mypy-extensions" -version = "0.4.3" -description = "Experimental type system extensions for programs checked with the mypy typechecker." +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] [[package]] name = "packaging" @@ -293,6 +419,10 @@ description = "Core utilities for Python packages" category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] [package.dependencies] pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" @@ -304,19 +434,15 @@ description = "A Python Parser" category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, + {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, +] [package.extras] qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] testing = ["docopt", "pytest (<6.0.0)"] -[[package]] -name = "pathspec" -version = "0.10.1" -description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" -optional = false -python-versions = ">=3.7" - [[package]] name = "pexpect" version = "4.8.0" @@ -324,6 +450,10 @@ description = "Pexpect allows easy control of interactive console applications." category = "dev" optional = false python-versions = "*" +files = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] [package.dependencies] ptyprocess = ">=0.5" @@ -335,18 +465,10 @@ description = "Tiny 'shelve'-like database with concurrency support" category = "dev" optional = false python-versions = "*" - -[[package]] -name = "platformdirs" -version = "2.5.2" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] -test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] +files = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] [[package]] name = "pluggy" @@ -355,13 +477,14 @@ description = "plugin and hook calling mechanisms for python" category = "dev" optional = false python-versions = ">=3.6" - -[package.dependencies] -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +files = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] [package.extras] -testing = ["pytest-benchmark", "pytest"] -dev = ["tox", "pre-commit"] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "prompt-toolkit" @@ -370,6 +493,10 @@ description = "Library for building powerful interactive command lines in Python category = "dev" optional = false python-versions = ">=3.6.2" +files = [ + {file = "prompt_toolkit-3.0.31-py3-none-any.whl", hash = "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d"}, + {file = "prompt_toolkit-3.0.31.tar.gz", hash = "sha256:9ada952c9d1787f52ff6d5f3484d0b4df8952787c087edf6a1f7c2cb1ea88148"}, +] [package.dependencies] wcwidth = "*" @@ -381,22 +508,10 @@ description = "Run a subprocess in a pseudo terminal" category = "dev" optional = false python-versions = "*" - -[[package]] -name = "pycodestyle" -version = "2.8.0" -description = "Python style guide checker" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "pyflakes" -version = "2.4.0" -description = "passive checker of Python programs" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] [[package]] name = "pygments" @@ -405,6 +520,10 @@ description = "Pygments is a syntax highlighting package written in Python." 
category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "Pygments-2.13.0-py3-none-any.whl", hash = "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42"}, + {file = "Pygments-2.13.0.tar.gz", hash = "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1"}, +] [package.extras] plugins = ["importlib-metadata"] @@ -416,9 +535,13 @@ description = "pyparsing module - Classes and methods to define and execute pars category = "dev" optional = false python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] [package.extras] -diagrams = ["railroad-diagrams", "jinja2"] +diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pytest" @@ -427,12 +550,15 @@ description = "pytest: simple powerful testing with Python" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"}, + {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"}, +] [package.dependencies] attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" @@ -441,13 +567,179 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +[[package]] +name = "pytest-profiling" +version = "1.7.0" +description = "Profiling plugin for py.test" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pytest-profiling-1.7.0.tar.gz", hash = "sha256:93938f147662225d2b8bd5af89587b979652426a8a6ffd7e73ec4a23e24b7f29"}, + {file = "pytest_profiling-1.7.0-py2.py3-none-any.whl", hash = "sha256:999cc9ac94f2e528e3f5d43465da277429984a1c237ae9818f8cfd0b06acb019"}, +] + +[package.dependencies] +gprof2dot = "*" +pytest = "*" +six = "*" + +[package.extras] +tests = ["pytest-virtualenv"] + [[package]] name = "regex" -version = "2022.9.13" +version = "2023.10.3" description = "Alternative regular expression module, to replace re." 
category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" +files = [ + {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, + {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"}, + {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"}, + {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"}, + {file = 
"regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"}, + {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"}, + {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"}, + {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"}, + {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"}, + {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"}, + 
{file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"}, + {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"}, + {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"}, + {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"}, + {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"}, + {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"}, + {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"}, + {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, +] + +[[package]] +name = "ruff" +version = "0.1.6" +description = "An extremely fast Python linter and code formatter, written 
in Rust." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703"}, + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462"}, + {file = "ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a"}, + {file = "ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33"}, + {file = "ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc"}, + {file = "ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184"}, +] + +[[package]] +name = "setuptools" +version = "69.0.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, + {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", 
"jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] [[package]] name = "tomli" @@ -456,6 +748,10 @@ description = "A lil' TOML parser" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] [[package]] name = "traitlets" @@ -464,27 +760,15 @@ description = "" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "traitlets-5.5.0-py3-none-any.whl", hash = "sha256:1201b2c9f76097195989cdf7f65db9897593b0dfd69e4ac96016661bb6f0d30f"}, + {file = "traitlets-5.5.0.tar.gz", hash = "sha256:b122f9ff2f2f6c1709dab289a05555be011c87828e911c0cf4074b85cb780a79"}, +] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest"] -[[package]] -name = "typed-ast" -version = "1.5.4" -description = "a fork of Python 2 and 3 ast modules with type comment support" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "types-setuptools" -version = "57.4.18" -description = "Typing stubs for setuptools" -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "typing-extensions" version = "4.4.0" @@ -492,6 +776,10 @@ description = "Backported and Experimental Type Hints for Python 3.7+" category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, + {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, +] [[package]] name = "wcwidth" @@ -500,73 +788,17 @@ description = "Measures the displayed width of unicode strings in a terminal" category = "main" optional = false python-versions = "*" - -[[package]] -name = "zipp" -version = "3.10.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] +files = [ + {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = 
"sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, + {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, +] [extras] -cjk = ["mecab-python3", "ipadic", "mecab-ko-dic", "jieba"] +cjk = ["ipadic", "jieba", "mecab-ko-dic", "mecab-python3"] jieba = ["jieba"] -mecab = ["mecab-python3", "ipadic", "mecab-ko-dic"] +mecab = ["ipadic", "mecab-ko-dic", "mecab-python3"] [metadata] -lock-version = "1.1" -python-versions = "^3.7" -content-hash = "3701c1761f9f8e16cefafdfdf04e64cb436dc96cda64fc524ed9c8b8204046a6" - -[metadata.files] -appnope = [] -attrs = [] -backcall = [] -black = [] -click = [] -colorama = [] -decorator = [] -exceptiongroup = [] -flake8 = [] -ftfy = [] -importlib-metadata = [] -iniconfig = [] -ipadic = [] -ipython = [] -jedi = [] -jieba = [] -langcodes = [] -matplotlib-inline = [] -mccabe = [] -mecab-ko-dic = [] -mecab-python3 = [] -msgpack = [] -mypy = [] -mypy-extensions = [] -packaging = [] -parso = [] -pathspec = [] -pexpect = [] -pickleshare = [] -platformdirs = [] -pluggy = [] -prompt-toolkit = [] -ptyprocess = [] -pycodestyle = [] -pyflakes = [] -pygments = [] -pyparsing = [] -pytest = [] -regex = [] -tomli = [] -traitlets = [] -typed-ast = [] -types-setuptools = [] -typing-extensions = [] -wcwidth = [] -zipp = [] +lock-version = "2.0" +python-versions = ">= 3.8, < 4" +content-hash = "c3aecf3636c5f6b3aa9472843a96a57b2fc0b3f2f36e683046eedfdf42839e32" diff --git a/pyproject.toml b/pyproject.toml index edaa13d..8afe248 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "wordfreq" -version = "3.0.3" +version = "3.1.0" description = "Look up the frequencies of words in many languages, based on many sources of data." authors = ["Robyn Speer "] license = "Apache-2.0" @@ -8,33 +8,75 @@ readme = "README.md" homepage = "https://github.com/rspeer/wordfreq/" [tool.poetry.dependencies] -python = "^3.7" -msgpack = ">= 1.0" +python = ">= 3.8, < 4" +msgpack = "^1.0.7" langcodes = ">= 3.0" -regex = ">= 2021.7.6" +regex = ">= 2023.10.3" ftfy = ">= 6.1" -mecab-python3 = {version = "^1.0.5", optional = true} -ipadic = {version = "^1.0.0", optional = true} -mecab-ko-dic = {version = "^1.0.0", optional = true} -jieba = {version = ">=0.42", optional = true} +mecab-python3 = { version = "^1.0.5", optional = true } +ipadic = { version = "^1.0.0", optional = true } +mecab-ko-dic = { version = "^1.0.0", optional = true } +jieba = { version = ">=0.42", optional = true } +locate = "^1.1.1" -[tool.poetry.dev-dependencies] +[tool.poetry.group.dev.dependencies] pytest = "^7.2.0" -mecab-python3 = "^1.0.4" -jieba = ">= 0.42" +mecab-python3 = "^1.0.5" +jieba = "^0.42.1" ipadic = "^1.0.0" mecab-ko-dic = "^1.0.0" ipython = ">=7" -black = "^22.1.0" -flake8 = "^4.0.1" -types-setuptools = "^57.4.9" -mypy = "^0.931" +mypy = "^1.7.0" +ruff = "^0.1.6" +setuptools = "^69.0.2" # implicit dependency in jieba +pytest-profiling = "^1.7.0" [tool.poetry.extras] cjk = ["mecab-python3", "ipadic", "mecab-ko-dic", "jieba"] mecab = ["mecab-python3", "ipadic", "mecab-ko-dic"] jieba = ["jieba"] +[tool.ruff] +# ruff is a Python linter and formatter, which reimplements black, flake8, pylint, and more +line-length = 99 +show-fixes = true +src = ["src"] +target-version = "py38" + +[tool.ruff.lint] +extend-select = [ + "D", # docstring style + "I", # isort + "UP", # pyupgrade -- catches obsolete code patterns + "ANN", # type annotations + "ASYNC", # async code checks + "B", # flake8-bugbear -- catches bug-prone usage + 
"C4", # list comprehensions + "FA", # correct use of `from __future__ import annotations` + "INP", # checks for presence of __init__.py + "T20", # print statements + "TID", # tidy imports + "PTH", # use pathlib instead of os.path + "PLE", # pylint errors +] +ignore = [ + "D100", # modules without top-of-module docstrings are okay + "D104", # __init__.py doesn't need a docstring + "D107", # __init__ method doesn't need a docstring + "D2", # don't check whitespace in docstrings + "D4", # don't check grammar and style in docstrings + "E501", # long lines after auto-formatting, such as long strings, are okay + "ANN002", # we don't demand a type on *args, particularly because it's difficult to specify + "ANN003", # we don't demand a type on **kwargs, particularly because it's difficult to specify + "ANN101", # self does not need a type + "ANN102", # `cls` in classmethod does not need a type + "PTH123", # we don't need to construct a Path just to open a file by filename +] + +[tool.ruff.lint.per-file-ignores] +# We are less strict about test code +"tests/**" = ["D", "ANN", "T20", "INP"] + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/tests/test_apostrophes.py b/tests/test_apostrophes.py index 28240ff..045855f 100644 --- a/tests/test_apostrophes.py +++ b/tests/test_apostrophes.py @@ -1,4 +1,4 @@ -from wordfreq import tokenize, word_frequency +from wordfreq import tokenize def test_apostrophes(): diff --git a/tests/test_at_sign.py b/tests/test_at_sign.py index 13a10f4..a56abfc 100644 --- a/tests/test_at_sign.py +++ b/tests/test_at_sign.py @@ -1,4 +1,4 @@ -from wordfreq import tokenize, lossy_tokenize, word_frequency +from wordfreq import lossy_tokenize, tokenize, word_frequency def test_gender_neutral_at(): diff --git a/tests/test_chinese.py b/tests/test_chinese.py index ba9b024..c88f166 100644 --- a/tests/test_chinese.py +++ b/tests/test_chinese.py @@ -1,5 +1,5 @@ -from wordfreq import tokenize, word_frequency, zipf_frequency import pytest +from wordfreq import tokenize, word_frequency, zipf_frequency def test_tokens(): diff --git a/tests/test_general.py b/tests/test_general.py index 068c4ab..d1457d1 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1,14 +1,14 @@ +import pytest from wordfreq import ( - word_frequency, available_languages, cB_to_freq, - top_n_list, - random_words, - random_ascii_words, - tokenize, lossy_tokenize, + random_ascii_words, + random_words, + tokenize, + top_n_list, + word_frequency, ) -import pytest def test_freq_examples(): @@ -142,9 +142,17 @@ def test_tokenization(): "see", ] - assert tokenize( - "I don't split at apostrophes, you see.", "en", include_punctuation=True - ) == ["i", "don't", "split", "at", "apostrophes", ",", "you", "see", "."] + assert tokenize("I don't split at apostrophes, you see.", "en", include_punctuation=True) == [ + "i", + "don't", + "split", + "at", + "apostrophes", + ",", + "you", + "see", + ".", + ] # Certain punctuation does not inherently split a word. assert tokenize("Anything is possible at zombo.com", "en") == [ @@ -158,17 +166,20 @@ def test_tokenization(): # Splits occur after symbols, and at splitting punctuation such as hyphens. assert tokenize("😂test", "en") == ["😂", "test"] assert tokenize("flip-flop", "en") == ["flip", "flop"] - assert tokenize( - "this text has... punctuation :)", "en", include_punctuation=True - ) == ["this", "text", "has", "...", "punctuation", ":)"] + assert tokenize("this text has... 
punctuation :)", "en", include_punctuation=True) == [ + "this", + "text", + "has", + "...", + "punctuation", + ":)", + ] # Multi-codepoint emoji sequences such as 'medium-skinned woman with headscarf' # and 'David Bowie' stay together, because our Unicode segmentation algorithm # is up to date assert tokenize("emoji test 🧕🏽", "en") == ["emoji", "test", "🧕🏽"] - assert tokenize( - "👨‍🎤 Planet Earth is blue, and there's nothing I can do 🌎🚀", "en" - ) == [ + assert tokenize("👨‍🎤 Planet Earth is blue, and there's nothing I can do 🌎🚀", "en") == [ "👨‍🎤", "planet", "earth", @@ -221,9 +232,7 @@ def test_uncurl_quotes(): def test_phrase_freq(): ff = word_frequency("flip-flop", "en") assert ff > 0 - phrase_freq = 1.0 / word_frequency("flip", "en") + 1.0 / word_frequency( - "flop", "en" - ) + phrase_freq = 1.0 / word_frequency("flip", "en") + 1.0 / word_frequency("flop", "en") assert 1.0 / ff == pytest.approx(phrase_freq, rel=0.01) diff --git a/tests/test_japanese.py b/tests/test_japanese.py index f5e1c52..b4a32c0 100644 --- a/tests/test_japanese.py +++ b/tests/test_japanese.py @@ -1,5 +1,5 @@ -from wordfreq import tokenize, simple_tokenize, word_frequency import pytest +from wordfreq import simple_tokenize, tokenize, word_frequency def test_tokens(): diff --git a/tests/test_korean.py b/tests/test_korean.py index fcada25..0b91a6d 100644 --- a/tests/test_korean.py +++ b/tests/test_korean.py @@ -1,5 +1,5 @@ -from wordfreq import tokenize, word_frequency import pytest +from wordfreq import tokenize, word_frequency def test_tokens(): diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 339fbc8..6a91443 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -1,6 +1,6 @@ +from pytest import approx from wordfreq import word_frequency from wordfreq.numbers import digit_freq, smash_numbers -from pytest import approx def test_number_smashing(): diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py index 54d45b2..bad1feb 100644 --- a/wordfreq/__init__.py +++ b/wordfreq/__init__.py @@ -1,25 +1,28 @@ -from pkg_resources import resource_filename -from functools import lru_cache -from typing import List, Dict, Iterator, Tuple -import langcodes -import msgpack +from __future__ import annotations + import gzip import itertools -import pathlib -import random import logging import math +import random import warnings +from functools import lru_cache +from typing import Iterator + +import langcodes +import msgpack -from wordfreq.tokens import tokenize, simple_tokenize, lossy_tokenize from wordfreq.language_info import get_language_info from wordfreq.numbers import digit_freq, has_digit_sequence, smash_numbers +from wordfreq.tokens import lossy_tokenize, simple_tokenize, tokenize + +from .util import data_path logger = logging.getLogger(__name__) CACHE_SIZE = 100000 -DATA_PATH = pathlib.Path(resource_filename("wordfreq", "data")) +DATA_PATH = data_path() # We'll divide the frequency by 10 for each token boundary that was inferred. # (We determined the factor of 10 empirically by looking at words in the @@ -35,7 +38,7 @@ tokenize = tokenize simple_tokenize = simple_tokenize -def read_cBpack(filename: str) -> List[List[str]]: +def read_cBpack(filename: str) -> list[list[str]]: """ Read a file from an idiosyncratic format that we use for storing approximate word frequencies, called "cBpack". 
@@ -79,16 +82,12 @@ def read_cBpack(filename: str) -> List[List[str]]: with gzip.open(filename, "rb") as infile: data = msgpack.load(infile, raw=False) header = data[0] - if ( - not isinstance(header, dict) - or header.get("format") != "cB" - or header.get("version") != 1 - ): + if not isinstance(header, dict) or header.get("format") != "cB" or header.get("version") != 1: raise ValueError("Unexpected header: %r" % header) return data[1:] -def available_languages(wordlist: str = "best") -> Dict[str, str]: +def available_languages(wordlist: str = "best") -> dict[str, str]: """ Given a wordlist name, return a dictionary of language codes to filenames, representing all the languages in which that wordlist is available. @@ -114,7 +113,7 @@ def available_languages(wordlist: str = "best") -> Dict[str, str]: @lru_cache(maxsize=None) def get_frequency_list( lang: str, wordlist: str = "best", match_cutoff: None = None -) -> List[List[str]]: +) -> list[list[str]]: """ Read the raw data from a wordlist file, returning it as a list of lists. (See `read_cBpack` for what this represents.) @@ -125,19 +124,21 @@ def get_frequency_list( Looking up the alternate code 'por' will also get the same list. """ if match_cutoff is not None: - warnings.warn("The `match_cutoff` parameter is deprecated", DeprecationWarning) + warnings.warn( + "The `match_cutoff` parameter is deprecated", DeprecationWarning, stacklevel=2 + ) available = available_languages(wordlist) # the max_distance is high because we unify scripts, such as Traditional # vs. Simplified Chinese, in one wordlist best, _distance = langcodes.closest_match(lang, list(available), max_distance=60) if best == "und": - raise LookupError("No wordlist %r available for language %r" % (wordlist, lang)) + raise LookupError(f"No wordlist {wordlist!r} available for language {lang!r}") if best != lang: logger.warning( - "You asked for word frequencies in language %r. Using the " - "nearest match, which is %r." % (lang, best) + f"You asked for word frequencies in language {lang!r}. Using the " + f"nearest match, which is {best!r}." ) return read_cBpack(available[best]) @@ -198,13 +199,15 @@ def freq_to_zipf(freq: float) -> float: @lru_cache(maxsize=None) def get_frequency_dict( lang: str, wordlist: str = "best", match_cutoff: None = None -) -> Dict[str, float]: +) -> dict[str, float]: """ Get a word frequency list as a dictionary, mapping tokens to frequencies as floating-point probabilities. """ if match_cutoff is not None: - warnings.warn("The `match_cutoff` parameter is deprecated", DeprecationWarning) + warnings.warn( + "The `match_cutoff` parameter is deprecated", DeprecationWarning, stacklevel=2 + ) freqs = {} pack = get_frequency_list(lang, wordlist) for index, bucket in enumerate(pack): @@ -229,7 +232,7 @@ def iter_wordlist(lang: str, wordlist: str = "best") -> Iterator[str]: # This dict and inner function are used to implement a "drop everything" cache # for word_frequency(); the overheads of lru_cache() are comparable to the time # it takes to look up frequencies from scratch, so something faster is needed. 
-_wf_cache: Dict[Tuple[str, str, str, float], float] = {} +_wf_cache: dict[tuple[str, str, str, float], float] = {} def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float: @@ -277,9 +280,7 @@ def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> floa return round(unrounded, leading_zeroes + 3) -def word_frequency( - word: str, lang: str, wordlist: str = "best", minimum: float = 0.0 -) -> float: +def word_frequency(word: str, lang: str, wordlist: str = "best", minimum: float = 0.0) -> float: """ Get the frequency of `word` in the language with code `lang`, from the specified `wordlist`. @@ -306,7 +307,7 @@ def word_frequency( return _wf_cache[args] -def zipf_frequency(word: str, lang: str, wordlist: str = "best", minimum: float = 0.0): +def zipf_frequency(word: str, lang: str, wordlist: str = "best", minimum: float = 0.0) -> float: """ Get the frequency of `word`, in the language with code `lang`, on the Zipf scale. @@ -334,9 +335,7 @@ def zipf_frequency(word: str, lang: str, wordlist: str = "best", minimum: float @lru_cache(maxsize=100) -def top_n_list( - lang: str, n: int, wordlist: str = "best", ascii_only: bool = False -) -> List[str]: +def top_n_list(lang: str, n: int, wordlist: str = "best", ascii_only: bool = False) -> list[str]: """ Return a frequency list of length `n` in descending order of frequency. This list contains words from `wordlist`, of the given language. diff --git a/wordfreq/chinese.py b/wordfreq/chinese.py index 73fefa0..450137c 100644 --- a/wordfreq/chinese.py +++ b/wordfreq/chinese.py @@ -1,21 +1,22 @@ -from pkg_resources import resource_filename -from typing import List -import jieba -import msgpack +from __future__ import annotations + import gzip -DICT_FILENAME = resource_filename("wordfreq", "data/jieba_zh.txt") -ORIG_DICT_FILENAME = resource_filename("wordfreq", "data/jieba_zh_orig.txt") -SIMP_MAP_FILENAME = resource_filename("wordfreq", "data/_chinese_mapping.msgpack.gz") +import jieba +import msgpack + +from .util import data_path + +DICT_FILENAME = data_path("jieba_zh.txt") +ORIG_DICT_FILENAME = data_path("jieba_zh_orig.txt") +SIMP_MAP_FILENAME = data_path("_chinese_mapping.msgpack.gz") try: - SIMPLIFIED_MAP = msgpack.load( - gzip.open(SIMP_MAP_FILENAME), raw=False, strict_map_key=False - ) + SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False, strict_map_key=False) except TypeError: # work around incompatibility between pure-Python msgpack and C msgpack SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False) -jieba_tokenizer = None -jieba_orig_tokenizer = None +jieba_tokenizer: jieba.Tokenizer | None = None +jieba_orig_tokenizer: jieba.Tokenizer | None = None def simplify_chinese(text: str) -> str: @@ -32,7 +33,7 @@ def simplify_chinese(text: str) -> str: return text.translate(SIMPLIFIED_MAP).casefold() -def jieba_tokenize(text: str, external_wordlist: bool = False) -> List[str]: +def jieba_tokenize(text: str, external_wordlist: bool = False) -> list[str]: """ Tokenize the given text into tokens whose word frequencies can probably be looked up. This uses Jieba, a word-frequency-based tokenizer. 
@@ -61,8 +62,6 @@ def jieba_tokenize(text: str, external_wordlist: bool = False) -> List[str]: # those spans from the original text, even if it's in Traditional # Chinese tokens = [] - for _token, start, end in jieba_tokenizer.tokenize( - simplify_chinese(text), HMM=False - ): + for _token, start, end in jieba_tokenizer.tokenize(simplify_chinese(text), HMM=False): tokens.append(text[start:end]) return tokens diff --git a/wordfreq/language_info.py b/wordfreq/language_info.py index 2856eb8..e1e74c1 100644 --- a/wordfreq/language_info.py +++ b/wordfreq/language_info.py @@ -1,6 +1,8 @@ +from __future__ import annotations + from functools import lru_cache + from langcodes import Language, closest_match -from typing import List, Union # Text in scripts written without spaces has to be handled specially in our # tokenization regex (see TOKEN_RE in tokens.py). Also, when one of these is @@ -44,9 +46,7 @@ EXTRA_JAPANESE_CHARACTERS = "ー々〻〆" # happens in ConceptNet. -def _language_in_list( - language: Language, targets: List[str], max_distance: int = 10 -) -> bool: +def _language_in_list(language: Language, targets: list[str], max_distance: int = 10) -> bool: """ A helper function to determine whether this language matches one of the target languages, with a match score above a certain threshold. @@ -59,7 +59,7 @@ def _language_in_list( @lru_cache(maxsize=None) -def get_language_info(language: Union[str, Language]) -> dict: +def get_language_info(language: str | Language) -> dict: """ Looks up the things we need to know about how to handle text in a given language. This will return a dictionary with the following fields: diff --git a/wordfreq/mecab.py b/wordfreq/mecab.py index e1db124..d3ec1a9 100644 --- a/wordfreq/mecab.py +++ b/wordfreq/mecab.py @@ -1,7 +1,8 @@ -import MeCab +from __future__ import annotations + import unicodedata -from typing import Dict, List +import MeCab def make_mecab_analyzer(lang: str) -> MeCab.Tagger: @@ -22,10 +23,10 @@ def make_mecab_analyzer(lang: str) -> MeCab.Tagger: # The constructed analyzers will go in this dictionary. -MECAB_ANALYZERS: Dict[str, MeCab.Tagger] = {} +MECAB_ANALYZERS: dict[str, MeCab.Tagger] = {} -def mecab_tokenize(text: str, lang: str) -> List[str]: +def mecab_tokenize(text: str, lang: str) -> list[str]: """ Use the mecab-python3 package to tokenize the given text. The `lang` must be 'ja' for Japanese or 'ko' for Korean. 
@@ -42,8 +43,4 @@ def mecab_tokenize(text: str, lang: str) -> List[str]: analyzed = analyzer.parse(text) if not analyzed: return [] - return [ - line.split("\t")[0] - for line in analyzed.split("\n") - if line != "" and line != "EOS" - ] + return [line.split("\t")[0] for line in analyzed.split("\n") if line != "" and line != "EOS"] diff --git a/wordfreq/preprocess.py b/wordfreq/preprocess.py index 0c4dcb2..b07ccad 100644 --- a/wordfreq/preprocess.py +++ b/wordfreq/preprocess.py @@ -1,9 +1,10 @@ -import regex import unicodedata +import regex +from langcodes import Language + from .language_info import get_language_info from .transliterate import transliterate -from langcodes import Language MARK_RE = regex.compile(r"[\p{Mn}\N{ARABIC TATWEEL}]", regex.V1) diff --git a/wordfreq/tokens.py b/wordfreq/tokens.py index 48d0b00..0bc56f7 100644 --- a/wordfreq/tokens.py +++ b/wordfreq/tokens.py @@ -1,31 +1,31 @@ -import regex -import unicodedata +from __future__ import annotations + import logging +import unicodedata + import langcodes -from typing import List +import regex from ftfy.fixes import uncurl_quotes from .language_info import ( - get_language_info, - SPACELESS_SCRIPTS, EXTRA_JAPANESE_CHARACTERS, + SPACELESS_SCRIPTS, + get_language_info, ) from .preprocess import preprocess_text # Placeholders for CJK functions that we'll import on demand -_mecab_tokenize = None -_jieba_tokenize = None -_simplify_chinese = None +_mecab_tokenize = None # type: ignore +_jieba_tokenize = None # type: ignore +_simplify_chinese = None # type: ignore -_WARNED_LANGUAGES = set() +_WARNED_LANGUAGES: set[str] = set() logger = logging.getLogger(__name__) def _make_spaceless_expr() -> str: scripts = sorted(SPACELESS_SCRIPTS) - pieces = [r"\p{IsIdeo}"] + [ - r"\p{Script=%s}" % script_code for script_code in scripts - ] + pieces = [r"\p{IsIdeo}"] + [r"\p{Script=%s}" % script_code for script_code in scripts] return "".join(pieces) + EXTRA_JAPANESE_CHARACTERS @@ -148,11 +148,7 @@ TOKEN_RE = regex.compile( # part of the token in Case 3. \w\w?' -""".replace( - "", SPACELESS_EXPR - ).replace( - "", INITIAL_VOWEL_EXPR - ), +""".replace("", SPACELESS_EXPR).replace("", INITIAL_VOWEL_EXPR), regex.V1 | regex.WORD | regex.VERBOSE, ) @@ -167,11 +163,7 @@ TOKEN_RE_WITH_PUNCTUATION = regex.compile( (?=[\w\p{So}]) (?!\w\w?') \X+? (?: @s? (?!w) | \b) | # Case 3 \w\w?' # Case 4 -""".replace( - "", SPACELESS_EXPR - ).replace( - "", INITIAL_VOWEL_EXPR - ), +""".replace("", SPACELESS_EXPR).replace("", INITIAL_VOWEL_EXPR), regex.V1 | regex.WORD | regex.VERBOSE, ) @@ -180,7 +172,7 @@ TOKEN_RE_WITH_PUNCTUATION = regex.compile( PUNCT_RE = regex.compile(r"[\p{punct}]+") -def simple_tokenize(text: str, include_punctuation: bool = False) -> List[str]: +def simple_tokenize(text: str, include_punctuation: bool = False) -> list[str]: """ Tokenize the given text using a straightforward, Unicode-aware token expression. @@ -220,7 +212,7 @@ def tokenize( lang: str, include_punctuation: bool = False, external_wordlist: bool = False, -) -> List[str]: +) -> list[str]: """ Tokenize this text in a way that's relatively simple but appropriate for the language. Strings that are looked up in wordfreq will be run through @@ -286,9 +278,7 @@ def tokenize( if info["tokenizer"] != "regex" and lang not in _WARNED_LANGUAGES: logger.warning( "The language '{}' is in the '{}' script, which we don't " - "have a tokenizer for. The results will be bad.".format( - lang, info["script"] - ) + "have a tokenizer for. 
The results will be bad.".format(lang, info["script"]) ) _WARNED_LANGUAGES.add(lang) tokens = simple_tokenize(text, include_punctuation=include_punctuation) @@ -301,7 +291,7 @@ def lossy_tokenize( lang: str, include_punctuation: bool = False, external_wordlist: bool = False, -) -> List[str]: +) -> list[str]: """ Get a list of tokens for this text, with largely the same results and options as `tokenize`, but aggressively normalize some text in a lossy way diff --git a/wordfreq/transliterate.py b/wordfreq/transliterate.py index 594fbc7..93c6489 100644 --- a/wordfreq/transliterate.py +++ b/wordfreq/transliterate.py @@ -1,96 +1,156 @@ +from __future__ import annotations + # This table comes from # https://github.com/opendatakosovo/cyrillic-transliteration/blob/master/cyrtranslit/mapping.py, # from the 'cyrtranslit' module. We originally had to reimplement it because # 'cyrtranslit' didn't work in Python 3; now it does, but we've made the table # more robust than the one in cyrtranslit. SR_LATN_TABLE = { - ord('А'): 'A', ord('а'): 'a', - ord('Б'): 'B', ord('б'): 'b', - ord('В'): 'V', ord('в'): 'v', - ord('Г'): 'G', ord('г'): 'g', - ord('Д'): 'D', ord('д'): 'd', - ord('Ђ'): 'Đ', ord('ђ'): 'đ', - ord('Е'): 'E', ord('е'): 'e', - ord('Ж'): 'Ž', ord('ж'): 'ž', - ord('З'): 'Z', ord('з'): 'z', - ord('И'): 'I', ord('и'): 'i', - ord('Ј'): 'J', ord('ј'): 'j', - ord('К'): 'K', ord('к'): 'k', - ord('Л'): 'L', ord('л'): 'l', - ord('Љ'): 'Lj', ord('љ'): 'lj', - ord('М'): 'M', ord('м'): 'm', - ord('Н'): 'N', ord('н'): 'n', - ord('Њ'): 'Nj', ord('њ'): 'nj', - ord('О'): 'O', ord('о'): 'o', - ord('П'): 'P', ord('п'): 'p', - ord('Р'): 'R', ord('р'): 'r', - ord('С'): 'S', ord('с'): 's', - ord('Т'): 'T', ord('т'): 't', - ord('Ћ'): 'Ć', ord('ћ'): 'ć', - ord('У'): 'U', ord('у'): 'u', - ord('Ф'): 'F', ord('ф'): 'f', - ord('Х'): 'H', ord('х'): 'h', - ord('Ц'): 'C', ord('ц'): 'c', - ord('Ч'): 'Č', ord('ч'): 'č', - ord('Џ'): 'Dž', ord('џ'): 'dž', - ord('Ш'): 'Š', ord('ш'): 'š', - + ord("А"): "A", + ord("а"): "a", + ord("Б"): "B", + ord("б"): "b", + ord("В"): "V", + ord("в"): "v", + ord("Г"): "G", + ord("г"): "g", + ord("Д"): "D", + ord("д"): "d", + ord("Ђ"): "Đ", + ord("ђ"): "đ", + ord("Е"): "E", + ord("е"): "e", + ord("Ж"): "Ž", + ord("ж"): "ž", + ord("З"): "Z", + ord("з"): "z", + ord("И"): "I", + ord("и"): "i", + ord("Ј"): "J", + ord("ј"): "j", + ord("К"): "K", + ord("к"): "k", + ord("Л"): "L", + ord("л"): "l", + ord("Љ"): "Lj", + ord("љ"): "lj", + ord("М"): "M", + ord("м"): "m", + ord("Н"): "N", + ord("н"): "n", + ord("Њ"): "Nj", + ord("њ"): "nj", + ord("О"): "O", + ord("о"): "o", + ord("П"): "P", + ord("п"): "p", + ord("Р"): "R", + ord("р"): "r", + ord("С"): "S", + ord("с"): "s", + ord("Т"): "T", + ord("т"): "t", + ord("Ћ"): "Ć", + ord("ћ"): "ć", + ord("У"): "U", + ord("у"): "u", + ord("Ф"): "F", + ord("ф"): "f", + ord("Х"): "H", + ord("х"): "h", + ord("Ц"): "C", + ord("ц"): "c", + ord("Ч"): "Č", + ord("ч"): "č", + ord("Џ"): "Dž", + ord("џ"): "dž", + ord("Ш"): "Š", + ord("ш"): "š", # Handle Cyrillic letters from other languages. We hope these cases don't # come up often when we're trying to transliterate Serbian, but if these # letters show up in loan-words or code-switching text, we can at least # transliterate them approximately instead of leaving them as Cyrillic # letters surrounded by Latin. 
- # Russian letters - ord('Ё'): 'Jo', ord('ё'): 'jo', - ord('Й'): 'J', ord('й'): 'j', - ord('Щ'): 'Šč', ord('щ'): 'šč', - ord('Ъ'): '', ord('ъ'): '', - ord('Ы'): 'Y', ord('ы'): 'y', - ord('Ь'): "'", ord('ь'): "'", - ord('Э'): 'E', ord('э'): 'e', - ord('Ю'): 'Ju', ord('ю'): 'ju', - ord('Я'): 'Ja', ord('я'): 'ja', - + ord("Ё"): "Jo", + ord("ё"): "jo", + ord("Й"): "J", + ord("й"): "j", + ord("Щ"): "Šč", + ord("щ"): "šč", + ord("Ъ"): "", + ord("ъ"): "", + ord("Ы"): "Y", + ord("ы"): "y", + ord("Ь"): "'", + ord("ь"): "'", + ord("Э"): "E", + ord("э"): "e", + ord("Ю"): "Ju", + ord("ю"): "ju", + ord("Я"): "Ja", + ord("я"): "ja", # Belarusian letter - ord('Ў'): 'Ŭ', ord('ў'): 'ŭ', - + ord("Ў"): "Ŭ", + ord("ў"): "ŭ", # Ukrainian letters - ord('Є'): 'Je', ord('є'): 'je', - ord('І'): 'I', ord('і'): 'i', - ord('Ї'): 'Ï', ord('ї'): 'ï', - ord('Ґ'): 'G', ord('ґ'): 'g', - + ord("Є"): "Je", + ord("є"): "je", + ord("І"): "I", + ord("і"): "i", + ord("Ї"): "Ï", + ord("ї"): "ï", + ord("Ґ"): "G", + ord("ґ"): "g", # Macedonian letters - ord('Ѕ'): 'Dz', ord('ѕ'): 'dz', - ord('Ѓ'): 'Ǵ', ord('ѓ'): 'ǵ', - ord('Ќ'): 'Ḱ', ord('ќ'): 'ḱ', + ord("Ѕ"): "Dz", + ord("ѕ"): "dz", + ord("Ѓ"): "Ǵ", + ord("ѓ"): "ǵ", + ord("Ќ"): "Ḱ", + ord("ќ"): "ḱ", } AZ_LATN_TABLE = SR_LATN_TABLE.copy() -AZ_LATN_TABLE.update({ - # Distinct Azerbaijani letters - ord('Ҹ'): 'C', ord('ҹ'): 'c', - ord('Ә'): 'Ə', ord('ә'): 'ə', - ord('Ғ'): 'Ğ', ord('ғ'): 'ğ', - ord('Һ'): 'H', ord('һ'): 'h', - ord('Ө'): 'Ö', ord('ө'): 'ö', - ord('Ҝ'): 'G', ord('ҝ'): 'g', - ord('Ү'): 'Ü', ord('ү'): 'ü', - - # Azerbaijani letters with different transliterations - ord('Ч'): 'Ç', ord('ч'): 'ç', - ord('Х'): 'X', ord('х'): 'x', - ord('Ы'): 'I', ord('ы'): 'ı', - ord('И'): 'İ', ord('и'): 'i', - ord('Ж'): 'J', ord('ж'): 'j', - ord('Ј'): 'Y', ord('ј'): 'y', - ord('Г'): 'Q', ord('г'): 'q', - ord('Ш'): 'Ş', ord('ш'): 'ş', -}) +AZ_LATN_TABLE.update( + { + # Distinct Azerbaijani letters + ord("Ҹ"): "C", + ord("ҹ"): "c", + ord("Ә"): "Ə", + ord("ә"): "ə", + ord("Ғ"): "Ğ", + ord("ғ"): "ğ", + ord("Һ"): "H", + ord("һ"): "h", + ord("Ө"): "Ö", + ord("ө"): "ö", + ord("Ҝ"): "G", + ord("ҝ"): "g", + ord("Ү"): "Ü", + ord("ү"): "ü", + # Azerbaijani letters with different transliterations + ord("Ч"): "Ç", + ord("ч"): "ç", + ord("Х"): "X", + ord("х"): "x", + ord("Ы"): "I", + ord("ы"): "ı", + ord("И"): "İ", + ord("и"): "i", + ord("Ж"): "J", + ord("ж"): "j", + ord("Ј"): "Y", + ord("ј"): "y", + ord("Г"): "Q", + ord("г"): "q", + ord("Ш"): "Ş", + ord("ш"): "ş", + } +) -def transliterate(table, text): +def transliterate(table: dict[int, str], text: str) -> str: """ Transliterate text according to one of the tables above. @@ -101,9 +161,9 @@ def transliterate(table, text): Latin alphabet. - 'az-Latn' means the same for Azerbaijani Cyrillic to Latn. """ - if table == 'sr-Latn': + if table == "sr-Latn": return text.translate(SR_LATN_TABLE) - elif table == 'az-Latn': + elif table == "az-Latn": return text.translate(AZ_LATN_TABLE) else: - raise ValueError("Unknown transliteration table: {!r}".format(table)) + raise ValueError(f"Unknown transliteration table: {table!r}") diff --git a/wordfreq/util.py b/wordfreq/util.py new file mode 100644 index 0000000..14889c6 --- /dev/null +++ b/wordfreq/util.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from pathlib import Path + +import locate + + +def data_path(filename: str | None = None) -> Path: + """ + Get a path to a file in the data directory. 
+ """ + if filename is None: + return Path(locate.this_dir(), "data") + else: + return Path(locate.this_dir(), "data", filename)
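
For context, a minimal usage sketch (not part of the patch itself) of the new `data_path` helper added in wordfreq/util.py, which replaces the earlier `pkg_resources.resource_filename("wordfreq", ...)` lookups with the `locate` package. The filename comes from the chinese.py hunk above; the paths shown in the comments are only assumed examples of an installed layout:

    from wordfreq.util import data_path

    # The package's bundled data directory, resolved relative to wordfreq/util.py
    print(data_path())                # e.g. .../site-packages/wordfreq/data

    # A specific bundled data file, as looked up in wordfreq/chinese.py
    print(data_path("jieba_zh.txt"))  # e.g. .../site-packages/wordfreq/data/jieba_zh.txt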