mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
work on rel. frequencies of numbers, and other features
This commit is contained in:
parent
538145c05c
commit
342c1d0f0e
21
mypy.ini
Normal file
21
mypy.ini
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
[mypy]
|
||||||
|
python_version = 3.7
|
||||||
|
|
||||||
|
[mypy-ipadic]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-jieba]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-MeCab]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-mecab_ko_dic]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-msgpack]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-regex]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
48
number-freq-notes.txt
Normal file
48
number-freq-notes.txt
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
906 0.00000047
|
||||||
|
1006 0.00000038
|
||||||
|
1106 0.00000028
|
||||||
|
1206 0.00000028
|
||||||
|
1306 0.00000025
|
||||||
|
1406 0.00000022
|
||||||
|
1506 0.00000028
|
||||||
|
1606 0.00000052
|
||||||
|
1633 0.00000048
|
||||||
|
1678 0.00000048
|
||||||
|
1706 0.00000040
|
||||||
|
1733 0.00000047
|
||||||
|
1754 0.00000063
|
||||||
|
1778 0.00000111
|
||||||
|
1806 0.00000130
|
||||||
|
1823 0.00000135
|
||||||
|
1833 0.00000494
|
||||||
|
1856 0.00000325
|
||||||
|
1866 0.00000299
|
||||||
|
1876 0.00000290
|
||||||
|
1886 0.00000331
|
||||||
|
1896 0.00000423
|
||||||
|
1906 0.00000496
|
||||||
|
1916 0.00000715
|
||||||
|
1921 0.00000928
|
||||||
|
1926 0.00001130
|
||||||
|
1936 0.00001037
|
||||||
|
1946 0.00000921
|
||||||
|
1956 0.00001100
|
||||||
|
1966 0.00001800
|
||||||
|
1976 0.00002500
|
||||||
|
1986 0.00003500
|
||||||
|
1996 0.00007100
|
||||||
|
2006 0.00011704
|
||||||
|
2009 0.00013361
|
||||||
|
2011 0.00015103
|
||||||
|
2014 0.00016833
|
||||||
|
2015 0.00016840
|
||||||
|
2016 0.00014277
|
||||||
|
2017 0.00010960
|
||||||
|
2018 0.00005095
|
||||||
|
2019 0.00002800
|
||||||
|
2021 0.00000090
|
||||||
|
2023 0.00000042
|
||||||
|
2026 0.00000026
|
||||||
|
2036 0.00000014
|
||||||
|
2046 0.00000010
|
||||||
|
|
902
poetry.lock
generated
Normal file
902
poetry.lock
generated
Normal file
@ -0,0 +1,902 @@
|
|||||||
|
[[package]]
|
||||||
|
name = "appnope"
|
||||||
|
version = "0.1.2"
|
||||||
|
description = "Disable App Nap on macOS >= 10.9"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atomicwrites"
|
||||||
|
version = "1.4.0"
|
||||||
|
description = "Atomic file writes."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "attrs"
|
||||||
|
version = "21.4.0"
|
||||||
|
description = "Classes Without Boilerplate"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
|
||||||
|
docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
|
||||||
|
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
|
||||||
|
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "backcall"
|
||||||
|
version = "0.2.0"
|
||||||
|
description = "Specifications for callback functions passed in to an API"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "black"
|
||||||
|
version = "22.1.0"
|
||||||
|
description = "The uncompromising code formatter."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6.2"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
click = ">=8.0.0"
|
||||||
|
mypy-extensions = ">=0.4.3"
|
||||||
|
pathspec = ">=0.9.0"
|
||||||
|
platformdirs = ">=2"
|
||||||
|
tomli = ">=1.1.0"
|
||||||
|
typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
|
||||||
|
typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
colorama = ["colorama (>=0.4.3)"]
|
||||||
|
d = ["aiohttp (>=3.7.4)"]
|
||||||
|
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
||||||
|
uvloop = ["uvloop (>=0.15.2)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "click"
|
||||||
|
version = "8.0.3"
|
||||||
|
description = "Composable command line interface toolkit"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||||
|
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorama"
|
||||||
|
version = "0.4.4"
|
||||||
|
description = "Cross-platform colored terminal text."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "decorator"
|
||||||
|
version = "5.1.1"
|
||||||
|
description = "Decorators for Humans"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flake8"
|
||||||
|
version = "4.0.1"
|
||||||
|
description = "the modular source code checker: pep8 pyflakes and co"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
importlib-metadata = {version = "<4.3", markers = "python_version < \"3.8\""}
|
||||||
|
mccabe = ">=0.6.0,<0.7.0"
|
||||||
|
pycodestyle = ">=2.8.0,<2.9.0"
|
||||||
|
pyflakes = ">=2.4.0,<2.5.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ftfy"
|
||||||
|
version = "6.0.3"
|
||||||
|
description = "Fixes some problems with Unicode text after the fact"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
wcwidth = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["furo", "sphinx"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "importlib-metadata"
|
||||||
|
version = "4.2.0"
|
||||||
|
description = "Read metadata from Python packages"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
|
||||||
|
zipp = ">=0.5"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
|
||||||
|
testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "1.1.1"
|
||||||
|
description = "iniconfig: brain-dead simple config-ini parsing"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ipadic"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "IPAdic packaged for Python"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ipython"
|
||||||
|
version = "7.31.1"
|
||||||
|
description = "IPython: Productive Interactive Computing"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
appnope = {version = "*", markers = "sys_platform == \"darwin\""}
|
||||||
|
backcall = "*"
|
||||||
|
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||||
|
decorator = "*"
|
||||||
|
jedi = ">=0.16"
|
||||||
|
matplotlib-inline = "*"
|
||||||
|
pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""}
|
||||||
|
pickleshare = "*"
|
||||||
|
prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0"
|
||||||
|
pygments = "*"
|
||||||
|
traitlets = ">=4.2"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
all = ["Sphinx (>=1.3)", "ipykernel", "ipyparallel", "ipywidgets", "nbconvert", "nbformat", "nose (>=0.10.1)", "notebook", "numpy (>=1.17)", "pygments", "qtconsole", "requests", "testpath"]
|
||||||
|
doc = ["Sphinx (>=1.3)"]
|
||||||
|
kernel = ["ipykernel"]
|
||||||
|
nbconvert = ["nbconvert"]
|
||||||
|
nbformat = ["nbformat"]
|
||||||
|
notebook = ["notebook", "ipywidgets"]
|
||||||
|
parallel = ["ipyparallel"]
|
||||||
|
qtconsole = ["qtconsole"]
|
||||||
|
test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.17)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jedi"
|
||||||
|
version = "0.18.1"
|
||||||
|
description = "An autocompletion tool for Python that can be used for text editors."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
parso = ">=0.8.0,<0.9.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
|
||||||
|
testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jieba"
|
||||||
|
version = "0.42.1"
|
||||||
|
description = "Chinese Words Segmentation Utilities"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "langcodes"
|
||||||
|
version = "3.3.0"
|
||||||
|
description = "Tools for labeling human languages with IETF language tags"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
data = ["language-data (>=1.1,<2.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "matplotlib-inline"
|
||||||
|
version = "0.1.3"
|
||||||
|
description = "Inline Matplotlib backend for Jupyter"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
traitlets = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mccabe"
|
||||||
|
version = "0.6.1"
|
||||||
|
description = "McCabe checker, plugin for flake8"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mecab-ko-dic"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "mecab-ko-dic packaged for Python"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mecab-python3"
|
||||||
|
version = "1.0.4"
|
||||||
|
description = "Python wrapper for the MeCab morphological analyzer for Japanese"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
unidic = ["unidic"]
|
||||||
|
unidic-lite = ["unidic-lite"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "msgpack"
|
||||||
|
version = "1.0.3"
|
||||||
|
description = "MessagePack (de)serializer."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy"
|
||||||
|
version = "0.931"
|
||||||
|
description = "Optional static typing for Python"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
mypy-extensions = ">=0.4.3"
|
||||||
|
tomli = ">=1.1.0"
|
||||||
|
typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""}
|
||||||
|
typing-extensions = ">=3.10"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dmypy = ["psutil (>=4.0)"]
|
||||||
|
python2 = ["typed-ast (>=1.4.0,<2)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy-extensions"
|
||||||
|
version = "0.4.3"
|
||||||
|
description = "Experimental type system extensions for programs checked with the mypy typechecker."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "packaging"
|
||||||
|
version = "21.3"
|
||||||
|
description = "Core utilities for Python packages"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parso"
|
||||||
|
version = "0.8.3"
|
||||||
|
description = "A Python Parser"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
|
||||||
|
testing = ["docopt", "pytest (<6.0.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pathspec"
|
||||||
|
version = "0.9.0"
|
||||||
|
description = "Utility library for gitignore style pattern matching of file paths."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pexpect"
|
||||||
|
version = "4.8.0"
|
||||||
|
description = "Pexpect allows easy control of interactive console applications."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
ptyprocess = ">=0.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pickleshare"
|
||||||
|
version = "0.7.5"
|
||||||
|
description = "Tiny 'shelve'-like database with concurrency support"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "platformdirs"
|
||||||
|
version = "2.5.0"
|
||||||
|
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"]
|
||||||
|
test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pluggy"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "plugin and hook calling mechanisms for python"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["pre-commit", "tox"]
|
||||||
|
testing = ["pytest", "pytest-benchmark"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prompt-toolkit"
|
||||||
|
version = "3.0.27"
|
||||||
|
description = "Library for building powerful interactive command lines in Python"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6.2"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
wcwidth = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ptyprocess"
|
||||||
|
version = "0.7.0"
|
||||||
|
description = "Run a subprocess in a pseudo terminal"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "py"
|
||||||
|
version = "1.11.0"
|
||||||
|
description = "library with cross-python path, ini-parsing, io, code, log facilities"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pycodestyle"
|
||||||
|
version = "2.8.0"
|
||||||
|
description = "Python style guide checker"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyflakes"
|
||||||
|
version = "2.4.0"
|
||||||
|
description = "passive checker of Python programs"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pygments"
|
||||||
|
version = "2.11.2"
|
||||||
|
description = "Pygments is a syntax highlighting package written in Python."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyparsing"
|
||||||
|
version = "3.0.7"
|
||||||
|
description = "Python parsing module"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
diagrams = ["jinja2", "railroad-diagrams"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest"
|
||||||
|
version = "6.2.5"
|
||||||
|
description = "pytest: simple powerful testing with Python"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
|
||||||
|
attrs = ">=19.2.0"
|
||||||
|
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||||
|
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
|
||||||
|
iniconfig = "*"
|
||||||
|
packaging = "*"
|
||||||
|
pluggy = ">=0.12,<2.0"
|
||||||
|
py = ">=1.8.2"
|
||||||
|
toml = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "2022.1.18"
|
||||||
|
description = "Alternative regular expression module, to replace re."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml"
|
||||||
|
version = "0.10.2"
|
||||||
|
description = "Python Library for Tom's Obvious, Minimal Language"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tomli"
|
||||||
|
version = "2.0.1"
|
||||||
|
description = "A lil' TOML parser"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "traitlets"
|
||||||
|
version = "5.1.1"
|
||||||
|
description = "Traitlets Python configuration system"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["pytest"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typed-ast"
|
||||||
|
version = "1.5.2"
|
||||||
|
description = "a fork of Python 2 and 3 ast modules with type comment support"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-setuptools"
|
||||||
|
version = "57.4.9"
|
||||||
|
description = "Typing stubs for setuptools"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typing-extensions"
|
||||||
|
version = "4.0.1"
|
||||||
|
description = "Backported and Experimental Type Hints for Python 3.6+"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wcwidth"
|
||||||
|
version = "0.2.5"
|
||||||
|
description = "Measures the displayed width of unicode strings in a terminal"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zipp"
|
||||||
|
version = "3.7.0"
|
||||||
|
description = "Backport of pathlib-compatible object wrapper for zip files"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
|
||||||
|
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"]
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
lock-version = "1.1"
|
||||||
|
python-versions = "^3.7"
|
||||||
|
content-hash = "a3b1a9c3b80e338764f1907a77e31f59d6e1e231092b7813182e09e55d7c2f45"
|
||||||
|
|
||||||
|
[metadata.files]
|
||||||
|
appnope = [
|
||||||
|
{file = "appnope-0.1.2-py2.py3-none-any.whl", hash = "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442"},
|
||||||
|
{file = "appnope-0.1.2.tar.gz", hash = "sha256:dd83cd4b5b460958838f6eb3000c660b1f9caf2a5b1de4264e941512f603258a"},
|
||||||
|
]
|
||||||
|
atomicwrites = [
|
||||||
|
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
|
||||||
|
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
|
||||||
|
]
|
||||||
|
attrs = [
|
||||||
|
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
|
||||||
|
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
|
||||||
|
]
|
||||||
|
backcall = [
|
||||||
|
{file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"},
|
||||||
|
{file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"},
|
||||||
|
]
|
||||||
|
black = [
|
||||||
|
{file = "black-22.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1297c63b9e1b96a3d0da2d85d11cd9bf8664251fd69ddac068b98dc4f34f73b6"},
|
||||||
|
{file = "black-22.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2ff96450d3ad9ea499fc4c60e425a1439c2120cbbc1ab959ff20f7c76ec7e866"},
|
||||||
|
{file = "black-22.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e21e1f1efa65a50e3960edd068b6ae6d64ad6235bd8bfea116a03b21836af71"},
|
||||||
|
{file = "black-22.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f69158a7d120fd641d1fa9a921d898e20d52e44a74a6fbbcc570a62a6bc8ab"},
|
||||||
|
{file = "black-22.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:228b5ae2c8e3d6227e4bde5920d2fc66cc3400fde7bcc74f480cb07ef0b570d5"},
|
||||||
|
{file = "black-22.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b1a5ed73ab4c482208d20434f700d514f66ffe2840f63a6252ecc43a9bc77e8a"},
|
||||||
|
{file = "black-22.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35944b7100af4a985abfcaa860b06af15590deb1f392f06c8683b4381e8eeaf0"},
|
||||||
|
{file = "black-22.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7835fee5238fc0a0baf6c9268fb816b5f5cd9b8793423a75e8cd663c48d073ba"},
|
||||||
|
{file = "black-22.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dae63f2dbf82882fa3b2a3c49c32bffe144970a573cd68d247af6560fc493ae1"},
|
||||||
|
{file = "black-22.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa1db02410b1924b6749c245ab38d30621564e658297484952f3d8a39fce7e8"},
|
||||||
|
{file = "black-22.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c8226f50b8c34a14608b848dc23a46e5d08397d009446353dad45e04af0c8e28"},
|
||||||
|
{file = "black-22.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2d6f331c02f0f40aa51a22e479c8209d37fcd520c77721c034517d44eecf5912"},
|
||||||
|
{file = "black-22.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:742ce9af3086e5bd07e58c8feb09dbb2b047b7f566eb5f5bc63fd455814979f3"},
|
||||||
|
{file = "black-22.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fdb8754b453fb15fad3f72cd9cad3e16776f0964d67cf30ebcbf10327a3777a3"},
|
||||||
|
{file = "black-22.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5660feab44c2e3cb24b2419b998846cbb01c23c7fe645fee45087efa3da2d61"},
|
||||||
|
{file = "black-22.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:6f2f01381f91c1efb1451998bd65a129b3ed6f64f79663a55fe0e9b74a5f81fd"},
|
||||||
|
{file = "black-22.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:efbadd9b52c060a8fc3b9658744091cb33c31f830b3f074422ed27bad2b18e8f"},
|
||||||
|
{file = "black-22.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8871fcb4b447206904932b54b567923e5be802b9b19b744fdff092bd2f3118d0"},
|
||||||
|
{file = "black-22.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ccad888050f5393f0d6029deea2a33e5ae371fd182a697313bdbd835d3edaf9c"},
|
||||||
|
{file = "black-22.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e5c049442d7ca1a2fc273c79d1aecbbf1bc858f62e8184abe1ad175c4f7cc2"},
|
||||||
|
{file = "black-22.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:373922fc66676133ddc3e754e4509196a8c392fec3f5ca4486673e685a421321"},
|
||||||
|
{file = "black-22.1.0-py3-none-any.whl", hash = "sha256:3524739d76b6b3ed1132422bf9d82123cd1705086723bc3e235ca39fd21c667d"},
|
||||||
|
{file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"},
|
||||||
|
]
|
||||||
|
click = [
|
||||||
|
{file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"},
|
||||||
|
{file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"},
|
||||||
|
]
|
||||||
|
colorama = [
|
||||||
|
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
|
||||||
|
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
|
||||||
|
]
|
||||||
|
decorator = [
|
||||||
|
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
|
||||||
|
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
|
||||||
|
]
|
||||||
|
flake8 = [
|
||||||
|
{file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"},
|
||||||
|
{file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
|
||||||
|
]
|
||||||
|
ftfy = [
|
||||||
|
{file = "ftfy-6.0.3.tar.gz", hash = "sha256:ba71121a9c8d7790d3e833c6c1021143f3e5c4118293ec3afb5d43ed9ca8e72b"},
|
||||||
|
]
|
||||||
|
importlib-metadata = [
|
||||||
|
{file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"},
|
||||||
|
{file = "importlib_metadata-4.2.0.tar.gz", hash = "sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31"},
|
||||||
|
]
|
||||||
|
iniconfig = [
|
||||||
|
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
|
||||||
|
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
|
||||||
|
]
|
||||||
|
ipadic = [
|
||||||
|
{file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"},
|
||||||
|
]
|
||||||
|
ipython = [
|
||||||
|
{file = "ipython-7.31.1-py3-none-any.whl", hash = "sha256:55df3e0bd0f94e715abd968bedd89d4e8a7bce4bf498fb123fed4f5398fea874"},
|
||||||
|
{file = "ipython-7.31.1.tar.gz", hash = "sha256:b5548ec5329a4bcf054a5deed5099b0f9622eb9ea51aaa7104d215fece201d8c"},
|
||||||
|
]
|
||||||
|
jedi = [
|
||||||
|
{file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"},
|
||||||
|
{file = "jedi-0.18.1.tar.gz", hash = "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab"},
|
||||||
|
]
|
||||||
|
jieba = [
|
||||||
|
{file = "jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2"},
|
||||||
|
]
|
||||||
|
langcodes = [
|
||||||
|
{file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"},
|
||||||
|
{file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"},
|
||||||
|
]
|
||||||
|
matplotlib-inline = [
|
||||||
|
{file = "matplotlib-inline-0.1.3.tar.gz", hash = "sha256:a04bfba22e0d1395479f866853ec1ee28eea1485c1d69a6faf00dc3e24ff34ee"},
|
||||||
|
{file = "matplotlib_inline-0.1.3-py3-none-any.whl", hash = "sha256:aed605ba3b72462d64d475a21a9296f400a19c4f74a31b59103d2a99ffd5aa5c"},
|
||||||
|
]
|
||||||
|
mccabe = [
|
||||||
|
{file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
|
||||||
|
{file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
|
||||||
|
]
|
||||||
|
mecab-ko-dic = [
|
||||||
|
{file = "mecab-ko-dic-1.0.0.tar.gz", hash = "sha256:3ba22858736e02e8a0e92f2a7f099528c733ae47701b29d12c75e982a85d1f11"},
|
||||||
|
]
|
||||||
|
mecab-python3 = [
|
||||||
|
{file = "mecab-python3-1.0.4.tar.gz", hash = "sha256:b150ad5fe4260539b4ef184657e552ef81307fbbe60ae1f258bc814549ea90f8"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3c7e87c65160e5e4edb08cb80dbce50f4e711c53f45063321aab72ab2566ffe4"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2fbed960ef82f4192b31efd88af1f3c24cd1692b62720ed70d7e314a50f581e"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cb6eb6cc47e3937a2edfaa9595dc2d165ed9f025e3a53bd0a5033a12fa6bcdcf"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp36-cp36m-win_amd64.whl", hash = "sha256:b149b51f0f62c9512d219c9e79c6db2eb66e70863a97eb412d8fc3ba7a25f351"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:c1606b35df0136b3e9dc7add2e69d2c1151e69fd5675c0cde62d0b017b2319e7"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53b0b899ef03f364bfd7fa28f260ee1e893e4f47ff90a141a522709b892f0a4e"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:52a789c708f8b89044236201eb03c7fe5517fad5210a9de2230c7d99a2a8c760"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6ca73c0dec72038290faa6de17d57d771535eb47c22346e170dffcb82d696bb"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:18e14dfe3d8c66cfa1c9f49e3bc8ac480b79a433ec9e5b5d2c1fb73f36ec7c3e"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:221256b84be0ee29dc8fa450210236b40707b9d63cfc70de5102d2531622d062"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:de39b82f44d97fc0fd636644ad14c9662f51afcd73775379d5a8b1eb20ee85a6"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:96d9e8c098401fb4b5bd32258f4952f3b22cdb30ab291f5ff82eae1d0941cbed"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:dcd62ebf2eecde1263119b92ff5379a046bb8231cb999fafda00f0925dfcb67e"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178b632b717e3249054a7ad4c0fbc60ce8493d357afa7673d535ffa11e45eaba"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fbfad60261ad3b9390b8615528fc013302a3e8febba220f799216c1a1154ee7e"},
|
||||||
|
{file = "mecab_python3-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:445b4f5ee5674d85f6de2726ec28991801844ff71eb096129da5f5ba077d5a87"},
|
||||||
|
]
|
||||||
|
msgpack = [
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96acc674bb9c9be63fa8b6dabc3248fdc575c4adc005c440ad02f87ca7edd079"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c3ca57c96c8e69c1a0d2926a6acf2d9a522b41dc4253a8945c4c6cd4981a4e3"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0a792c091bac433dfe0a70ac17fc2087d4595ab835b47b89defc8bbabcf5c73"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c58cdec1cb5fcea8c2f1771d7b5fec79307d056874f746690bd2bdd609ab147"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f97c0f35b3b096a330bb4a1a9247d0bd7e1f3a2eba7ab69795501504b1c2c39"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-win32.whl", hash = "sha256:36a64a10b16c2ab31dcd5f32d9787ed41fe68ab23dd66957ca2826c7f10d0b85"},
|
||||||
|
{file = "msgpack-1.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c1ba333b4024c17c7591f0f372e2daa3c31db495a9b2af3cf664aef3c14354f7"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c2140cf7a3ec475ef0938edb6eb363fa704159e0bf71dde15d953bacc1cf9d7d"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f4c22717c74d44bcd7af353024ce71c6b55346dad5e2cc1ddc17ce8c4507c6b"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d733a15ade190540c703de209ffbc42a3367600421b62ac0c09fde594da6ec"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7e03b06f2982aa98d4ddd082a210c3db200471da523f9ac197f2828e80e7770"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-win32.whl", hash = "sha256:3d875631ecab42f65f9dce6f55ce6d736696ced240f2634633188de2f5f21af9"},
|
||||||
|
{file = "msgpack-1.0.3-cp36-cp36m-win_amd64.whl", hash = "sha256:40fb89b4625d12d6027a19f4df18a4de5c64f6f3314325049f219683e07e678a"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6eef0cf8db3857b2b556213d97dd82de76e28a6524853a9beb3264983391dc1a"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d8c332f53ffff01953ad25131272506500b14750c1d0ce8614b17d098252fbc"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c0903bd93cbd34653dd63bbfcb99d7539c372795201f39d16fdfde4418de43a"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf1e6bfed4860d72106f4e0a1ab519546982b45689937b40257cfd820650b920"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-win32.whl", hash = "sha256:d02cea2252abc3756b2ac31f781f7a98e89ff9759b2e7450a1c7a0d13302ff50"},
|
||||||
|
{file = "msgpack-1.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f30dd0dc4dfe6231ad253b6f9f7128ac3202ae49edd3f10d311adc358772dba"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f201d34dc89342fabb2a10ed7c9a9aaaed9b7af0f16a5923f1ae562b31258dea"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bb87f23ae7d14b7b3c21009c4b1705ec107cb21ee71975992f6aca571fb4a42a"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a3a5c4b16e9d0edb823fe54b59b5660cc8d4782d7bf2c214cb4b91a1940a8ef"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74da1e5fcf20ade12c6bf1baa17a2dc3604958922de8dc83cbe3eff22e8b611"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73a80bd6eb6bcb338c1ec0da273f87420829c266379c8c82fa14c23fb586cfa1"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-win32.whl", hash = "sha256:9fce00156e79af37bb6db4e7587b30d11e7ac6a02cb5bac387f023808cd7d7f4"},
|
||||||
|
{file = "msgpack-1.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:9b6f2d714c506e79cbead331de9aae6837c8dd36190d02da74cb409b36162e8a"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:89908aea5f46ee1474cc37fbc146677f8529ac99201bc2faf4ef8edc023c2bf3"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:973ad69fd7e31159eae8f580f3f707b718b61141838321c6fa4d891c4a2cca52"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da24375ab4c50e5b7486c115a3198d207954fe10aaa5708f7b65105df09109b2"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a598d0685e4ae07a0672b59792d2cc767d09d7a7f39fd9bd37ff84e060b1a996"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4c309a68cb5d6bbd0c50d5c71a25ae81f268c2dc675c6f4ea8ab2feec2ac4e2"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-win32.whl", hash = "sha256:494471d65b25a8751d19c83f1a482fd411d7ca7a3b9e17d25980a74075ba0e88"},
|
||||||
|
{file = "msgpack-1.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:f01b26c2290cbd74316990ba84a14ac3d599af9cebefc543d241a66e785cf17d"},
|
||||||
|
{file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"},
|
||||||
|
]
|
||||||
|
mypy = [
|
||||||
|
{file = "mypy-0.931-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3c5b42d0815e15518b1f0990cff7a705805961613e701db60387e6fb663fe78a"},
|
||||||
|
{file = "mypy-0.931-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c89702cac5b302f0c5d33b172d2b55b5df2bede3344a2fbed99ff96bddb2cf00"},
|
||||||
|
{file = "mypy-0.931-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:300717a07ad09525401a508ef5d105e6b56646f7942eb92715a1c8d610149714"},
|
||||||
|
{file = "mypy-0.931-cp310-cp310-win_amd64.whl", hash = "sha256:7b3f6f557ba4afc7f2ce6d3215d5db279bcf120b3cfd0add20a5d4f4abdae5bc"},
|
||||||
|
{file = "mypy-0.931-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1bf752559797c897cdd2c65f7b60c2b6969ffe458417b8d947b8340cc9cec08d"},
|
||||||
|
{file = "mypy-0.931-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4365c60266b95a3f216a3047f1d8e3f895da6c7402e9e1ddfab96393122cc58d"},
|
||||||
|
{file = "mypy-0.931-cp36-cp36m-win_amd64.whl", hash = "sha256:1b65714dc296a7991000b6ee59a35b3f550e0073411ac9d3202f6516621ba66c"},
|
||||||
|
{file = "mypy-0.931-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e839191b8da5b4e5d805f940537efcaa13ea5dd98418f06dc585d2891d228cf0"},
|
||||||
|
{file = "mypy-0.931-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:50c7346a46dc76a4ed88f3277d4959de8a2bd0a0fa47fa87a4cde36fe247ac05"},
|
||||||
|
{file = "mypy-0.931-cp37-cp37m-win_amd64.whl", hash = "sha256:d8f1ff62f7a879c9fe5917b3f9eb93a79b78aad47b533911b853a757223f72e7"},
|
||||||
|
{file = "mypy-0.931-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f9fe20d0872b26c4bba1c1be02c5340de1019530302cf2dcc85c7f9fc3252ae0"},
|
||||||
|
{file = "mypy-0.931-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1b06268df7eb53a8feea99cbfff77a6e2b205e70bf31743e786678ef87ee8069"},
|
||||||
|
{file = "mypy-0.931-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8c11003aaeaf7cc2d0f1bc101c1cc9454ec4cc9cb825aef3cafff8a5fdf4c799"},
|
||||||
|
{file = "mypy-0.931-cp38-cp38-win_amd64.whl", hash = "sha256:d9d2b84b2007cea426e327d2483238f040c49405a6bf4074f605f0156c91a47a"},
|
||||||
|
{file = "mypy-0.931-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff3bf387c14c805ab1388185dd22d6b210824e164d4bb324b195ff34e322d166"},
|
||||||
|
{file = "mypy-0.931-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b56154f8c09427bae082b32275a21f500b24d93c88d69a5e82f3978018a0266"},
|
||||||
|
{file = "mypy-0.931-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8ca7f8c4b1584d63c9a0f827c37ba7a47226c19a23a753d52e5b5eddb201afcd"},
|
||||||
|
{file = "mypy-0.931-cp39-cp39-win_amd64.whl", hash = "sha256:74f7eccbfd436abe9c352ad9fb65872cc0f1f0a868e9d9c44db0893440f0c697"},
|
||||||
|
{file = "mypy-0.931-py3-none-any.whl", hash = "sha256:1171f2e0859cfff2d366da2c7092b06130f232c636a3f7301e3feb8b41f6377d"},
|
||||||
|
{file = "mypy-0.931.tar.gz", hash = "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce"},
|
||||||
|
]
|
||||||
|
mypy-extensions = [
|
||||||
|
{file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
|
||||||
|
{file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
|
||||||
|
]
|
||||||
|
packaging = [
|
||||||
|
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
||||||
|
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
||||||
|
]
|
||||||
|
parso = [
|
||||||
|
{file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
|
||||||
|
{file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
|
||||||
|
]
|
||||||
|
pathspec = [
|
||||||
|
{file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
|
||||||
|
{file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
|
||||||
|
]
|
||||||
|
pexpect = [
|
||||||
|
{file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
|
||||||
|
{file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
|
||||||
|
]
|
||||||
|
pickleshare = [
|
||||||
|
{file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"},
|
||||||
|
{file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"},
|
||||||
|
]
|
||||||
|
platformdirs = [
|
||||||
|
{file = "platformdirs-2.5.0-py3-none-any.whl", hash = "sha256:30671902352e97b1eafd74ade8e4a694782bd3471685e78c32d0fdfd3aa7e7bb"},
|
||||||
|
{file = "platformdirs-2.5.0.tar.gz", hash = "sha256:8ec11dfba28ecc0715eb5fb0147a87b1bf325f349f3da9aab2cd6b50b96b692b"},
|
||||||
|
]
|
||||||
|
pluggy = [
|
||||||
|
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
||||||
|
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
||||||
|
]
|
||||||
|
prompt-toolkit = [
|
||||||
|
{file = "prompt_toolkit-3.0.27-py3-none-any.whl", hash = "sha256:cb7dae7d2c59188c85a1d6c944fad19aded6a26bd9c8ae115a4e1c20eb90b713"},
|
||||||
|
{file = "prompt_toolkit-3.0.27.tar.gz", hash = "sha256:f2b6a8067a4fb959d3677d1ed764cc4e63e0f6f565b9a4fc7edc2b18bf80217b"},
|
||||||
|
]
|
||||||
|
ptyprocess = [
|
||||||
|
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
|
||||||
|
{file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
|
||||||
|
]
|
||||||
|
py = [
|
||||||
|
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
|
||||||
|
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
|
||||||
|
]
|
||||||
|
pycodestyle = [
|
||||||
|
{file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"},
|
||||||
|
{file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"},
|
||||||
|
]
|
||||||
|
pyflakes = [
|
||||||
|
{file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"},
|
||||||
|
{file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"},
|
||||||
|
]
|
||||||
|
pygments = [
|
||||||
|
{file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"},
|
||||||
|
{file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"},
|
||||||
|
]
|
||||||
|
pyparsing = [
|
||||||
|
{file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"},
|
||||||
|
{file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"},
|
||||||
|
]
|
||||||
|
pytest = [
|
||||||
|
{file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
|
||||||
|
{file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
|
||||||
|
]
|
||||||
|
regex = [
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:34316bf693b1d2d29c087ee7e4bb10cdfa39da5f9c50fa15b07489b4ab93a1b5"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a0b9f6a1a15d494b35f25ed07abda03209fa76c33564c09c9e81d34f4b919d7"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f99112aed4fb7cee00c7f77e8b964a9b10f69488cdff626ffd797d02e2e4484f"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a2bf98ac92f58777c0fafc772bf0493e67fcf677302e0c0a630ee517a43b949"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8618d9213a863c468a865e9d2ec50221015f7abf52221bc927152ef26c484b4c"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b52cc45e71657bc4743a5606d9023459de929b2a198d545868e11898ba1c3f59"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e12949e5071c20ec49ef00c75121ed2b076972132fc1913ddf5f76cae8d10b4"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b02e3e72665cd02afafb933453b0c9f6c59ff6e3708bd28d0d8580450e7e88af"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:abfcb0ef78df0ee9df4ea81f03beea41849340ce33a4c4bd4dbb99e23ec781b6"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6213713ac743b190ecbf3f316d6e41d099e774812d470422b3a0f137ea635832"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:61ebbcd208d78658b09e19c78920f1ad38936a0aa0f9c459c46c197d11c580a0"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:b013f759cd69cb0a62de954d6d2096d648bc210034b79b1881406b07ed0a83f9"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9187500d83fd0cef4669385cbb0961e227a41c0c9bc39219044e35810793edf7"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-win32.whl", hash = "sha256:94c623c331a48a5ccc7d25271399aff29729fa202c737ae3b4b28b89d2b0976d"},
|
||||||
|
{file = "regex-2022.1.18-cp310-cp310-win_amd64.whl", hash = "sha256:1a171eaac36a08964d023eeff740b18a415f79aeb212169080c170ec42dd5184"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:49810f907dfe6de8da5da7d2b238d343e6add62f01a15d03e2195afc180059ed"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d2f5c3f7057530afd7b739ed42eb04f1011203bc5e4663e1e1d01bb50f813e3"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85ffd6b1cb0dfb037ede50ff3bef80d9bf7fa60515d192403af6745524524f3b"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba37f11e1d020969e8a779c06b4af866ffb6b854d7229db63c5fdddfceaa917f"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e27ea1ebe4a561db75a880ac659ff439dec7f55588212e71700bb1ddd5af9"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:37978254d9d00cda01acc1997513f786b6b971e57b778fbe7c20e30ae81a97f3"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54a1eb9fd38f2779e973d2f8958fd575b532fe26013405d1afb9ee2374e7ab8"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:768632fd8172ae03852e3245f11c8a425d95f65ff444ce46b3e673ae5b057b74"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:de2923886b5d3214be951bc2ce3f6b8ac0d6dfd4a0d0e2a4d2e5523d8046fdfb"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:1333b3ce73269f986b1fa4d5d395643810074dc2de5b9d262eb258daf37dc98f"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:d19a34f8a3429bd536996ad53597b805c10352a8561d8382e05830df389d2b43"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d2f355a951f60f0843f2368b39970e4667517e54e86b1508e76f92b44811a8a"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-win32.whl", hash = "sha256:2245441445099411b528379dee83e56eadf449db924648e5feb9b747473f42e3"},
|
||||||
|
{file = "regex-2022.1.18-cp36-cp36m-win_amd64.whl", hash = "sha256:25716aa70a0d153cd844fe861d4f3315a6ccafce22b39d8aadbf7fcadff2b633"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7e070d3aef50ac3856f2ef5ec7214798453da878bb5e5a16c16a61edf1817cc3"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22709d701e7037e64dae2a04855021b62efd64a66c3ceed99dfd684bfef09e38"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9099bf89078675c372339011ccfc9ec310310bf6c292b413c013eb90ffdcafc"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04611cc0f627fc4a50bc4a9a2e6178a974c6a6a4aa9c1cca921635d2c47b9c87"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:552a39987ac6655dad4bf6f17dd2b55c7b0c6e949d933b8846d2e312ee80005a"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e031899cb2bc92c0cf4d45389eff5b078d1936860a1be3aa8c94fa25fb46ed8"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2dacb3dae6b8cc579637a7b72f008bff50a94cde5e36e432352f4ca57b9e54c4"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e5c31d70a478b0ca22a9d2d76d520ae996214019d39ed7dd93af872c7f301e52"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb804c7d0bfbd7e3f33924ff49757de9106c44e27979e2492819c16972ec0da2"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:36b2d700a27e168fa96272b42d28c7ac3ff72030c67b32f37c05616ebd22a202"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:16f81025bb3556eccb0681d7946e2b35ff254f9f888cff7d2120e8826330315c"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:da80047524eac2acf7c04c18ac7a7da05a9136241f642dd2ed94269ef0d0a45a"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-win32.whl", hash = "sha256:6ca45359d7a21644793de0e29de497ef7f1ae7268e346c4faf87b421fea364e6"},
|
||||||
|
{file = "regex-2022.1.18-cp37-cp37m-win_amd64.whl", hash = "sha256:38289f1690a7e27aacd049e420769b996826f3728756859420eeee21cc857118"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6014038f52b4b2ac1fa41a58d439a8a00f015b5c0735a0cd4b09afe344c94899"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0b5d6f9aed3153487252d00a18e53f19b7f52a1651bc1d0c4b5844bc286dfa52"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d24b03daf7415f78abc2d25a208f234e2c585e5e6f92f0204d2ab7b9ab48e3"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf594cc7cc9d528338d66674c10a5b25e3cde7dd75c3e96784df8f371d77a298"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd914db437ec25bfa410f8aa0aa2f3ba87cdfc04d9919d608d02330947afaeab"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b6840b6448203228a9d8464a7a0d99aa8fa9f027ef95fe230579abaf8a6ee1"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11772be1eb1748e0e197a40ffb82fb8fd0d6914cd147d841d9703e2bef24d288"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a602bdc8607c99eb5b391592d58c92618dcd1537fdd87df1813f03fed49957a6"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7e26eac9e52e8ce86f915fd33380f1b6896a2b51994e40bb094841e5003429b4"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:519c0b3a6fbb68afaa0febf0d28f6c4b0a1074aefc484802ecb9709faf181607"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3c7ea86b9ca83e30fa4d4cd0eaf01db3ebcc7b2726a25990966627e39577d729"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:51f02ca184518702975b56affde6c573ebad4e411599005ce4468b1014b4786c"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:385ccf6d011b97768a640e9d4de25412204fbe8d6b9ae39ff115d4ff03f6fe5d"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-win32.whl", hash = "sha256:1f8c0ae0a0de4e19fddaaff036f508db175f6f03db318c80bbc239a1def62d02"},
|
||||||
|
{file = "regex-2022.1.18-cp38-cp38-win_amd64.whl", hash = "sha256:760c54ad1b8a9b81951030a7e8e7c3ec0964c1cb9fee585a03ff53d9e531bb8e"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:93c20777a72cae8620203ac11c4010365706062aa13aaedd1a21bb07adbb9d5d"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6aa427c55a0abec450bca10b64446331b5ca8f79b648531138f357569705bc4a"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38baee6bdb7fe1b110b6b3aaa555e6e872d322206b7245aa39572d3fc991ee4"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:752e7ddfb743344d447367baa85bccd3629c2c3940f70506eb5f01abce98ee68"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8acef4d8a4353f6678fd1035422a937c2170de58a2b29f7da045d5249e934101"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c73d2166e4b210b73d1429c4f1ca97cea9cc090e5302df2a7a0a96ce55373f1c"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24c89346734a4e4d60ecf9b27cac4c1fee3431a413f7aa00be7c4d7bbacc2c4d"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:596f5ae2eeddb79b595583c2e0285312b2783b0ec759930c272dbf02f851ff75"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ecfe51abf7f045e0b9cdde71ca9e153d11238679ef7b5da6c82093874adf3338"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1d6301f5288e9bdca65fab3de6b7de17362c5016d6bf8ee4ba4cbe833b2eda0f"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:93cce7d422a0093cfb3606beae38a8e47a25232eea0f292c878af580a9dc7605"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cf0db26a1f76aa6b3aa314a74b8facd586b7a5457d05b64f8082a62c9c49582a"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:defa0652696ff0ba48c8aff5a1fac1eef1ca6ac9c660b047fc8e7623c4eb5093"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-win32.whl", hash = "sha256:6db1b52c6f2c04fafc8da17ea506608e6be7086715dab498570c3e55e4f8fbd1"},
|
||||||
|
{file = "regex-2022.1.18-cp39-cp39-win_amd64.whl", hash = "sha256:ebaeb93f90c0903233b11ce913a7cb8f6ee069158406e056f884854c737d2442"},
|
||||||
|
{file = "regex-2022.1.18.tar.gz", hash = "sha256:97f32dc03a8054a4c4a5ab5d761ed4861e828b2c200febd4e46857069a483916"},
|
||||||
|
]
|
||||||
|
toml = [
|
||||||
|
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
|
||||||
|
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
|
||||||
|
]
|
||||||
|
tomli = [
|
||||||
|
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||||
|
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||||
|
]
|
||||||
|
traitlets = [
|
||||||
|
{file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"},
|
||||||
|
{file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"},
|
||||||
|
]
|
||||||
|
typed-ast = [
|
||||||
|
{file = "typed_ast-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:183b183b7771a508395d2cbffd6db67d6ad52958a5fdc99f450d954003900266"},
|
||||||
|
{file = "typed_ast-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:676d051b1da67a852c0447621fdd11c4e104827417bf216092ec3e286f7da596"},
|
||||||
|
{file = "typed_ast-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc2542e83ac8399752bc16e0b35e038bdb659ba237f4222616b4e83fb9654985"},
|
||||||
|
{file = "typed_ast-1.5.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:74cac86cc586db8dfda0ce65d8bcd2bf17b58668dfcc3652762f3ef0e6677e76"},
|
||||||
|
{file = "typed_ast-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:18fe320f354d6f9ad3147859b6e16649a0781425268c4dde596093177660e71a"},
|
||||||
|
{file = "typed_ast-1.5.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:31d8c6b2df19a777bc8826770b872a45a1f30cfefcfd729491baa5237faae837"},
|
||||||
|
{file = "typed_ast-1.5.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:963a0ccc9a4188524e6e6d39b12c9ca24cc2d45a71cfdd04a26d883c922b4b78"},
|
||||||
|
{file = "typed_ast-1.5.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0eb77764ea470f14fcbb89d51bc6bbf5e7623446ac4ed06cbd9ca9495b62e36e"},
|
||||||
|
{file = "typed_ast-1.5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:294a6903a4d087db805a7656989f613371915fc45c8cc0ddc5c5a0a8ad9bea4d"},
|
||||||
|
{file = "typed_ast-1.5.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:26a432dc219c6b6f38be20a958cbe1abffcc5492821d7e27f08606ef99e0dffd"},
|
||||||
|
{file = "typed_ast-1.5.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7407cfcad702f0b6c0e0f3e7ab876cd1d2c13b14ce770e412c0c4b9728a0f88"},
|
||||||
|
{file = "typed_ast-1.5.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f30ddd110634c2d7534b2d4e0e22967e88366b0d356b24de87419cc4410c41b7"},
|
||||||
|
{file = "typed_ast-1.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:8c08d6625bb258179b6e512f55ad20f9dfef019bbfbe3095247401e053a3ea30"},
|
||||||
|
{file = "typed_ast-1.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:90904d889ab8e81a956f2c0935a523cc4e077c7847a836abee832f868d5c26a4"},
|
||||||
|
{file = "typed_ast-1.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bbebc31bf11762b63bf61aaae232becb41c5bf6b3461b80a4df7e791fabb3aca"},
|
||||||
|
{file = "typed_ast-1.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29dd9a3a9d259c9fa19d19738d021632d673f6ed9b35a739f48e5f807f264fb"},
|
||||||
|
{file = "typed_ast-1.5.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:58ae097a325e9bb7a684572d20eb3e1809802c5c9ec7108e85da1eb6c1a3331b"},
|
||||||
|
{file = "typed_ast-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:da0a98d458010bf4fe535f2d1e367a2e2060e105978873c04c04212fb20543f7"},
|
||||||
|
{file = "typed_ast-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:33b4a19ddc9fc551ebabca9765d54d04600c4a50eda13893dadf67ed81d9a098"},
|
||||||
|
{file = "typed_ast-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1098df9a0592dd4c8c0ccfc2e98931278a6c6c53cb3a3e2cf7e9ee3b06153344"},
|
||||||
|
{file = "typed_ast-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c47c3b43fe3a39ddf8de1d40dbbfca60ac8530a36c9b198ea5b9efac75c09e"},
|
||||||
|
{file = "typed_ast-1.5.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f290617f74a610849bd8f5514e34ae3d09eafd521dceaa6cf68b3f4414266d4e"},
|
||||||
|
{file = "typed_ast-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:df05aa5b241e2e8045f5f4367a9f6187b09c4cdf8578bb219861c4e27c443db5"},
|
||||||
|
{file = "typed_ast-1.5.2.tar.gz", hash = "sha256:525a2d4088e70a9f75b08b3f87a51acc9cde640e19cc523c7e41aa355564ae27"},
|
||||||
|
]
|
||||||
|
types-setuptools = [
|
||||||
|
{file = "types-setuptools-57.4.9.tar.gz", hash = "sha256:536ef74744f8e1e4be4fc719887f886e74e4cf3c792b4a06984320be4df450b5"},
|
||||||
|
{file = "types_setuptools-57.4.9-py3-none-any.whl", hash = "sha256:948dc6863373750e2cd0b223a84f1fb608414cde5e55cf38ea657b93aeb411d2"},
|
||||||
|
]
|
||||||
|
typing-extensions = [
|
||||||
|
{file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"},
|
||||||
|
{file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"},
|
||||||
|
]
|
||||||
|
wcwidth = [
|
||||||
|
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
|
||||||
|
{file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
|
||||||
|
]
|
||||||
|
zipp = [
|
||||||
|
{file = "zipp-3.7.0-py3-none-any.whl", hash = "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"},
|
||||||
|
{file = "zipp-3.7.0.tar.gz", hash = "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d"},
|
||||||
|
]
|
@ -10,7 +10,8 @@ python = "^3.7"
|
|||||||
msgpack = ">= 1.0"
|
msgpack = ">= 1.0"
|
||||||
langcodes = ">= 3.0"
|
langcodes = ">= 3.0"
|
||||||
regex = ">= 2020.04.04"
|
regex = ">= 2020.04.04"
|
||||||
ftfy = ">= 3.0"
|
ftfy = ">= 6.1"
|
||||||
|
mypy = "^0.931"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
pytest = "^6.2.5"
|
pytest = "^6.2.5"
|
||||||
@ -20,6 +21,8 @@ ipadic = "^1.0.0"
|
|||||||
mecab-ko-dic = "^1.0.0"
|
mecab-ko-dic = "^1.0.0"
|
||||||
ipython = ">=7"
|
ipython = ">=7"
|
||||||
black = "^22.1.0"
|
black = "^22.1.0"
|
||||||
|
flake8 = "^4.0.1"
|
||||||
|
types-setuptools = "^57.4.9"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core>=1.0.0"]
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from pkg_resources import resource_filename
|
from pkg_resources import resource_filename
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from typing import List, Dict, Iterator, Tuple
|
||||||
import langcodes
|
import langcodes
|
||||||
import msgpack
|
import msgpack
|
||||||
import gzip
|
import gzip
|
||||||
@ -12,7 +13,7 @@ import warnings
|
|||||||
|
|
||||||
from .tokens import tokenize, simple_tokenize, lossy_tokenize
|
from .tokens import tokenize, simple_tokenize, lossy_tokenize
|
||||||
from .language_info import get_language_info
|
from .language_info import get_language_info
|
||||||
from .preprocess import num_generic_digits
|
from .numbers import digit_freq
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -34,7 +35,7 @@ tokenize = tokenize
|
|||||||
simple_tokenize = simple_tokenize
|
simple_tokenize = simple_tokenize
|
||||||
|
|
||||||
|
|
||||||
def read_cBpack(filename):
|
def read_cBpack(filename: str) -> List[List[str]]:
|
||||||
"""
|
"""
|
||||||
Read a file from an idiosyncratic format that we use for storing
|
Read a file from an idiosyncratic format that we use for storing
|
||||||
approximate word frequencies, called "cBpack".
|
approximate word frequencies, called "cBpack".
|
||||||
@ -87,7 +88,7 @@ def read_cBpack(filename):
|
|||||||
return data[1:]
|
return data[1:]
|
||||||
|
|
||||||
|
|
||||||
def available_languages(wordlist="best"):
|
def available_languages(wordlist: str = "best") -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Given a wordlist name, return a dictionary of language codes to filenames,
|
Given a wordlist name, return a dictionary of language codes to filenames,
|
||||||
representing all the languages in which that wordlist is available.
|
representing all the languages in which that wordlist is available.
|
||||||
@ -111,7 +112,9 @@ def available_languages(wordlist="best"):
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def get_frequency_list(lang, wordlist="best", match_cutoff=None):
|
def get_frequency_list(
|
||||||
|
lang: str, wordlist: str = "best", match_cutoff: None = None
|
||||||
|
) -> List[List[str]]:
|
||||||
"""
|
"""
|
||||||
Read the raw data from a wordlist file, returning it as a list of
|
Read the raw data from a wordlist file, returning it as a list of
|
||||||
lists. (See `read_cBpack` for what this represents.)
|
lists. (See `read_cBpack` for what this represents.)
|
||||||
@ -125,10 +128,9 @@ def get_frequency_list(lang, wordlist="best", match_cutoff=None):
|
|||||||
warnings.warn("The `match_cutoff` parameter is deprecated", DeprecationWarning)
|
warnings.warn("The `match_cutoff` parameter is deprecated", DeprecationWarning)
|
||||||
available = available_languages(wordlist)
|
available = available_languages(wordlist)
|
||||||
|
|
||||||
# TODO: decrease the maximum distance. This distance is so high just
|
# the max_distance is high because we unify scripts, such as Traditional
|
||||||
# because it allows a test where 'yue' matches 'zh', and maybe the
|
# vs. Simplified Chinese, in one wordlist
|
||||||
# distance between those is high because they shouldn't match.
|
best, _distance = langcodes.closest_match(lang, list(available), max_distance=60)
|
||||||
best, _distance = langcodes.closest_match(lang, list(available), max_distance=70)
|
|
||||||
if best == "und":
|
if best == "und":
|
||||||
raise LookupError("No wordlist %r available for language %r" % (wordlist, lang))
|
raise LookupError("No wordlist %r available for language %r" % (wordlist, lang))
|
||||||
|
|
||||||
@ -141,7 +143,7 @@ def get_frequency_list(lang, wordlist="best", match_cutoff=None):
|
|||||||
return read_cBpack(available[best])
|
return read_cBpack(available[best])
|
||||||
|
|
||||||
|
|
||||||
def cB_to_freq(cB):
|
def cB_to_freq(cB: int) -> float:
|
||||||
"""
|
"""
|
||||||
Convert a word frequency from the logarithmic centibel scale that we use
|
Convert a word frequency from the logarithmic centibel scale that we use
|
||||||
internally, to a proportion from 0 to 1.
|
internally, to a proportion from 0 to 1.
|
||||||
@ -157,7 +159,7 @@ def cB_to_freq(cB):
|
|||||||
return 10 ** (cB / 100)
|
return 10 ** (cB / 100)
|
||||||
|
|
||||||
|
|
||||||
def cB_to_zipf(cB):
|
def cB_to_zipf(cB: int) -> float:
|
||||||
"""
|
"""
|
||||||
Convert a word frequency from centibels to the Zipf scale
|
Convert a word frequency from centibels to the Zipf scale
|
||||||
(see `zipf_to_freq`).
|
(see `zipf_to_freq`).
|
||||||
@ -169,7 +171,7 @@ def cB_to_zipf(cB):
|
|||||||
return (cB + 900) / 100
|
return (cB + 900) / 100
|
||||||
|
|
||||||
|
|
||||||
def zipf_to_freq(zipf):
|
def zipf_to_freq(zipf: float) -> float:
|
||||||
"""
|
"""
|
||||||
Convert a word frequency from the Zipf scale to a proportion between 0 and
|
Convert a word frequency from the Zipf scale to a proportion between 0 and
|
||||||
1.
|
1.
|
||||||
@ -185,7 +187,7 @@ def zipf_to_freq(zipf):
|
|||||||
return 10**zipf / 1e9
|
return 10**zipf / 1e9
|
||||||
|
|
||||||
|
|
||||||
def freq_to_zipf(freq):
|
def freq_to_zipf(freq: float) -> float:
|
||||||
"""
|
"""
|
||||||
Convert a word frequency from a proportion between 0 and 1 to the
|
Convert a word frequency from a proportion between 0 and 1 to the
|
||||||
Zipf scale (see `zipf_to_freq`).
|
Zipf scale (see `zipf_to_freq`).
|
||||||
@ -194,7 +196,9 @@ def freq_to_zipf(freq):
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def get_frequency_dict(lang, wordlist="best", match_cutoff=None):
|
def get_frequency_dict(
|
||||||
|
lang: str, wordlist: str = "best", match_cutoff: None = None
|
||||||
|
) -> Dict[str, float]:
|
||||||
"""
|
"""
|
||||||
Get a word frequency list as a dictionary, mapping tokens to
|
Get a word frequency list as a dictionary, mapping tokens to
|
||||||
frequencies as floating-point probabilities.
|
frequencies as floating-point probabilities.
|
||||||
@ -210,7 +214,7 @@ def get_frequency_dict(lang, wordlist="best", match_cutoff=None):
|
|||||||
return freqs
|
return freqs
|
||||||
|
|
||||||
|
|
||||||
def iter_wordlist(lang, wordlist="best"):
|
def iter_wordlist(lang: str, wordlist: str = "best") -> Iterator[str]:
|
||||||
"""
|
"""
|
||||||
Yield the words in a wordlist in approximate descending order of
|
Yield the words in a wordlist in approximate descending order of
|
||||||
frequency.
|
frequency.
|
||||||
@ -225,12 +229,12 @@ def iter_wordlist(lang, wordlist="best"):
|
|||||||
# This dict and inner function are used to implement a "drop everything" cache
|
# This dict and inner function are used to implement a "drop everything" cache
|
||||||
# for word_frequency(); the overheads of lru_cache() are comparable to the time
|
# for word_frequency(); the overheads of lru_cache() are comparable to the time
|
||||||
# it takes to look up frequencies from scratch, so something faster is needed.
|
# it takes to look up frequencies from scratch, so something faster is needed.
|
||||||
_wf_cache = {}
|
_wf_cache: Dict[Tuple[str, str, str, float], float] = {}
|
||||||
|
|
||||||
|
|
||||||
def _word_frequency(word, lang, wordlist, minimum):
|
def _word_frequency(word: str, lang: str, wordlist: str, minimum: float) -> float:
|
||||||
tokens = lossy_tokenize(word, lang)
|
tokens = lossy_tokenize(word, lang)
|
||||||
digits = num_generic_digits(word)
|
dfreq = digit_freq(word)
|
||||||
if not tokens:
|
if not tokens:
|
||||||
return minimum
|
return minimum
|
||||||
|
|
||||||
@ -245,7 +249,7 @@ def _word_frequency(word, lang, wordlist, minimum):
|
|||||||
# If any word is missing, just return the default value
|
# If any word is missing, just return the default value
|
||||||
return minimum
|
return minimum
|
||||||
# spread the frequency of digits over all digit combinations
|
# spread the frequency of digits over all digit combinations
|
||||||
freq = freqs[token] / (10.0**digits)
|
freq = freqs[token]
|
||||||
one_over_result += 1.0 / freq
|
one_over_result += 1.0 / freq
|
||||||
|
|
||||||
freq = 1.0 / one_over_result
|
freq = 1.0 / one_over_result
|
||||||
@ -266,7 +270,9 @@ def _word_frequency(word, lang, wordlist, minimum):
|
|||||||
return round(unrounded, leading_zeroes + 3)
|
return round(unrounded, leading_zeroes + 3)
|
||||||
|
|
||||||
|
|
||||||
def word_frequency(word, lang, wordlist="best", minimum=0.0):
|
def word_frequency(
|
||||||
|
word: str, lang: str, wordlist: str = "best", minimum: float = 0.0
|
||||||
|
) -> float:
|
||||||
"""
|
"""
|
||||||
Get the frequency of `word` in the language with code `lang`, from the
|
Get the frequency of `word` in the language with code `lang`, from the
|
||||||
specified `wordlist`.
|
specified `wordlist`.
|
||||||
@ -293,7 +299,7 @@ def word_frequency(word, lang, wordlist="best", minimum=0.0):
|
|||||||
return _wf_cache[args]
|
return _wf_cache[args]
|
||||||
|
|
||||||
|
|
||||||
def zipf_frequency(word, lang, wordlist="best", minimum=0.0):
|
def zipf_frequency(word: str, lang: str, wordlist: str = "best", minimum: float = 0.0):
|
||||||
"""
|
"""
|
||||||
Get the frequency of `word`, in the language with code `lang`, on the Zipf
|
Get the frequency of `word`, in the language with code `lang`, on the Zipf
|
||||||
scale.
|
scale.
|
||||||
@ -321,7 +327,9 @@ def zipf_frequency(word, lang, wordlist="best", minimum=0.0):
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=100)
|
@lru_cache(maxsize=100)
|
||||||
def top_n_list(lang, n, wordlist="best", ascii_only=False):
|
def top_n_list(
|
||||||
|
lang: str, n: int, wordlist: str = "best", ascii_only: bool = False
|
||||||
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Return a frequency list of length `n` in descending order of frequency.
|
Return a frequency list of length `n` in descending order of frequency.
|
||||||
This list contains words from `wordlist`, of the given language.
|
This list contains words from `wordlist`, of the given language.
|
||||||
@ -337,8 +345,12 @@ def top_n_list(lang, n, wordlist="best", ascii_only=False):
|
|||||||
|
|
||||||
|
|
||||||
def random_words(
|
def random_words(
|
||||||
lang="en", wordlist="best", nwords=5, bits_per_word=12, ascii_only=False
|
lang: str = "en",
|
||||||
):
|
wordlist: str = "best",
|
||||||
|
nwords: int = 5,
|
||||||
|
bits_per_word: int = 12,
|
||||||
|
ascii_only: bool = False,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Returns a string of random, space separated words.
|
Returns a string of random, space separated words.
|
||||||
|
|
||||||
@ -362,7 +374,9 @@ def random_words(
|
|||||||
return " ".join([random.choice(choices) for i in range(nwords)])
|
return " ".join([random.choice(choices) for i in range(nwords)])
|
||||||
|
|
||||||
|
|
||||||
def random_ascii_words(lang="en", wordlist="best", nwords=5, bits_per_word=12):
|
def random_ascii_words(
|
||||||
|
lang: str = "en", wordlist: str = "best", nwords: int = 5, bits_per_word: int = 12
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Returns a string of random, space separated, ASCII words.
|
Returns a string of random, space separated, ASCII words.
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from pkg_resources import resource_filename
|
from pkg_resources import resource_filename
|
||||||
|
from typing import List
|
||||||
import jieba
|
import jieba
|
||||||
import msgpack
|
import msgpack
|
||||||
import gzip
|
import gzip
|
||||||
@ -17,7 +18,7 @@ jieba_tokenizer = None
|
|||||||
jieba_orig_tokenizer = None
|
jieba_orig_tokenizer = None
|
||||||
|
|
||||||
|
|
||||||
def simplify_chinese(text):
|
def simplify_chinese(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Convert Chinese text character-by-character to Simplified Chinese, for the
|
Convert Chinese text character-by-character to Simplified Chinese, for the
|
||||||
purpose of looking up word frequencies.
|
purpose of looking up word frequencies.
|
||||||
@ -31,7 +32,7 @@ def simplify_chinese(text):
|
|||||||
return text.translate(SIMPLIFIED_MAP).casefold()
|
return text.translate(SIMPLIFIED_MAP).casefold()
|
||||||
|
|
||||||
|
|
||||||
def jieba_tokenize(text, external_wordlist=False):
|
def jieba_tokenize(text: str, external_wordlist: bool = False) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Tokenize the given text into tokens whose word frequencies can probably
|
Tokenize the given text into tokens whose word frequencies can probably
|
||||||
be looked up. This uses Jieba, a word-frequency-based tokenizer.
|
be looked up. This uses Jieba, a word-frequency-based tokenizer.
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from langcodes import Language, closest_match
|
from langcodes import Language, closest_match
|
||||||
|
from typing import List, Union
|
||||||
|
|
||||||
# Text in scripts written without spaces has to be handled specially in our
|
# Text in scripts written without spaces has to be handled specially in our
|
||||||
# tokenization regex (see TOKEN_RE in tokens.py). Also, when one of these is
|
# tokenization regex (see TOKEN_RE in tokens.py). Also, when one of these is
|
||||||
@ -44,7 +44,9 @@ EXTRA_JAPANESE_CHARACTERS = "ー々〻〆"
|
|||||||
# happens in ConceptNet.
|
# happens in ConceptNet.
|
||||||
|
|
||||||
|
|
||||||
def _language_in_list(language, targets, max_distance=10):
|
def _language_in_list(
|
||||||
|
language: Language, targets: List[str], max_distance: int = 10
|
||||||
|
) -> bool:
|
||||||
"""
|
"""
|
||||||
A helper function to determine whether this language matches one of the
|
A helper function to determine whether this language matches one of the
|
||||||
target languages, with a match score above a certain threshold.
|
target languages, with a match score above a certain threshold.
|
||||||
@ -57,7 +59,7 @@ def _language_in_list(language, targets, max_distance=10):
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def get_language_info(language):
|
def get_language_info(language: Union[str, Language]) -> dict:
|
||||||
"""
|
"""
|
||||||
Looks up the things we need to know about how to handle text in a given
|
Looks up the things we need to know about how to handle text in a given
|
||||||
language. This will return a dictionary with the following fields:
|
language. This will return a dictionary with the following fields:
|
||||||
|
@ -1,14 +1,10 @@
|
|||||||
from pkg_resources import resource_filename
|
|
||||||
import MeCab
|
import MeCab
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import os
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
||||||
# MeCab has fixed-sized buffers for many things, including the dictionary path
|
def make_mecab_analyzer(lang: str) -> MeCab.Tagger:
|
||||||
MAX_PATH_LENGTH = 58
|
|
||||||
|
|
||||||
|
|
||||||
def make_mecab_analyzer(lang):
|
|
||||||
"""
|
"""
|
||||||
Get a MeCab analyzer object, given the language code of the language to
|
Get a MeCab analyzer object, given the language code of the language to
|
||||||
analyze.
|
analyze.
|
||||||
@ -22,14 +18,14 @@ def make_mecab_analyzer(lang):
|
|||||||
|
|
||||||
return MeCab.Tagger(ipadic.MECAB_ARGS)
|
return MeCab.Tagger(ipadic.MECAB_ARGS)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Can't run MeCab on language {lang}".format(lang))
|
raise ValueError(f"Can't run MeCab on language {lang}")
|
||||||
|
|
||||||
|
|
||||||
# The constructed analyzers will go in this dictionary.
|
# The constructed analyzers will go in this dictionary.
|
||||||
MECAB_ANALYZERS = {}
|
MECAB_ANALYZERS: Dict[str, MeCab.Tagger] = {}
|
||||||
|
|
||||||
|
|
||||||
def mecab_tokenize(text, lang):
|
def mecab_tokenize(text: str, lang: str) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Use the mecab-python3 package to tokenize the given text. The `lang`
|
Use the mecab-python3 package to tokenize the given text. The `lang`
|
||||||
must be 'ja' for Japanese or 'ko' for Korean.
|
must be 'ja' for Japanese or 'ko' for Korean.
|
||||||
|
72
wordfreq/numbers.py
Normal file
72
wordfreq/numbers.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
from .preprocess import MULTI_DIGIT_RE
|
||||||
|
|
||||||
|
# Frequencies of leading digits, according to Benford's law, sort of.
|
||||||
|
# Benford's law doesn't describe numbers with leading zeroes, because "007"
|
||||||
|
# and "7" are the same number, but for us they should have different frequencies.
|
||||||
|
# I added an estimate for the frequency of numbers with leading zeroes.
|
||||||
|
# DIGIT_FREQS[d] is the estimated share of digit strings whose first digit is d
# (index 0 is the leading-zero estimate); the ten values sum to 1.0.
DIGIT_FREQS = [0.009, 0.300, 0.175, 0.124, 0.096, 0.078, 0.066, 0.057, 0.050, 0.045]
|
||||||
|
|
||||||
|
# Suppose you have a token NNNN, a 4-digit number representing a year. We're making
|
||||||
|
# a probability distribution of P(token=NNNN | token is 4 digits).
|
||||||
|
#
|
||||||
|
# We do this with a piecewise exponential function whose peak is a plateau covering
|
||||||
|
# the years 2019 to 2039.
|
||||||
|
#
|
||||||
|
# YEAR_LOG_PEAK is chosen by experimentation to make this probability add up to about
|
||||||
|
# .994. Here, that represents P(token represents a year | token is 4 digits).
|
||||||
|
# The other .006 represents P(token does not represent a year | token is 4 digits).
|
||||||
|
|
||||||
|
# Base-10 logarithm of the year probability at its peak; year_freq raises 10.0
# to a value derived from this.
YEAR_LOG_PEAK = -1.875
|
||||||
|
# Weight applied to the Benford estimate in year_freq, for 4-digit tokens that
# are not years.
NOT_YEAR_PROB = 0.006
|
||||||
|
# Year at which the rising part of the curve reaches its peak (2019 is when the
# Google Books data was last collected).
REFERENCE_YEAR = 2019
|
||||||
|
# Number of years after REFERENCE_YEAR that keep the peak frequency.
PLATEAU_WIDTH = 20
|
||||||
|
|
||||||
|
|
||||||
|
def benford_freq(text: str) -> float:
    """
    Estimate the relative frequency of a digit string from its first digit.

    The share for the leading digit is looked up in DIGIT_FREQS and then
    divided by 10 for every remaining character, which spreads that share
    evenly over all strings of the same length that start with that digit.

    `text` must be non-empty and its first character must be a digit.
    """
    leading_digit = int(text[0])
    trailing_length = len(text) - 1
    return DIGIT_FREQS[leading_digit] / 10**trailing_length
|
||||||
|
|
||||||
|
|
||||||
|
def year_freq(text: str) -> float:
    """
    Estimate the relative frequency of a 4-digit token, treating it as a year.

    The result is the sum of two parts:

    - a piecewise-exponential "year" curve that rises toward REFERENCE_YEAR,
      stays flat for the following PLATEAU_WIDTH years, and then drops off
      steeply;
    - the Benford estimate for the same token, weighted by NOT_YEAR_PROB, to
      cover the case where the token is not a year at all.

    `text` must be a string that `int` can parse.
    """
    year = int(text)
    plateau_end = REFERENCE_YEAR + PLATEAU_WIDTH

    if year > plateau_end:
        # Fall off quickly to catch up with the actual frequency of future
        # years (it's low). This curve is made up to fit with the made-up
        # "present" data on the plateau.
        log_freq = YEAR_LOG_PEAK - 0.2 * (year - plateau_end)
    elif year > REFERENCE_YEAR:
        # It's no longer 2019, which is when the Google Books data was last
        # collected. It's 2022 as I write this, and possibly even later as
        # you're using it. Years keep happening.
        #
        # So, we'll just keep the expected frequency of the "present" year
        # constant for 20 years.
        log_freq = YEAR_LOG_PEAK
    else:
        # Fitting a line to the curve seen at
        # https://twitter.com/r_speer/status/1493715982887571456.
        log_freq = YEAR_LOG_PEAK - 0.0083 * (REFERENCE_YEAR - year)

    year_part = 10.0**log_freq

    # If this token _doesn't_ represent a year, then use the Benford frequency
    # distribution.
    benford_part = NOT_YEAR_PROB * benford_freq(text)
    return year_part + benford_part
|
||||||
|
|
||||||
|
|
||||||
|
def digit_freq(text: str) -> float:
    """
    Estimate the relative frequency of the specific digits that appear in
    `text`, as a factor to multiply into a word frequency.

    Every multi-digit sequence found by MULTI_DIGIT_RE is broken into runs of
    pure digits. A run of exactly 4 digits is scored as a possible year with
    `year_freq`; any other run is scored with `benford_freq`. The scores of
    all runs are multiplied together.

    Returns 1.0 when `text` contains no multi-digit sequence.
    """
    freq = 1.0
    for match in MULTI_DIGIT_RE.findall(text):
        # MULTI_DIGIT_RE accepts '.' and ',' after the first digit, so a match
        # such as "12.5" or "1,000" is not a plain integer. Passing it on
        # whole would make int() fail in year_freq for 4-character matches,
        # and would count separators as digits in benford_freq. Score each
        # run of digits on its own instead.
        for run in match.replace(",", ".").split("."):
            if not run:
                # Adjacent or trailing separators leave empty pieces.
                continue
            if len(run) == 4:
                freq *= year_freq(run)
            else:
                freq *= benford_freq(run)
    return freq
|
||||||
|
|
||||||
|
|
||||||
|
# Sanity check: adds up digit_freq over every 4-digit string, "0000" to "9999".
# NOTE(review): this statement is at module level, so it runs (and prints)
# every time the module is imported -- looks like leftover debugging output;
# confirm whether it should be removed or guarded before release.
print(sum(digit_freq("%04d" % year) for year in range(0, 10000)))
|
@ -3,6 +3,7 @@ import unicodedata
|
|||||||
|
|
||||||
from .language_info import get_language_info
|
from .language_info import get_language_info
|
||||||
from .transliterate import transliterate
|
from .transliterate import transliterate
|
||||||
|
from langcodes import Language
|
||||||
|
|
||||||
MARK_RE = regex.compile(r"[\p{Mn}\N{ARABIC TATWEEL}]", regex.V1)
|
MARK_RE = regex.compile(r"[\p{Mn}\N{ARABIC TATWEEL}]", regex.V1)
|
||||||
|
|
||||||
@ -10,7 +11,7 @@ DIGIT_RE = regex.compile(r"\d")
|
|||||||
MULTI_DIGIT_RE = regex.compile(r"\d[\d.,]+")
|
MULTI_DIGIT_RE = regex.compile(r"\d[\d.,]+")
|
||||||
|
|
||||||
|
|
||||||
def preprocess_text(text, language):
|
def preprocess_text(text: str, language: Language) -> str:
|
||||||
"""
|
"""
|
||||||
This function applies pre-processing steps that convert forms of words
|
This function applies pre-processing steps that convert forms of words
|
||||||
considered equivalent into one standardized form.
|
considered equivalent into one standardized form.
|
||||||
@ -196,7 +197,7 @@ def preprocess_text(text, language):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def remove_marks(text):
|
def remove_marks(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Remove decorations from words in abjad scripts:
|
Remove decorations from words in abjad scripts:
|
||||||
|
|
||||||
@ -208,7 +209,7 @@ def remove_marks(text):
|
|||||||
return MARK_RE.sub("", text)
|
return MARK_RE.sub("", text)
|
||||||
|
|
||||||
|
|
||||||
def casefold_with_i_dots(text):
|
def casefold_with_i_dots(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Convert capital I's and capital dotted İ's to lowercase in the way
|
Convert capital I's and capital dotted İ's to lowercase in the way
|
||||||
that's appropriate for Turkish and related languages, then case-fold
|
that's appropriate for Turkish and related languages, then case-fold
|
||||||
@ -218,7 +219,7 @@ def casefold_with_i_dots(text):
|
|||||||
return text.casefold()
|
return text.casefold()
|
||||||
|
|
||||||
|
|
||||||
def commas_to_cedillas(text):
|
def commas_to_cedillas(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Convert s and t with commas (ș and ț) to cedillas (ş and ţ), which is
|
Convert s and t with commas (ș and ț) to cedillas (ş and ţ), which is
|
||||||
preferred in Turkish.
|
preferred in Turkish.
|
||||||
@ -235,7 +236,7 @@ def commas_to_cedillas(text):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def cedillas_to_commas(text):
|
def cedillas_to_commas(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Convert s and t with cedillas (ş and ţ) to commas (ș and ț), which is
|
Convert s and t with cedillas (ş and ţ) to commas (ș and ț), which is
|
||||||
preferred in Romanian.
|
preferred in Romanian.
|
||||||
@ -252,7 +253,7 @@ def cedillas_to_commas(text):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _sub_zeroes(match):
|
def _sub_zeroes(match: regex.Match) -> str:
|
||||||
"""
|
"""
|
||||||
Given a regex match, return what it matched with digits replaced by
|
Given a regex match, return what it matched with digits replaced by
|
||||||
zeroes.
|
zeroes.
|
||||||
@ -260,15 +261,7 @@ def _sub_zeroes(match):
|
|||||||
return DIGIT_RE.sub("0", match.group(0))
|
return DIGIT_RE.sub("0", match.group(0))
|
||||||
|
|
||||||
|
|
||||||
def num_generic_digits(text):
|
def smash_numbers(text: str) -> str:
|
||||||
"""
|
|
||||||
Determine how many "generic digits" are in the text (digits that we
|
|
||||||
replace with 0 to combine numbers of the same length).
|
|
||||||
"""
|
|
||||||
return sum([len(match) for match in MULTI_DIGIT_RE.findall(text)])
|
|
||||||
|
|
||||||
|
|
||||||
def smash_numbers(text):
|
|
||||||
"""
|
"""
|
||||||
Replace sequences of multiple digits with zeroes, so we don't need to
|
Replace sequences of multiple digits with zeroes, so we don't need to
|
||||||
distinguish the frequencies of thousands of numbers.
|
distinguish the frequencies of thousands of numbers.
|
||||||
|
@ -2,6 +2,7 @@ import regex
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
import logging
|
import logging
|
||||||
import langcodes
|
import langcodes
|
||||||
|
from typing import List
|
||||||
from ftfy.fixes import uncurl_quotes
|
from ftfy.fixes import uncurl_quotes
|
||||||
|
|
||||||
from .language_info import (
|
from .language_info import (
|
||||||
@ -20,7 +21,7 @@ _WARNED_LANGUAGES = set()
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _make_spaceless_expr():
|
def _make_spaceless_expr() -> str:
|
||||||
scripts = sorted(SPACELESS_SCRIPTS)
|
scripts = sorted(SPACELESS_SCRIPTS)
|
||||||
pieces = [r"\p{IsIdeo}"] + [
|
pieces = [r"\p{IsIdeo}"] + [
|
||||||
r"\p{Script=%s}" % script_code for script_code in scripts
|
r"\p{Script=%s}" % script_code for script_code in scripts
|
||||||
@ -179,7 +180,7 @@ TOKEN_RE_WITH_PUNCTUATION = regex.compile(
|
|||||||
PUNCT_RE = regex.compile(r"[\p{punct}]+")
|
PUNCT_RE = regex.compile(r"[\p{punct}]+")
|
||||||
|
|
||||||
|
|
||||||
def simple_tokenize(text, include_punctuation=False):
|
def simple_tokenize(text: str, include_punctuation: bool = False) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Tokenize the given text using a straightforward, Unicode-aware token
|
Tokenize the given text using a straightforward, Unicode-aware token
|
||||||
expression.
|
expression.
|
||||||
@ -214,7 +215,12 @@ def simple_tokenize(text, include_punctuation=False):
|
|||||||
return [token.strip("'").casefold() for token in TOKEN_RE.findall(text)]
|
return [token.strip("'").casefold() for token in TOKEN_RE.findall(text)]
|
||||||
|
|
||||||
|
|
||||||
def tokenize(text, lang, include_punctuation=False, external_wordlist=False):
|
def tokenize(
|
||||||
|
text: str,
|
||||||
|
lang: str,
|
||||||
|
include_punctuation: bool = False,
|
||||||
|
external_wordlist: bool = False,
|
||||||
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Tokenize this text in a way that's relatively simple but appropriate for
|
Tokenize this text in a way that's relatively simple but appropriate for
|
||||||
the language. Strings that are looked up in wordfreq will be run through
|
the language. Strings that are looked up in wordfreq will be run through
|
||||||
@ -255,15 +261,20 @@ def tokenize(text, lang, include_punctuation=False, external_wordlist=False):
|
|||||||
text = preprocess_text(text, language)
|
text = preprocess_text(text, language)
|
||||||
|
|
||||||
if info["tokenizer"] == "mecab":
|
if info["tokenizer"] == "mecab":
|
||||||
from wordfreq.mecab import mecab_tokenize as _mecab_tokenize
|
from wordfreq.mecab import mecab_tokenize
|
||||||
|
|
||||||
|
_mecab_tokenize = mecab_tokenize
|
||||||
|
|
||||||
# Get just the language code out of the Language object, so we can
|
# Get just the language code out of the Language object, so we can
|
||||||
# use it to select a MeCab dictionary
|
# use it to select a MeCab dictionary
|
||||||
|
assert language.language is not None
|
||||||
tokens = _mecab_tokenize(text, language.language)
|
tokens = _mecab_tokenize(text, language.language)
|
||||||
if not include_punctuation:
|
if not include_punctuation:
|
||||||
tokens = [token for token in tokens if not PUNCT_RE.match(token)]
|
tokens = [token for token in tokens if not PUNCT_RE.match(token)]
|
||||||
elif info["tokenizer"] == "jieba":
|
elif info["tokenizer"] == "jieba":
|
||||||
from wordfreq.chinese import jieba_tokenize as _jieba_tokenize
|
from wordfreq.chinese import jieba_tokenize
|
||||||
|
|
||||||
|
_jieba_tokenize = jieba_tokenize
|
||||||
|
|
||||||
tokens = _jieba_tokenize(text, external_wordlist=external_wordlist)
|
tokens = _jieba_tokenize(text, external_wordlist=external_wordlist)
|
||||||
if not include_punctuation:
|
if not include_punctuation:
|
||||||
@ -285,7 +296,12 @@ def tokenize(text, lang, include_punctuation=False, external_wordlist=False):
|
|||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
def lossy_tokenize(text, lang, include_punctuation=False, external_wordlist=False):
|
def lossy_tokenize(
|
||||||
|
text: str,
|
||||||
|
lang: str,
|
||||||
|
include_punctuation: bool = False,
|
||||||
|
external_wordlist: bool = False,
|
||||||
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Get a list of tokens for this text, with largely the same results and
|
Get a list of tokens for this text, with largely the same results and
|
||||||
options as `tokenize`, but aggressively normalize some text in a lossy way
|
options as `tokenize`, but aggressively normalize some text in a lossy way
|
||||||
@ -312,7 +328,9 @@ def lossy_tokenize(text, lang, include_punctuation=False, external_wordlist=Fals
|
|||||||
tokens = tokenize(text, lang, include_punctuation, external_wordlist)
|
tokens = tokenize(text, lang, include_punctuation, external_wordlist)
|
||||||
|
|
||||||
if info["lookup_transliteration"] == "zh-Hans":
|
if info["lookup_transliteration"] == "zh-Hans":
|
||||||
from wordfreq.chinese import simplify_chinese as _simplify_chinese
|
from wordfreq.chinese import simplify_chinese
|
||||||
|
|
||||||
|
_simplify_chinese = simplify_chinese
|
||||||
|
|
||||||
tokens = [_simplify_chinese(token) for token in tokens]
|
tokens = [_simplify_chinese(token) for token in tokens]
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# This table comes from https://github.com/opendatakosovo/cyrillic-transliteration/blob/master/cyrtranslit/mapping.py,
|
# This table comes from
|
||||||
|
# https://github.com/opendatakosovo/cyrillic-transliteration/blob/master/cyrtranslit/mapping.py,
|
||||||
# from the 'cyrtranslit' module. We originally had to reimplement it because
|
# from the 'cyrtranslit' module. We originally had to reimplement it because
|
||||||
# 'cyrtranslit' didn't work in Python 3; now it does, but we've made the table
|
# 'cyrtranslit' didn't work in Python 3; now it does, but we've made the table
|
||||||
# more robust than the one in cyrtranslit.
|
# more robust than the one in cyrtranslit.
|
||||||
|
Loading…
Reference in New Issue
Block a user