mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
put back the freqs_to_cBpack cutoff; prepare for 1.0
This commit is contained in:
parent
32102ba3c2
commit
c5708b24e4
2
setup.py
2
setup.py
@@ -33,7 +33,7 @@ if sys.version_info < (3, 4):
|
||||
|
||||
setup(
|
||||
name="wordfreq",
|
||||
version='1.0b4',
|
||||
version='1.0',
|
||||
maintainer='Luminoso Technologies, Inc.',
|
||||
maintainer_email='info@luminoso.com',
|
||||
url='http://github.com/LuminosoInsight/wordfreq/',
|
||||
|
@@ -69,10 +69,16 @@ def freqs_to_cBpack(in_filename, out_filename, cutoff=-600, lang=None):
|
||||
written to the new file.
|
||||
"""
|
||||
freq_cutoff = 10 ** (cutoff / 100.)
|
||||
# freq_cutoff will only be effective here if the data we're reading
|
||||
# is already normalized to frequencies. If we're reading counts,
|
||||
# it just won't matter. This is why we check for cB <= cutoff again
|
||||
# below.
|
||||
freqs = read_freqs(in_filename, freq_cutoff, lang=lang)
|
||||
cBpack = []
|
||||
for token, freq in freqs.items():
|
||||
cB = round(math.log10(freq) * 100)
|
||||
if cB <= cutoff:
|
||||
continue
|
||||
neg_cB = -cB
|
||||
while neg_cB >= len(cBpack):
|
||||
cBpack.append([])
|
||||
|
Loading…
Reference in New Issue
Block a user