2017-08-25 21:37:48 +00:00
|
|
|
"""
|
2020-10-01 20:05:43 +00:00
|
|
|
A quick script to output the top N words (500 for now) in each language.
|
2017-08-25 21:37:48 +00:00
|
|
|
You can send the output to a file and diff it to see changes between wordfreq
|
|
|
|
versions.
|
|
|
|
"""
|
|
|
|
import wordfreq
|
|
|
|
|
|
|
|
|
2020-10-01 20:05:43 +00:00
|
|
|
# How many of the top-ranked words to print for each language.
N = 500
|
2017-08-25 21:37:48 +00:00
|
|
|
|
2019-04-16 15:33:22 +00:00
|
|
|
if __name__ == '__main__':
    # Print one "<language>\t<word>" line per entry. Languages are visited in
    # sorted order so the output of two runs can be compared with a plain diff.
    languages = sorted(wordfreq.available_languages())
    for code in languages:
        top_words = wordfreq.top_n_list(code, N)
        for entry in top_words:
            line = '{}\t{}'.format(code, entry)
            print(line)
|