From 7c6cf84749f5181483fcb5d15458d6b808fb2cab Mon Sep 17 00:00:00 2001 From: Robyn Speer Date: Wed, 13 May 2015 04:09:34 -0400 Subject: [PATCH] update README, another setup fix Former-commit-id: dd41e61c575758861b30b1fabc5af6fd595f1654 --- README.txt | 30 +++++++++++++++++++++++------- setup.py | 1 + 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/README.txt b/README.txt index e17da08..4db9b2a 100644 --- a/README.txt +++ b/README.txt @@ -4,16 +4,32 @@ Author: Robyn Speer ## License -`wordfreq` is freely redistributable under the MIT license. See -`MIT-LICENSE.txt`. +`wordfreq` is freely redistributable under the MIT license (see +`MIT-LICENSE.txt`), and it includes data files that may be +redistributed under a Creative Commons Attribution-ShareAlike 4.0 +license (https://creativecommons.org/licenses/by-sa/4.0/). -It contains data extracted from Google Books Ngrams -(http://books.google.com/ngrams). The terms of use of this data are: +`wordfreq` contains data extracted from Google Books Ngrams +(http://books.google.com/ngrams) and Google Books Syntactic Ngrams +(http://commondatastorage.googleapis.com/books/syntactic-ngrams/index.html). +The terms of use of this data are: Ngram Viewer graphs and data may be freely used for any purpose, although acknowledgement of Google Books Ngram Viewer as the source, and inclusion of a link to http://books.google.com/ngrams, would be appreciated. -It also contains data from the University of Leeds Centre for Translation -Studies (see http://corpus.leeds.ac.uk/list.html), which is freely -redistributable under a Creative Commons Attribution license. +It also contains data derived from the following Creative Commons-licensed +sources: + +- The Leeds Internet Corpus, from the University of Leeds Centre for Translation + Studies (http://corpus.leeds.ac.uk/list.html) + +- The OpenSubtitles Frequency Word Lists, by Invoke IT Limited + (https://invokeit.wordpress.com/frequency-word-lists/) + +- Wikipedia, the free encyclopedia (http://www.wikipedia.org) + +Some additional data was collected by a custom application that watches the +streaming Twitter API, in accordance with Twitter's Developer Agreement & +Policy. This software only gives statistics about words that are very commonly +used on Twitter; it does not display or republish any Twitter content. diff --git a/setup.py b/setup.py index 02ec1d9..68db8ea 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ #!/usr/bin/env python from setuptools import setup +import sys import os classifiers = [