mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-25 02:05:24 +00:00
WIP on Ninja build automation
This commit is contained in:
parent
815d393b74
commit
14e445a937
28
wordfreq_builder/rules.ninja
Normal file
28
wordfreq_builder/rules.ninja
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# This defines the rules on how to build parts of the wordfreq lists, using the
|
||||||
|
# Ninja build system:
|
||||||
|
#
|
||||||
|
# http://martine.github.io/ninja/manual.html
|
||||||
|
#
|
||||||
|
# Ninja is available in the 'ninja-build' Ubuntu package. It's like make with
|
||||||
|
# better parallelism and the ability for build steps to produce multiple
|
||||||
|
# outputs. The tradeoff is that its rule syntax isn't full of magic for
|
||||||
|
# expanding wildcards and finding dependencies, so in general you have to
|
||||||
|
# write the dependencies using a script.
|
||||||
|
#
|
||||||
|
# This file will become the header of the larger build.ninja file, which also
|
||||||
|
# contains the programmatically-defined dependency graph.
|
||||||
|
|
||||||
|
# Variables
|
||||||
|
DATA = ./data
|
||||||
|
|
||||||
|
# Splits the single file $in into $slices parts, whose names will be
|
||||||
|
# $prefix plus a two-digit numeric suffix.
|
||||||
|
rule split
|
||||||
|
command = split -d -n $slices $in $prefix
|
||||||
|
|
||||||
|
# wiki2text is a tool I wrote using the development version of Nim, which
|
||||||
|
# extracts plain text from Wikipedia dumps obtained from dumps.wikimedia.org.
|
||||||
|
# The code is at https://github.com/rspeer/wiki2text, but right now it'll
|
||||||
|
# take a bit of setup to get it to run.
|
||||||
|
rule wiki2text
|
||||||
|
command = bunzip2 -c $in | wiki2text > $out
|
4
wordfreq_builder/wordfreq.cfg
Normal file
4
wordfreq_builder/wordfreq.cfg
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
[wordfreq]
|
||||||
|
version = 0.8
|
||||||
|
data_dir = ./data
|
||||||
|
languages = en, es, fr, de, pt, nl, ru, it, ko, ja, zh-TW, zh-CN, ar, ms
|
Loading…
Reference in New Issue
Block a user