mirror of
https://github.com/adambard/learnxinyminutes-docs.git
synced 2025-01-14 13:15:59 +00:00
Update awk.html.markdown
This commit is contained in:
parent
4c36ee6943
commit
c8284c9c95
@ -6,14 +6,15 @@ contributors:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
AWK is a standard tool on every POSIX-compliant UNIX system. It's like a
|
AWK is a standard tool on every POSIX-compliant UNIX system. It's like
|
||||||
stripped-down Perl, perfect for text-processing tasks and other scripting
|
flex/lex, from the command-line, perfect for text-processing tasks and
|
||||||
needs. It has a C-like syntax, but without semicolons, manual memory
|
other scripting needs. It has a C-like syntax, but without mandatory
|
||||||
management, or static typing. It excels at text processing. You can call to it
|
semicolons (although, you should use them anyway, because they are required
|
||||||
from a shell script, or you can use it as a stand-alone scripting language.
|
when you're writing one-liners, something AWK excels at), manual memory
|
||||||
|
management, or static typing. It excels at text processing. You can call
|
||||||
|
it from a shell script, or you can use it as a stand-alone scripting language.
|
||||||
|
|
||||||
Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always
|
Why use AWK instead of Perl? Readability. AWK is easier to read
|
||||||
count on it, whereas Perl's future is in question. AWK is also easier to read
|
|
||||||
than Perl. For simple text-processing scripts, particularly ones that read
|
than Perl. For simple text-processing scripts, particularly ones that read
|
||||||
files line by line and split on delimiters, AWK is probably the right tool for
|
files line by line and split on delimiters, AWK is probably the right tool for
|
||||||
the job.
|
the job.
|
||||||
@ -23,8 +24,23 @@ the job.
|
|||||||
|
|
||||||
# Comments are like this
|
# Comments are like this
|
||||||
|
|
||||||
# AWK programs consist of a collection of patterns and actions. The most
|
|
||||||
# important pattern is called BEGIN. Actions go into brace blocks.
|
# AWK programs consist of a collection of patterns and actions.
|
||||||
|
pattern1 { action; } # just like lex
|
||||||
|
pattern2 { action; }
|
||||||
|
|
||||||
|
# There is an implied loop and AWK automatically reads and parses each
|
||||||
|
# record of each file supplied. Each record is split by the FS delimiter,
|
||||||
|
# which defaults to white-space (multiple spaces or tabs count as one).
|
||||||
|
# You can assign FS either on the command line (-F C) or in your BEGIN
|
||||||
|
# pattern
|
||||||
|
|
||||||
|
# One of the special patterns is BEGIN. The BEGIN pattern is true
|
||||||
|
# BEFORE any of the files are read. The END pattern is true after
|
||||||
|
# an End-of-file from the last file (or standard-in if no files specified)
|
||||||
|
# There is also an output field separator (OFS) that you can assign, which
|
||||||
|
# defaults to a single space
|
||||||
|
|
||||||
BEGIN {
|
BEGIN {
|
||||||
|
|
||||||
# BEGIN will run at the beginning of the program. It's where you put all
|
# BEGIN will run at the beginning of the program. It's where you put all
|
||||||
@ -32,114 +48,116 @@ BEGIN {
|
|||||||
# have no text files, then think of BEGIN as the main entry point.
|
# have no text files, then think of BEGIN as the main entry point.
|
||||||
|
|
||||||
# Variables are global. Just set them or use them, no need to declare..
|
# Variables are global. Just set them or use them, no need to declare..
|
||||||
count = 0
|
count = 0;
|
||||||
|
|
||||||
# Operators just like in C and friends
|
# Operators just like in C and friends
|
||||||
a = count + 1
|
a = count + 1;
|
||||||
b = count - 1
|
b = count - 1;
|
||||||
c = count * 1
|
c = count * 1;
|
||||||
d = count / 1 # integer division
|
d = count / 1; # division (floating point, not integer)
|
||||||
e = count % 1 # modulus
|
e = count % 1; # modulus
|
||||||
f = count ^ 1 # exponentiation
|
f = count ^ 1; # exponentiation
|
||||||
|
|
||||||
a += 1
|
a += 1;
|
||||||
b -= 1
|
b -= 1;
|
||||||
c *= 1
|
c *= 1;
|
||||||
d /= 1
|
d /= 1;
|
||||||
e %= 1
|
e %= 1;
|
||||||
f ^= 1
|
f ^= 1;
|
||||||
|
|
||||||
# Incrementing and decrementing by one
|
# Incrementing and decrementing by one
|
||||||
a++
|
a++;
|
||||||
b--
|
b--;
|
||||||
|
|
||||||
# As a prefix operator, it returns the incremented value
|
# As a prefix operator, it returns the incremented value
|
||||||
++a
|
++a;
|
||||||
--b
|
--b;
|
||||||
|
|
||||||
# Notice, also, no punctuation such as semicolons to terminate statements
|
# Notice, also, that semicolons are optional: a newline terminates a statement too
|
||||||
|
|
||||||
# Control statements
|
# Control statements
|
||||||
if (count == 0)
|
if (count == 0)
|
||||||
print "Starting with count of 0"
|
print "Starting with count of 0";
|
||||||
else
|
else
|
||||||
print "Huh?"
|
print "Huh?";
|
||||||
|
|
||||||
# Or you could use the ternary operator
|
# Or you could use the ternary operator
|
||||||
print (count == 0) ? "Starting with count of 0" : "Huh?"
|
print (count == 0) ? "Starting with count of 0" : "Huh?";
|
||||||
|
|
||||||
# Blocks consisting of multiple lines use braces
|
# Blocks consisting of multiple lines use braces
|
||||||
while (a < 10) {
|
while (a < 10) {
|
||||||
print "String concatenation is done" " with a series" " of"
|
print "String concatenation is done" " with a series" " of"
|
||||||
" space-separated strings"
|
" space-separated strings";
|
||||||
print a
|
print a;
|
||||||
|
|
||||||
a++
|
a++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 10; i++)
|
for (i = 0; i < 10; i++)
|
||||||
print "Good ol' for loop"
|
print "Good ol' for loop";
|
||||||
|
|
||||||
# As for comparisons, they're the standards:
|
# As for comparisons, they're the standards:
|
||||||
a < b # Less than
|
# a < b # Less than
|
||||||
a <= b # Less than or equal
|
# a <= b # Less than or equal
|
||||||
a != b # Not equal
|
# a != b # Not equal
|
||||||
a == b # Equal
|
# a == b # Equal
|
||||||
a > b # Greater than
|
# a > b # Greater than
|
||||||
a >= b # Greater than or equal
|
# a >= b # Greater than or equal
|
||||||
|
|
||||||
# Logical operators as well
|
# Logical operators as well
|
||||||
a && b # AND
|
# a && b # AND
|
||||||
a || b # OR
|
# a || b # OR
|
||||||
|
|
||||||
# In addition, there's the super useful regular expression match
|
# In addition, there's the super useful regular expression match
|
||||||
if ("foo" ~ "^fo+$")
|
if ("foo" ~ "^fo+$")
|
||||||
print "Fooey!"
|
print "Fooey!";
|
||||||
if ("boo" !~ "^fo+$")
|
if ("boo" !~ "^fo+$")
|
||||||
print "Boo!"
|
print "Boo!";
|
||||||
|
|
||||||
# Arrays
|
# Arrays
|
||||||
arr[0] = "foo"
|
arr[0] = "foo";
|
||||||
arr[1] = "bar"
|
arr[1] = "bar";
|
||||||
# Unfortunately, there is no other way to initialize an array. Ya just
|
|
||||||
# gotta chug through every value line by line like that.
|
|
||||||
|
|
||||||
# You also have associative arrays
|
# You can also initialize an array with the built-in function split()
|
||||||
assoc["foo"] = "bar"
|
|
||||||
assoc["bar"] = "baz"
|
n = split("foo:bar:baz", arr, ":");
|
||||||
|
|
||||||
|
# You also have associative arrays (actually, they're all associative arrays)
|
||||||
|
assoc["foo"] = "bar";
|
||||||
|
assoc["bar"] = "baz";
|
||||||
|
|
||||||
# And multi-dimensional arrays, with some limitations I won't mention here
|
# And multi-dimensional arrays, with some limitations I won't mention here
|
||||||
multidim[0,0] = "foo"
|
multidim[0,0] = "foo";
|
||||||
multidim[0,1] = "bar"
|
multidim[0,1] = "bar";
|
||||||
multidim[1,0] = "baz"
|
multidim[1,0] = "baz";
|
||||||
multidim[1,1] = "boo"
|
multidim[1,1] = "boo";
|
||||||
|
|
||||||
# You can test for array membership
|
# You can test for array membership
|
||||||
if ("foo" in assoc)
|
if ("foo" in assoc)
|
||||||
print "Fooey!"
|
print "Fooey!";
|
||||||
|
|
||||||
# You can also use the 'in' operator to traverse the keys of an array
|
# You can also use the 'in' operator to traverse the keys of an array
|
||||||
for (key in assoc)
|
for (key in assoc)
|
||||||
print assoc[key]
|
print assoc[key];
|
||||||
|
|
||||||
# The command line is in a special array called ARGV
|
# The command line is in a special array called ARGV
|
||||||
for (argnum in ARGV)
|
for (argnum in ARGV)
|
||||||
print ARGV[argnum]
|
print ARGV[argnum];
|
||||||
|
|
||||||
# You can remove elements of an array
|
# You can remove elements of an array
|
||||||
# This is particularly useful to prevent AWK from assuming the arguments
|
# This is particularly useful to prevent AWK from assuming the arguments
|
||||||
# are files for it to process
|
# are files for it to process
|
||||||
delete ARGV[1]
|
delete ARGV[1];
|
||||||
|
|
||||||
# The number of command line arguments is in a variable called ARGC
|
# The number of command line arguments is in a variable called ARGC
|
||||||
print ARGC
|
print ARGC;
|
||||||
|
|
||||||
# AWK has several built-in functions. They fall into three categories. I'll
|
# AWK has several built-in functions. They fall into three categories. I'll
|
||||||
# demonstrate each of them in their own functions, defined later.
|
# demonstrate each of them in their own functions, defined later.
|
||||||
|
|
||||||
return_value = arithmetic_functions(a, b, c)
|
return_value = arithmetic_functions(a, b, c);
|
||||||
string_functions()
|
string_functions();
|
||||||
io_functions()
|
io_functions();
|
||||||
}
|
}
|
||||||
|
|
||||||
# Here's how you define a function
|
# Here's how you define a function
|
||||||
@ -159,26 +177,26 @@ function arithmetic_functions(a, b, c, d) {
|
|||||||
# Now, to demonstrate the arithmetic functions
|
# Now, to demonstrate the arithmetic functions
|
||||||
|
|
||||||
# Most AWK implementations have some standard trig functions
|
# Most AWK implementations have some standard trig functions
|
||||||
localvar = sin(a)
|
localvar = sin(a);
|
||||||
localvar = cos(a)
|
localvar = cos(a);
|
||||||
localvar = atan2(b, a) # arc tangent of b / a
|
localvar = atan2(b, a); # arc tangent of b / a
|
||||||
|
|
||||||
# And logarithmic stuff
|
# And logarithmic stuff
|
||||||
localvar = exp(a)
|
localvar = exp(a);
|
||||||
localvar = log(a)
|
localvar = log(a);
|
||||||
|
|
||||||
# Square root
|
# Square root
|
||||||
localvar = sqrt(a)
|
localvar = sqrt(a);
|
||||||
|
|
||||||
# Truncate floating point to integer
|
# Truncate floating point to integer
|
||||||
localvar = int(5.34) # localvar => 5
|
localvar = int(5.34); # localvar => 5
|
||||||
|
|
||||||
# Random numbers
|
# Random numbers
|
||||||
srand() # Supply a seed as an argument. By default, it uses the time of day
|
srand(); # Supply a seed as an argument. By default, it uses the time of day
|
||||||
localvar = rand() # Random number between 0 and 1.
|
localvar = rand(); # Random number between 0 and 1.
|
||||||
|
|
||||||
# Here's how to return a value
|
# Here's how to return a value
|
||||||
return localvar
|
return localvar;
|
||||||
}
|
}
|
||||||
|
|
||||||
function string_functions( localvar, arr) {
|
function string_functions( localvar, arr) {
|
||||||
@ -188,61 +206,66 @@ function string_functions( localvar, arr) {
|
|||||||
|
|
||||||
# Search and replace, first instance (sub) or all instances (gsub)
|
# Search and replace, first instance (sub) or all instances (gsub)
|
||||||
# Both return number of matches replaced
|
# Both return number of matches replaced
|
||||||
localvar = "fooooobar"
|
localvar = "fooooobar";
|
||||||
sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar"
|
sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar"
|
||||||
gsub("e+", ".", localvar) # localvar => "m..t m. at th. bar"
|
gsub("e+", ".", localvar); # localvar => "M.t m. at th. bar"
|
||||||
|
|
||||||
# Search for a string that matches a regular expression
|
# Search for a string that matches a regular expression
|
||||||
# index() does the same thing, but doesn't allow a regular expression
|
# index() does the same thing, but doesn't allow a regular expression
|
||||||
match(localvar, "t") # => 4, since the 't' is the fourth character
|
match(localvar, "t"); # => 3, since the 't' is the third character of "M.t m. at th. bar"
|
||||||
|
|
||||||
# Split on a delimiter
|
# Split on a delimiter
|
||||||
split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"]
|
n = split("foo-bar-baz", arr, "-"); # arr[1] = "foo"; arr[2] = "bar"; arr[3] = "baz"; n = 3
|
||||||
|
|
||||||
# Other useful stuff
|
# Other useful stuff
|
||||||
sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3"
|
sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3"
|
||||||
substr("foobar", 2, 3) # => "oob"
|
substr("foobar", 2, 3); # => "oob"
|
||||||
substr("foobar", 4) # => "bar"
|
substr("foobar", 4); # => "bar"
|
||||||
length("foo") # => 3
|
length("foo"); # => 3
|
||||||
tolower("FOO") # => "foo"
|
tolower("FOO"); # => "foo"
|
||||||
toupper("foo") # => "FOO"
|
toupper("foo"); # => "FOO"
|
||||||
}
|
}
|
||||||
|
|
||||||
function io_functions( localvar) {
|
function io_functions( localvar) {
|
||||||
|
|
||||||
# You've already seen print
|
# You've already seen print
|
||||||
print "Hello world"
|
print "Hello world";
|
||||||
|
|
||||||
# There's also printf
|
# There's also printf
|
||||||
printf("%s %d %d %d\n", "Testing", 1, 2, 3)
|
printf("%s %d %d %d\n", "Testing", 1, 2, 3);
|
||||||
|
|
||||||
# AWK doesn't have file handles, per se. It will automatically open a file
|
# AWK doesn't have file handles, per se. It will automatically open a file
|
||||||
# handle for you when you use something that needs one. The string you used
|
# handle for you when you use something that needs one. The string you used
|
||||||
# for this can be treated as a file handle, for purposes of I/O. This makes
|
# for this can be treated as a file handle, for purposes of I/O. This makes
|
||||||
# it feel sort of like shell scripting:
|
# it feel sort of like shell scripting, but to get the same output, the string
|
||||||
|
# must match exactly, so use a variable:
|
||||||
|
|
||||||
print "foobar" >"/tmp/foobar.txt"
|
outfile = "/tmp/foobar.txt";
|
||||||
|
|
||||||
# Now the string "/tmp/foobar.txt" is a file handle. You can close it:
|
print "foobar" > outfile;
|
||||||
close("/tmp/foobar.txt")
|
|
||||||
|
# Now the string outfile is a file handle. You can close it:
|
||||||
|
close(outfile);
|
||||||
|
|
||||||
# Here's how you run something in the shell
|
# Here's how you run something in the shell
|
||||||
system("echo foobar") # => prints foobar
|
system("echo foobar"); # => prints foobar
|
||||||
|
|
||||||
# Reads a line from standard input and stores in localvar
|
# Reads a line from standard input and stores in localvar
|
||||||
getline localvar
|
getline localvar;
|
||||||
|
|
||||||
# Reads a line from a pipe
|
# Reads a line from a pipe (again, use a string so you close it properly)
|
||||||
"echo foobar" | getline localvar # localvar => "foobar"
|
cmd = "echo foobar";
|
||||||
close("echo foobar")
|
cmd | getline localvar; # localvar => "foobar"
|
||||||
|
close(cmd);
|
||||||
|
|
||||||
# Reads a line from a file and stores in localvar
|
# Reads a line from a file and stores in localvar
|
||||||
getline localvar <"/tmp/foobar.txt"
|
infile = "/tmp/foobar.txt";
|
||||||
close("/tmp/foobar.txt")
|
getline localvar < infile;
|
||||||
|
close(infile);
|
||||||
}
|
}
|
||||||
|
|
||||||
# As I said at the beginning, AWK programs consist of a collection of patterns
|
# As I said at the beginning, AWK programs consist of a collection of patterns
|
||||||
# and actions. You've already seen the all-important BEGIN pattern. Other
|
# and actions. You've already seen the BEGIN pattern. Other
|
||||||
# patterns are used only if you're processing lines from files or standard
|
# patterns are used only if you're processing lines from files or standard
|
||||||
# input.
|
# input.
|
||||||
#
|
#
|
||||||
@ -257,7 +280,7 @@ function io_functions( localvar) {
|
|||||||
# expression, /^fo+bar$/, and will be skipped for any line that fails to
|
# expression, /^fo+bar$/, and will be skipped for any line that fails to
|
||||||
# match it. Let's just print the line:
|
# match it. Let's just print the line:
|
||||||
|
|
||||||
print
|
print;
|
||||||
|
|
||||||
# Whoa, no argument! That's because print has a default argument: $0.
|
# Whoa, no argument! That's because print has a default argument: $0.
|
||||||
# $0 is the name of the current line being processed. It is created
|
# $0 is the name of the current line being processed. It is created
|
||||||
@ -268,16 +291,16 @@ function io_functions( localvar) {
|
|||||||
# does. And, like the shell, each field can be accessed with a dollar sign
|
# does. And, like the shell, each field can be accessed with a dollar sign
|
||||||
|
|
||||||
# This will print the second and fourth fields in the line
|
# This will print the second and fourth fields in the line
|
||||||
print $2, $4
|
print $2, $4;
|
||||||
|
|
||||||
# AWK automatically defines many other variables to help you inspect and
|
# AWK automatically defines many other variables to help you inspect and
|
||||||
# process each line. The most important one is NF
|
# process each line. The most important one is NF
|
||||||
|
|
||||||
# Prints the number of fields on this line
|
# Prints the number of fields on this line
|
||||||
print NF
|
print NF;
|
||||||
|
|
||||||
# Print the last field on this line
|
# Print the last field on this line
|
||||||
print $NF
|
print $NF;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Every pattern is actually a true/false test. The regular expression in the
|
# Every pattern is actually a true/false test. The regular expression in the
|
||||||
@ -286,7 +309,7 @@ function io_functions( localvar) {
|
|||||||
# currently processing. Thus, the complete version of it is this:
|
# currently processing. Thus, the complete version of it is this:
|
||||||
|
|
||||||
$0 ~ /^fo+bar$/ {
|
$0 ~ /^fo+bar$/ {
|
||||||
print "Equivalent to the last pattern"
|
print "Equivalent to the last pattern";
|
||||||
}
|
}
|
||||||
|
|
||||||
a > 0 {
|
a > 0 {
|
||||||
@ -315,10 +338,10 @@ a > 0 {
|
|||||||
BEGIN {
|
BEGIN {
|
||||||
|
|
||||||
# First, ask the user for the name
|
# First, ask the user for the name
|
||||||
print "What name would you like the average age for?"
|
print "What name would you like the average age for?";
|
||||||
|
|
||||||
# Get a line from standard input, not from files on the command line
|
# Get a line from standard input, not from files on the command line
|
||||||
getline name <"/dev/stdin"
|
getline name < "/dev/stdin";
|
||||||
}
|
}
|
||||||
|
|
||||||
# Now, match every line whose first field is the given name
|
# Now, match every line whose first field is the given name
|
||||||
@ -335,8 +358,8 @@ $1 == name {
|
|||||||
# ...etc. There are plenty more, documented in the man page.
|
# ...etc. There are plenty more, documented in the man page.
|
||||||
|
|
||||||
# Keep track of a running total and how many lines matched
|
# Keep track of a running total and how many lines matched
|
||||||
sum += $3
|
sum += $3;
|
||||||
nlines++
|
nlines++;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Another special pattern is called END. It will run after processing all the
|
# Another special pattern is called END. It will run after processing all the
|
||||||
@ -348,7 +371,7 @@ $1 == name {
|
|||||||
|
|
||||||
END {
|
END {
|
||||||
if (nlines)
|
if (nlines)
|
||||||
print "The average age for " name " is " sum / nlines
|
print "The average age for " name " is " sum / nlines;
|
||||||
}
|
}
|
||||||
|
|
||||||
```
|
```
|
||||||
@ -357,3 +380,4 @@ Further Reading:
|
|||||||
* [Awk tutorial](http://www.grymoire.com/Unix/Awk.html)
|
* [Awk tutorial](http://www.grymoire.com/Unix/Awk.html)
|
||||||
* [Awk man page](https://linux.die.net/man/1/awk)
|
* [Awk man page](https://linux.die.net/man/1/awk)
|
||||||
* [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems.
|
* [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems.
|
||||||
|
* [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html)
|
||||||
|
Loading…
Reference in New Issue
Block a user