Move under www to ease rsync
This commit is contained in:
9
www/eps/hpr2163/hpr2163_arithmetic_assignment_operators.awk
Executable file
9
www/eps/hpr2163/hpr2163_arithmetic_assignment_operators.awk
Executable file
@@ -0,0 +1,9 @@
|
||||
BEGIN{
|
||||
x = 42; print "x is",x
|
||||
x += 1; print "x += 1 is",x
|
||||
x -= 1; print "x -= 1 is",x
|
||||
x *= 2; print "x *= 2 is",x
|
||||
x /= 2; print "x /= 2 is",x
|
||||
x %= 5; print "x %= 5 is",x
|
||||
x ^= 4; print "x ^= 4 is",x
|
||||
}
|
||||
12
www/eps/hpr2163/hpr2163_color_count.awk
Executable file
12
www/eps/hpr2163/hpr2163_color_count.awk
Executable file
@@ -0,0 +1,12 @@
|
||||
BEGIN {
|
||||
FS=","
|
||||
OFS=","
|
||||
print "color,count"
|
||||
}
|
||||
NR != 1 {
|
||||
count[$2]+=1
|
||||
}
|
||||
END {
|
||||
for (color in count)
|
||||
print color, count[color]
|
||||
}
|
||||
BIN
www/eps/hpr2163/hpr2163_full_shownotes.epub
Executable file
BIN
www/eps/hpr2163/hpr2163_full_shownotes.epub
Executable file
Binary file not shown.
402
www/eps/hpr2163/hpr2163_full_shownotes.html
Executable file
402
www/eps/hpr2163/hpr2163_full_shownotes.html
Executable file
@@ -0,0 +1,402 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="pandoc">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
<meta name="author" content="Dave Morriss">
|
||||
<title>Gnu Awk - Part 4 (HPR Show 2163)</title>
|
||||
<style type="text/css">code{white-space: pre;}</style>
|
||||
<!--[if lt IE 9]>
|
||||
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
||||
<![endif]-->
|
||||
<style type="text/css">
|
||||
div.sourceCode { overflow-x: auto; }
|
||||
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
|
||||
margin: 0; padding: 0; vertical-align: baseline; border: none; }
|
||||
table.sourceCode { width: 100%; line-height: 100%; }
|
||||
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
|
||||
td.sourceCode { padding-left: 5px; }
|
||||
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
||||
code > span.dt { color: #902000; } /* DataType */
|
||||
code > span.dv { color: #40a070; } /* DecVal */
|
||||
code > span.bn { color: #40a070; } /* BaseN */
|
||||
code > span.fl { color: #40a070; } /* Float */
|
||||
code > span.ch { color: #4070a0; } /* Char */
|
||||
code > span.st { color: #4070a0; } /* String */
|
||||
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
||||
code > span.ot { color: #007020; } /* Other */
|
||||
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
||||
code > span.fu { color: #06287e; } /* Function */
|
||||
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
|
||||
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
||||
code > span.cn { color: #880000; } /* Constant */
|
||||
code > span.sc { color: #4070a0; } /* SpecialChar */
|
||||
code > span.vs { color: #4070a0; } /* VerbatimString */
|
||||
code > span.ss { color: #bb6688; } /* SpecialString */
|
||||
code > span.im { } /* Import */
|
||||
code > span.va { color: #19177c; } /* Variable */
|
||||
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
||||
code > span.op { color: #666666; } /* Operator */
|
||||
code > span.bu { } /* BuiltIn */
|
||||
code > span.ex { } /* Extension */
|
||||
code > span.pp { color: #bc7a00; } /* Preprocessor */
|
||||
code > span.at { color: #7d9029; } /* Attribute */
|
||||
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
||||
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
||||
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
||||
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
||||
</style>
|
||||
<link rel="stylesheet" href="http://hackerpublicradio.org/css/hpr.css">
|
||||
</head>
|
||||
|
||||
<body id="home">
|
||||
<div id="container" class="shadow">
|
||||
<header>
|
||||
<h1 class="title">Gnu Awk - Part 4 (HPR Show 2163)</h1>
|
||||
<h2 class="author">Dave Morriss</h2>
|
||||
<hr/>
|
||||
</header>
|
||||
|
||||
<main id="maincontent">
|
||||
<article>
|
||||
<header>
|
||||
<h1>Table of Contents</h1>
|
||||
<nav id="TOC">
|
||||
<ul>
|
||||
<li><a href="#introduction">Introduction</a></li>
|
||||
<li><a href="#recap-of-the-last-episode">Recap of the last episode</a><ul>
|
||||
<li><a href="#logical-operators">Logical Operators</a></li>
|
||||
<li><a href="#the-next-statement">The <em>next</em> statement</a></li>
|
||||
<li><a href="#the-begin-and-end-rules">The <em>BEGIN</em> and <em>END</em> rules</a></li>
|
||||
<li><a href="#variables-arrays-loops-etc">Variables, arrays, loops, etc</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#explaining-variables">Explaining variables</a><ul>
|
||||
<li><a href="#variable-assignment">Variable assignment</a></li>
|
||||
<li><a href="#arithmetic-operators">Arithmetic operators</a></li>
|
||||
<li><a href="#assignment-operators">Assignment operators</a><ul>
|
||||
<li><a href="#examples">Examples</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#type-conversion">Type conversion</a></li>
|
||||
<li><a href="#increment-and-decrement-operators">Increment and decrement operators</a></li>
|
||||
<li><a href="#arrays">Arrays</a><ul>
|
||||
<li><a href="#looping-through-arrays">Looping through arrays</a></li>
|
||||
</ul></li>
|
||||
</ul></li>
|
||||
<li><a href="#more-built-in-variables">More built-in variables</a></li>
|
||||
<li><a href="#summary">Summary</a></li>
|
||||
<li><a href="#links">Links</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</header>
|
||||
<h2 id="introduction">Introduction</h2>
|
||||
<p>This is the fourth episode of the series that <a href="http://hackerpublicradio.org/correspondents.php?hostid=300" title="Mr. Young">Mr. Young</a> and I are doing. These shows are now collected under the series title “<a href="http://hackerpublicradio.org/series.php?id=94" title="Learning Awk">Learning Awk</a>”.</p>
|
||||
<h2 id="recap-of-the-last-episode">Recap of the last episode</h2>
|
||||
<h3 id="logical-operators">Logical Operators</h3>
|
||||
<p>We have seen the operators ‘<b>&amp;&amp;</b>’ (<em>and</em>) and ‘<b>||</b>’ (<em>or</em>). These are also called <em>Boolean Operators</em>. There is also one more operator ‘<b>!</b>’ (<em>not</em>) which we haven’t yet encountered. These operators allow the construction of <em>Boolean expressions</em> which may be quite complex.</p>
|
||||
<p>If you are used to programming you will expect these operators to have a precedence, just like operators in arithmetic do. We will deal with this subject in more detail later since it is relevant not only in patterns but also in other parts of an Awk program.</p>
|
||||
<h3 id="the-next-statement">The <em>next</em> statement</h3>
|
||||
<p>We saw this statement in the last episode and learned that it causes the processing of the current input record to stop. No more patterns are tested against this record and no more actions in the current rule are executed. Note that “<em>next</em>” is a statement like “<em>print</em>”, and can only occur in the action part of a rule. It is also not permitted in <em>BEGIN</em> or <em>END</em> rules (more of which anon).</p>
|
||||
<h3 id="the-begin-and-end-rules">The <em>BEGIN</em> and <em>END</em> rules</h3>
|
||||
<p>The <em>BEGIN</em> and <em>END</em> elements are special <em>patterns</em>, which in conjunction with <em>actions</em> enclosed in curly brackets make up <em>rules</em> in the same sense that the ‘<em>pattern {action}</em>’ sequences we have seen so far are rules. As we saw in the last episode, <em>BEGIN</em> rules are run before the main ‘<em>pattern {action}</em>’ rules are processed and the input file is (or files are) read, whereas <em>END</em> rules run after the input files have been processed.</p>
|
||||
<p>It is permitted to write more than one <em>BEGIN</em> rule and more than one <em>END</em> rule. These are just concatenated together in the order they are encountered by Awk.</p>
|
||||
<p>Awk will complain if either <em>BEGIN</em> or <em>END</em> is not followed by an <em>action</em> since this is meaningless.</p>
|
||||
<h3 id="variables-arrays-loops-etc">Variables, arrays, loops, etc</h3>
|
||||
<p>Learning a programming language is never a linear process, and sometimes reference is made to new features that have not yet been explained. A number of new features were mentioned in passing in the last episode, and we will look at these in more detail in this episode.</p>
|
||||
<h2 id="explaining-variables">Explaining variables</h2>
|
||||
<p>We saw the built-in variables like <em>NR</em> and <em>NF</em> earlier in the series, and you saw in the last episode that you can create your own variables too. A variable, as in other languages, is simply a named storage area that can hold a value. The name must consist of letters, digits or the underscore. It may not start with a digit, and there is a difference between upper case and lower case letters (‘<code>sum</code>’, ‘<code>Sum</code>’ and ‘<code>SUM</code>’ are different variables). Such simple variables which can hold a single value are also called <em>scalars</em>.</p>
|
||||
<p>A variable in Awk may contain a numeric value or a string. Awk deals with the conversion of one to another as appropriate (though sometimes it needs help).</p>
|
||||
<p>In Awk, unlike many other languages, it is not necessary to initialise variables before using them. All variables start as an empty string which is converted to zero as appropriate.</p>
|
||||
<h3 id="variable-assignment">Variable assignment</h3>
|
||||
<p>Variables are set to values using <em>assignment</em> such as:</p>
|
||||
<pre><code>count = 3</code></pre>
|
||||
<p>As you saw in the last episode there are many types of assignment, for example:</p>
|
||||
<pre><code>used += $3</code></pre>
|
||||
<p>This means <em>increment the contents of variable ‘<code>used</code>’ by the contents of field 3</em>. (There is an assumption here that field 3 contains a numeric value, of course.)</p>
|
||||
<p>It’s a shorthand version of:</p>
|
||||
<pre><code>used = used + $3</code></pre>
|
||||
<p>This means <em>add the contents of ‘<code>used</code>’ to the contents of field 3 and save the result back in ‘<code>used</code>’</em>.</p>
|
||||
<p>The first time the variable is incremented its contents are taken to be zero. This is normally bad practice in older and stricter compiled languages, but Awk is more forgiving.</p>
|
||||
<p>Since we have now started to look at writing arithmetic expressions it is probably a good idea to review what the arithmetic operators are in Awk.</p>
|
||||
<h3 id="arithmetic-operators">Arithmetic operators</h3>
|
||||
<p>It is important to note that <em>all numbers in Awk are floating point numbers</em>. This fact can catch you out in some edge cases, which we will try to highlight as the series progresses.</p>
|
||||
<p>This list is based on the one from the <a href="https://www.gnu.org/software/gawk/manual/html_node/index.html" title="GNU Awk User's Guide">GNU Awk User’s Guide</a>. The operators are listed in order of their precedence, highest to lowest.</p>
|
||||
<dl>
|
||||
<dt><em>x ^ y</em></dt>
|
||||
<dd><p>Exponentiation; <em>x</em> raised to the <em>y</em> power. ‘2 ^ 3’ has the value eight. There is also a ‘<code>**</code>’ operator, but it is not standard and therefore not portable, so it will not be used here.</p>
|
||||
</dd>
|
||||
<dt><em>- x</em></dt>
|
||||
<dd><p>Negation</p>
|
||||
</dd>
|
||||
<dt><em>+ x</em></dt>
|
||||
<dd><p>Unary plus; this can be used to force Awk to convert a string to a number.</p>
|
||||
</dd>
|
||||
<dt><em>x * y</em></dt>
|
||||
<dd><p>Multiplication</p>
|
||||
</dd>
|
||||
<dt><em>x / y</em></dt>
|
||||
<dd><p>Division; because all numbers in awk are floating-point numbers, the result is not rounded to an integer – thus ‘3 / 4’ has the value 0.75, where in Bash ‘<code>echo $((3/4))</code>’ returns 0.</p>
|
||||
</dd>
|
||||
<dt><em>x % y</em></dt>
|
||||
<dd><p>Remainder after <em>x</em> is divided by <em>y</em>. So ‘3 % 4’ is 3, ‘5 % 2’ is 1, and so on.</p>
|
||||
</dd>
|
||||
<dt><em>x + y</em></dt>
|
||||
<dd><p>Addition.</p>
|
||||
</dd>
|
||||
<dt><em>x - y</em></dt>
|
||||
<dd><p>Subtraction.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<h3 id="assignment-operators">Assignment operators</h3>
|
||||
<p>As you have seen, arithmetic assignment operators (like <b>+=</b>) exist in Awk. These are a shorthand form of more verbose assignments. The following table lists these assignment operators (modified from the <a href="https://www.gnu.org/software/gawk/manual/html_node/index.html" title="GNU Awk User's Guide">GNU Awk User’s Guide</a>):</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>Operator</th>
|
||||
<th>Effect</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td><em>variable</em> += <em>increment</em></td>
|
||||
<td>Add <em>increment</em> to the value of <em>variable</em>.</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td><em>variable</em> -= <em>decrement</em></td>
|
||||
<td>Subtract <em>decrement</em> from the value of <em>variable</em>.</td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td><em>variable</em> *= <em>coefficient</em></td>
|
||||
<td>Multiply the value of <em>variable</em> by <em>coefficient</em>.</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td><em>variable</em> /= <em>divisor</em></td>
|
||||
<td>Divide the value of <em>variable</em> by <em>divisor</em>.</td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td><em>variable</em> %= <em>modulus</em></td>
|
||||
<td>Set <em>variable</em> to its remainder by <em>modulus</em>.</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td><em>variable</em> ^= <em>power</em></td>
|
||||
<td>Raise <em>variable</em> to the power <em>power</em>.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h4 id="examples">Examples</h4>
|
||||
<p>See the associated Awk <a href="http://hackerpublicradio.org/eps/hpr2163/arithmetic_assignment_operators.awk" title="Arithmetic assignment operators">script</a> called <code>arithmetic_assignment_operators.awk</code>:</p>
|
||||
<div class="sourceCode"><table class="sourceCode awk numberLines"><tr class="sourceCode"><td class="lineNumbers"><pre>1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
</pre></td><td class="sourceCode"><pre><code class="sourceCode awk"><span class="cf">BEGIN</span><span class="kw">{</span>
|
||||
x <span class="op">=</span> <span class="dv">42</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x is"</span><span class="op">,</span>x
|
||||
x <span class="op">+=</span> <span class="dv">1</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x += 1 is"</span><span class="op">,</span>x
|
||||
x <span class="op">-=</span> <span class="dv">1</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x -= 1 is"</span><span class="op">,</span>x
|
||||
x <span class="op">*=</span> <span class="dv">2</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x *= 2 is"</span><span class="op">,</span>x
|
||||
x <span class="op">/=</span> <span class="dv">2</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x /= 2 is"</span><span class="op">,</span>x
|
||||
x <span class="op">%=</span> <span class="dv">5</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x %= 5 is"</span><span class="op">,</span>x
|
||||
x <span class="op">^=</span> <span class="dv">4</span><span class="op">;</span> <span class="kw">print</span> <span class="st">"x ^= 4 is"</span><span class="op">,</span>x
|
||||
<span class="kw">}</span></code></pre></td></tr></table></div>
|
||||
<p>Note that everything here is in a <em>BEGIN</em> rule because we don’t want to process a file, just run a little Awk program. Note also that semicolons are needed as statement separators when there are multiple statements on a line, but not otherwise.</p>
|
||||
<p>When run it produces the following output:</p>
|
||||
<pre><code>$ awk -f arithmetic_assignment_operators.awk
|
||||
x is 42
|
||||
x += 1 is 43
|
||||
x -= 1 is 42
|
||||
x *= 2 is 84
|
||||
x /= 2 is 42
|
||||
x %= 5 is 2
|
||||
x ^= 4 is 16</code></pre>
|
||||
<h3 id="type-conversion">Type conversion</h3>
|
||||
<p>As mentioned earlier, a variable in Awk may contain a numeric value or a string, at any point in time. When a number is converted to a string, the result is simply a string version of the number. Converting from a string to a number requires the string to begin with a valid digit sequence.</p>
|
||||
<pre><code>$ awk 'BEGIN{s="9gag.com"; x=s+1; print x}'
|
||||
10</code></pre>
|
||||
<p>If the string contains no valid number at the start then it is converted to zero.</p>
|
||||
<p>Awk will convert integer numbers (42), and floating point numbers (4.2), as well as exponential numbers (1E3):</p>
|
||||
<pre><code>$ awk 'BEGIN{ printf "%g %g %g\n","42","4.2","1E3" }'
|
||||
42 4.2 1000</code></pre>
|
||||
<p>(Note: the ‘<em>g</em>’ format-control letter is for printing general numbers)</p>
|
||||
<h3 id="increment-and-decrement-operators">Increment and decrement operators</h3>
|
||||
<p>In the last episode we saw the use of these operators which increment or decrement the value of a variable by one. There are similar operators in Bash, and these were covered in <a href="http://hackerpublicradio.org/eps/hpr1951_full_shownotes.html#examples-of-arithmetic-evaluation" title="Increment and decrement in Bash">HPR episode 1951</a>.</p>
|
||||
<p>The formal definition of these operators is:</p>
|
||||
<dl>
|
||||
<dt><em>++variable</em></dt>
|
||||
<dd>Increment <em>variable</em>, returning the new value as the value of the expression.
|
||||
</dd>
|
||||
<dt><em>variable++</em></dt>
|
||||
<dd>Increment <em>variable</em>, returning the old value of <em>variable</em> as the value of the expression.
|
||||
</dd>
|
||||
<dt><em>--variable</em></dt>
|
||||
<dd>Decrement <em>variable</em>, returning the new value as the value of the expression. (This expression is like ‘<em>++variable</em>’, but instead of adding, it subtracts.)
|
||||
</dd>
|
||||
<dt><em>variable--</em></dt>
|
||||
<dd>Decrement <em>variable</em>, returning the old value of <em>variable</em> as the value of the expression. (This expression is like ‘<em>variable++</em>’, but instead of adding, it subtracts.)
|
||||
</dd>
|
||||
</dl>
|
||||
<p>We will look at some examples of the use of these operators a little later.</p>
|
||||
<h3 id="arrays">Arrays</h3>
|
||||
<p>As well as the simple (<em>scalar</em>) variables we have seen, Awk also provides one-dimensional arrays<a href="#fn1" class="footnoteRef" id="fnref1"><sup>1</sup></a>. These arrays are <em>associative</em> (also known as <em>hashes</em>).</p>
|
||||
<p>An array has a name conforming to the rules for scalar variables mentioned earlier. Not surprisingly you cannot name an array the same as a simple variable.</p>
|
||||
<p>An array is a means of storing multiple values, and these values are referenced by <em>index</em> values. Also, unlike most compiled languages, Awk’s arrays can be of any length and can be added to at will. They can also be deleted from, but we’ll deal with that later.</p>
|
||||
<p>Given an array <code>a</code>, we might store a value in it thus:</p>
|
||||
<pre><code>a[1] = "HPR"</code></pre>
|
||||
<p>Here the array name is <code>a</code>, the index is <code>1</code> and the contents of <code>a[1]</code> is the string “HPR”.</p>
|
||||
<p>If you are familiar with arrays in other languages you might assume that the index <code>1</code> is numeric. In fact, in Awk it is converted to a string: all array indices are strings, because Awk arrays are not contiguous but are <em>associative</em>. Such arrays are indexed by arbitrary string values, making a sort of <em>look-up table</em>.</p>
|
||||
<p>Thus in an example in the last episode we saw:</p>
|
||||
<pre><code>NR != 1 {
|
||||
a[$2]++
|
||||
}</code></pre>
|
||||
<p>Here the Awk script was being used to produce a frequency count of colours in our example file <a href="http://hackerpublicradio.org/eps/hpr2129/file1.txt" title="file1.txt"><code>file1.txt</code></a>. Field 2 in this file is the name of a colour, so the meaning of <code>a[$2]++</code> is:</p>
|
||||
<blockquote>
|
||||
<p>Index the array <code>a</code> by the (string) contents of field 2. If the element does not exist create it. Since Awk is very relaxed about initialisation, this array element will be taken to be zero on creation, and will then be incremented to 1. If the element already exists then its previous value will be incremented.</p>
|
||||
</blockquote>
|
||||
<p>If you were able to look into the resulting array the end result would be:</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>Index</th>
|
||||
<th>Contents</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td>brown</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td>purple</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td>red</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td>yellow</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td>green</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>So, this shows that there is an array element: <code>a["brown"]</code>. Contained in this array element is the number 2 because the colour ‘brown’ was encountered twice.</p>
|
||||
<p>Note that we also know that the expression <code>a[$2]++</code> achieves the same as the assignment <code>a[$2]+=1</code>.</p>
|
||||
<h4 id="looping-through-arrays">Looping through arrays</h4>
|
||||
<p>In the last episode the concept of looping through an array to print it out was introduced. We saw:</p>
|
||||
<pre><code>for (b in a) {
|
||||
print b, a[b]
|
||||
}</code></pre>
|
||||
<p>As is so often the case with learning to write scripts, it is useful to visit more advanced topics early on, even though the concepts behind them may not yet have been properly established.</p>
|
||||
<p>We have not yet examined looping and other statements in Awk, but since we want to be able to process entire arrays we need to look at this one now.</p>
|
||||
<p>In brief, the ‘<code>for</code>’ statement provides a way to repeat a given set of statements a number of times. We will look at this statement and the related ‘<code>while</code>’ statement later in the series.</p>
|
||||
<p>This variant of the ‘<code>for</code>’ statement allows the processing of arrays. It consists of the following components:</p>
|
||||
<pre><code>for (variable in array)
|
||||
body</code></pre>
|
||||
<p>The expression ‘<code>(variable in array)</code>’ results in all of the index values in the nominated array being provided, one at a time. While the loop runs the variable is set to successive index values and the body is executed.</p>
|
||||
<p>The body can consist of a single statement or a group of statements. If a group is used, then curly braces must be used to enclose them.</p>
|
||||
<p>The order in which array index values are provided is not defined – different Awk versions will use different orders. There are extensions within GNU Awk (gawk) which can control this but we will leave this until much later.</p>
|
||||
<p>So, dealing with our example from last episode, we can modify it as follows (with spelling concessions due to the trans-Atlantic nature of this series):</p>
|
||||
<div class="sourceCode"><table class="sourceCode awk numberLines"><tr class="sourceCode"><td class="lineNumbers"><pre>1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
</pre></td><td class="sourceCode"><pre><code class="sourceCode awk"><span class="cf">BEGIN</span> <span class="kw">{</span>
|
||||
<span class="bu">FS</span><span class="op">=</span><span class="st">","</span>
|
||||
<span class="bu">OFS</span><span class="op">=</span><span class="st">","</span>
|
||||
<span class="kw">print</span> <span class="st">"color,count"</span>
|
||||
<span class="kw">}</span>
|
||||
<span class="bu">NR</span> <span class="op">!=</span> <span class="dv">1</span> <span class="kw">{</span>
|
||||
count[<span class="dt">$2</span>]<span class="op">+=</span><span class="dv">1</span>
|
||||
<span class="kw">}</span>
|
||||
<span class="cf">END</span> <span class="kw">{</span>
|
||||
<span class="kw">for</span> (color <span class="kw">in</span> count)
|
||||
<span class="kw">print</span> color<span class="op">,</span> count[color]
|
||||
<span class="kw">}</span></code></pre></td></tr></table></div>
|
||||
<p>This Awk script is available as <a href="http://hackerpublicradio.org/eps/hpr2163/color_count.awk" title="color_count.awk"><code>color_count.awk</code></a>. The array has been renamed from ‘<code>a</code>’ to ‘<code>count</code>’ because it holds counts (frequencies) of the number of times a colour is encountered. The array is indexed by the names of colours in field 2. When we loop through the array in the <em>END</em> rule we use a variable ‘<code>color</code>’ to store the latest index. Note that the unnecessary semicolons and curly braces have been removed (to demonstrate that they can be!).</p>
|
||||
<p>Running the script produces the following output:</p>
|
||||
<pre><code>$ awk -f color_count.awk file1.csv
|
||||
color,count
|
||||
brown,2
|
||||
purple,2
|
||||
red,2
|
||||
yellow,2
|
||||
green,1</code></pre>
|
||||
<h2 id="more-built-in-variables">More built-in variables</h2>
|
||||
<p>In the last episode two more built-in (or predefined) variables were introduced. The first was <em>FS</em>, which we have encountered before, though not in such a form. The <em>FS</em> variable is set through the <em>-F</em> (or <em>--field-separator</em>) command-line option, so ‘<code>-F ","</code>’ on the command line is the same as the statement <code>FS = ","</code> in an Awk script. As we saw, the statement form needs to be in a <em>BEGIN</em> rule to be set early enough in the script.</p>
|
||||
<pre><code>$ awk -F "," 'BEGIN{print "FS is",FS}'
|
||||
FS is ,</code></pre>
|
||||
<p>Of course, <em>FS</em> controls the chosen field separator as has been explained earlier in the series.</p>
|
||||
<p>In the last episode we also saw the <em>OFS</em> variable. This does not have a command-line equivalent. This variable, short for <em><b>O</b>utput <b>F</b>ield <b>S</b>eparator</em>, controls the format of the output record produced by the <code>print</code> statement. Normally it is set to a single space, so a print statement like the following separates its arguments with a single space:</p>
|
||||
<pre><code>$ awk 'BEGIN{print "Hello","World"}'
|
||||
Hello World</code></pre>
|
||||
<p>Note that omitting the comma results in the following:</p>
|
||||
<pre><code>$ awk 'BEGIN{print "Hello" "World"}'
|
||||
HelloWorld</code></pre>
|
||||
<p>This is because Awk has concatenated the two strings before handing them to the <code>print</code> statement.</p>
|
||||
<p>The <em>OFS</em> variable can be a string if required:</p>
|
||||
<pre><code>$ awk 'BEGIN{OFS=" blurg "; print "Hello","World"}'
|
||||
Hello blurg World</code></pre>
|
||||
<p>The contents of <em>OFS</em> only affect the behaviour of the <code>print</code> statement, not <code>printf</code>:</p>
|
||||
<pre><code>$ awk 'BEGIN{OFS="\t"; printf "%s %s\n","Hello","World"}'
|
||||
Hello World</code></pre>
|
||||
<p>Here the first argument to the <code>printf</code> statement, the format string, specifies that two string arguments will be printed followed by a newline. The remaining arguments are the two strings. The contents of <em>OFS</em> have no effect on the output.</p>
|
||||
<h2 id="summary">Summary</h2>
|
||||
<p>This episode covered:</p>
|
||||
<ul>
|
||||
<li>A recap of the last episode</li>
|
||||
<li>Variables: simple or <em>scalar</em> variables</li>
|
||||
<li>Assignment of values to variables</li>
|
||||
<li>Arithmetic operators used in arithmetic expressions</li>
|
||||
<li>Assignment operators</li>
|
||||
<li>Conversion of strings to numbers and vice versa</li>
|
||||
<li>Increment and decrement operators</li>
|
||||
<li>Variables: Awk’s <em>associative</em> arrays (aka <em>hashes</em>)</li>
|
||||
<li>A brief peek at <code>for</code> loops used to scan arrays</li>
|
||||
<li>The built-in or predefined variables <em>FS</em> and <em>OFS</em></li>
|
||||
</ul>
|
||||
<h2 id="links">Links</h2>
|
||||
<ul>
|
||||
<li><em>GNU Awk User’s Guide</em>: <a href="https://www.gnu.org/software/gawk/manual/html_node/index.html" class="uri">https://www.gnu.org/software/gawk/manual/html_node/index.html</a></li>
|
||||
<li>Previous shows on HPR:
|
||||
<ul>
|
||||
<li>“<em>Gnu Awk - Part 1</em>”: <a href="http://hackerpublicradio.org/eps.php?id=2114" class="uri">http://hackerpublicradio.org/eps.php?id=2114</a></li>
|
||||
<li>“<em>Gnu Awk - Part 2</em>”: <a href="http://hackerpublicradio.org/eps.php?id=2129" class="uri">http://hackerpublicradio.org/eps.php?id=2129</a></li>
|
||||
<li>“<em>Gnu Awk - Part 3</em>”: <a href="http://hackerpublicradio.org/eps.php?id=2143" class="uri">http://hackerpublicradio.org/eps.php?id=2143</a></li>
|
||||
</ul></li>
|
||||
<li>Arithmetic expansion in Bash “<em>Some additional Bash Tips</em>”: <a href="http://hackerpublicradio.org/eps.php?id=1951" class="uri">http://hackerpublicradio.org/eps.php?id=1951</a></li>
|
||||
<li>Resources:
|
||||
<ul>
|
||||
<li>Demonstration of arithmetic assignment operators: <a href="http://hackerpublicradio.org/eps/hpr2163/arithmetic_assignment_operators.awk" class="uri">http://hackerpublicradio.org/eps/hpr2163/arithmetic_assignment_operators.awk</a></li>
|
||||
<li>Counting frequencies of particular colours: <a href="http://hackerpublicradio.org/eps/hpr2163/color_count.awk" class="uri">http://hackerpublicradio.org/eps/hpr2163/color_count.awk</a></li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
<!--
|
||||
vim: syntax=markdown:ts=8:sw=4:ai:et:tw=78:fo=tcqn:fdm=marker
|
||||
-->
|
||||
<section class="footnotes">
|
||||
<hr />
|
||||
<ol>
|
||||
<li id="fn1"><p>Actually, standard Awk provides a way of treating such arrays as multi-dimensional, and GNU Awk (gawk) provides true <em>arrays of arrays</em>, but this is rather advanced and non-portable!<a href="#fnref1">↩</a></p></li>
|
||||
</ol>
|
||||
</section>
|
||||
</article>
|
||||
</main>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
BIN
www/eps/hpr2163/hpr2163_full_shownotes.pdf
Executable file
BIN
www/eps/hpr2163/hpr2163_full_shownotes.pdf
Executable file
Binary file not shown.
Reference in New Issue
Block a user