Move under www to ease rsync
This commit is contained in:
23
www/eps/hpr2679/hpr2679_bash13_ex1.sh
Executable file
23
www/eps/hpr2679/hpr2679_bash13_ex1.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

#
# Demonstrates capture groups and the BASH_REMATCH array.
#
# Usage: bash13_ex1.sh sentence
#
# Prints the sentence, and if it begins with three alphabetic words prints
# "Matched" followed by every element of BASH_REMATCH with its index.
#

#
# Three word regular expression. It is anchored at the start of the string
# only, so anything may follow the third word and its optional full stop.
#
re='^([a-zA-Z]+) +([a-zA-Z]+) +([a-zA-Z]+) *\.?'

#
# A sentence is expected as the only argument
#
if [[ $# -ne 1 ]]; then
    echo "Usage: $0 sentence"
    exit 1
fi

echo "Sentence: $1"
if [[ $1 =~ $re ]]; then
    echo "Matched"
    #
    # Walk the indices Bash actually populated (0 = whole match, 1..3 = the
    # capture groups) rather than hard-coding the range, so the loop stays
    # correct if groups are added to or removed from the expression.
    #
    for i in "${!BASH_REMATCH[@]}"; do
        printf '%2d %s\n' "$i" "${BASH_REMATCH[$i]}"
    done
fi
|
||||
|
||||
50
www/eps/hpr2679/hpr2679_bash13_ex2.sh
Executable file
50
www/eps/hpr2679/hpr2679_bash13_ex2.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash

# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
# IP Address parsing revisited
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
#
# Usage: bash13_ex2.sh IP_address
#
# Exit status: 0 if the argument is a valid IP address, 1 otherwise
# (wrong argument count, wrong format, or a component out of range).
#
# An IP address looks like this:
#     192.168.0.5
# Four groups of 1-3 numbers in the range 0..255 separated by dots.
#
re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'

#
# The address is expected as the only argument
#
if [[ $# -ne 1 ]]; then
    echo "Usage: $0 IP_address"
    exit 1
fi

#
# Validate against the regex
#
if [[ $1 =~ $re ]]; then
    #
    # Look at the components and check they are all in range.
    #
    # The regex guarantees each component is 1-3 decimal digits, so it can
    # never be negative; only the upper bound needs testing. The '10#'
    # prefix forces base 10: without it a component with a leading zero
    # such as '08' or '099' is parsed as (invalid) octal, the comparison
    # aborts with "value too great for base" and the address is wrongly
    # accepted.
    #
    problems=
    for i in {1..4}; do
        d="${BASH_REMATCH[$i]}"
        if (( 10#$d > 255 )); then
            # Build the comma-separated list directly; the separator is
            # only added once the list already has an entry
            problems+="${problems:+, }$d"
        fi
    done

    #
    # Report any problems found
    #
    if [[ -n $problems ]]; then
        echo "$1 is not a valid IP address; contains $problems"
        exit 1
    fi

    echo "$1 is a valid IP address"
else
    echo "$1 is not a valid IP address"
    # A malformed address is a failure too, so callers testing the exit
    # status get the same answer as for an out-of-range component
    exit 1
fi
|
||||
14
www/eps/hpr2679/hpr2679_bash13_ex3.sh
Executable file
14
www/eps/hpr2679/hpr2679_bash13_ex3.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash

# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
# Experimenting with backreferences in Bash regular expressions
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
#
# Usage: bash13_ex3.sh string
#
# Reports whether the string contains a word immediately repeated after a
# single space, e.g. 'turnip turnip'.
#

#
# A word of 1-10 characters delimited by word boundaries ('\<' and '\>'),
# one space, then a backreference to the same text. The final '\>' makes
# sure the repeat is a whole word too; without it 'the then' would match
# because 'the' is a prefix of 'then'.
#
# Backreferences and the word-boundary operators are extensions of the
# underlying regex library, not documented Bash features.
#
re='(\<.{1,10}\>) \1\>'

if [[ $1 =~ $re ]]; then
    echo "Matched: $1"
else
    echo "No match: $1"
fi
|
||||
|
||||
46
www/eps/hpr2679/hpr2679_bash13_ex4.sh
Executable file
46
www/eps/hpr2679/hpr2679_bash13_ex4.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash

#
# Reads email addresses from a data file, one per line, and reports the
# name (if present) and the address found on each line. Lines that match
# neither supported format are reported as not recognised.
#

#
# Check that the data file exists
#
data="bash13_ex4.txt"
[[ -e $data ]] || { echo "File $data not found"; exit 1; }

#
# Email addresses can be:
# 1. local-part@domain
# 2. Name <local-part@domain>
#
# The address pattern is defined once and reused in both alternatives. The
# local-part may not start with a dot; the remainder is optional so that a
# single-character local-part such as 'a@example.org' is accepted.
#
# Capture group numbering in the combined expression:
#   1 - the whole alternation
#   2 - the address when format 1 matched
#   3 - the name when format 2 matched
#   4 - the address when format 2 matched
#
addr='[a-zA-Z0-9_][a-zA-Z0-9_.]*@[a-zA-Z0-9.-]+'
part1="($addr)"
part2="([^<]+)<($addr)>"
re="^($part1|$part2)$"

#
# Read and check each line from the file. The '|| [[ -n $line ]]' test
# processes a final line that has no terminating newline, which 'read'
# fills in but reports as a failure.
#
while read -r line || [[ -n $line ]]; do
    #
    # Does it match the regular expression?
    #
    if [[ $line =~ $re ]]; then
        #declare -p BASH_REMATCH
        #
        # Decide which format it is depending on whether element 2 of
        # BASH_REMATCH is zero length
        #
        if [[ -z ${BASH_REMATCH[2]} ]]; then
            # Type 2
            name="${BASH_REMATCH[3]}"
            email="${BASH_REMATCH[4]}"
            # The name group captures everything up to the '<', including
            # the separating space; remove trailing whitespace
            name="${name%"${name##*[![:space:]]}"}"
        else
            # Type 1
            name=
            email="${BASH_REMATCH[2]}"
        fi
        echo "Name: $name"
        echo "Email: $email"
    else
        echo "Not recognised: $line"
    fi
    echo
done < "$data"
|
||||
14
www/eps/hpr2679/hpr2679_bash13_ex4.txt
Executable file
14
www/eps/hpr2679/hpr2679_bash13_ex4.txt
Executable file
@@ -0,0 +1,14 @@
|
||||
A Feldspar <afeldspar@yahoo.ca>
|
||||
mcrawfor@live.com
|
||||
.42@unknown.mars
|
||||
Joel W <joelw@comcast.net>
|
||||
tokuhirom@mac.com
|
||||
kramulous@sbcglobal.net
|
||||
kawasaki@me.com
|
||||
S Meir <smeier@yahoo.com>
|
||||
G Flake <flakeg@comcast.net>
|
||||
R.A.Mollin <ramollin@optonline.net>
|
||||
geekoid@sbcglobal.net
|
||||
vim_use@googlegroups.com
|
||||
vim@vim.org
|
||||
B@tm@n <batty@bat.cave>
|
||||
342
www/eps/hpr2679/hpr2679_full_shownotes.html
Executable file
342
www/eps/hpr2679/hpr2679_full_shownotes.html
Executable file
@@ -0,0 +1,342 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="pandoc">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
<meta name="author" content="Dave Morriss">
|
||||
<title>Extra ancillary Bash tips - 13 (HPR Show 2679)</title>
|
||||
<style type="text/css">code{white-space: pre;}</style>
|
||||
<!--[if lt IE 9]>
|
||||
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
||||
<![endif]-->
|
||||
<link rel="stylesheet" href="http://hackerpublicradio.org/css/hpr.css">
|
||||
</head>
|
||||
|
||||
<body id="home">
|
||||
<div id="container" class="shadow">
|
||||
<header>
|
||||
<h1 class="title">Extra ancillary Bash tips - 13 (HPR Show 2679)</h1>
|
||||
<h2 class="author">Dave Morriss</h2>
|
||||
<hr/>
|
||||
</header>
|
||||
|
||||
<main id="maincontent">
|
||||
<article>
|
||||
<header>
|
||||
<h1>Table of Contents</h1>
|
||||
<nav id="TOC">
|
||||
<ul>
|
||||
<li><a href="#making-decisions-in-bash">Making decisions in Bash</a></li>
|
||||
<li><a href="#capture-groups">Capture groups</a><ul>
|
||||
<li><a href="#bash_rematch"><code>BASH_REMATCH</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#examples">Examples</a><ul>
|
||||
<li><a href="#example-1">Example 1</a></li>
|
||||
<li><a href="#example-2">Example 2</a></li>
|
||||
<li><a href="#example-3">Example 3</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#links">Links</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</header>
|
||||
<h2 id="making-decisions-in-bash">Making decisions in Bash</h2>
|
||||
<p>This is the thirteenth episode in the <em>Bash Tips</em> sub-series. It is the fifth and final of a group of shows about making decisions in Bash.</p>
|
||||
<p>In the last four episodes we saw the types of test Bash provides, and we looked briefly at some of the commands that use these tests. We looked at conditional expressions and all of the operators Bash provides to do this. We concentrated particularly on string comparisons which use <em>glob</em> and <em>extended glob</em> patterns then we devoted an episode to Bash <em>regular expressions</em>.</p>
|
||||
<p>Now we want to look at the final topic within regular expressions, the use of <em>capture groups</em>.</p>
|
||||
<h2 id="capture-groups">Capture groups</h2>
|
||||
<p>If you have followed the series on <code>sed</code> or the one covering the <code>awk</code> language the existence of capture groups will not be a surprise to you. It’s a way in which you can group elements of a regular expression using parentheses to denote a component of the string being compared.</p>
|
||||
<p>For example you might want to look for three-word sentences:</p>
|
||||
<pre><code>re='^([a-zA-Z]+) +([a-zA-Z]+) +([a-zA-Z]+) *\.?'</code></pre>
|
||||
<ul>
|
||||
<li>There are three groups. They consist of <code>([a-zA-Z]+)</code> meaning one or more alphabetic characters.</li>
|
||||
<li>The characters of each word are followed by one or more spaces (<code>' +'</code>) in the first and second cases. The third case is followed by zero or more spaces and an optional full-stop.</li>
|
||||
<li>The entire regular expression is anchored to the start of the string.</li>
|
||||
<li>Only the words themselves are being captured by being in groups, not the intervening spaces.</li>
|
||||
</ul>
|
||||
<p>We will look at a script that uses this regular expression soon.</p>
|
||||
<h3 id="bash_rematch"><code>BASH_REMATCH</code></h3>
|
||||
<p>Bash uses an internal read-only array called <code>BASH_REMATCH</code> to hold what is matched by a regular expression. The zeroth element of the array holds what the entire regular expression has matched, and the rest hold what was matched by any capture groups in the regular expression.</p>
|
||||
<p>Like other regular expression systems each capture group is numbered in order of occurrence, so element 1 of <code>BASH_REMATCH</code> contains the first, element 2 the second and so forth.</p>
|
||||
<p>In <code>sed</code> it is possible to refer to a capture group with a sequence such as <code>'\1'</code>, allowing regular expressions themselves to repeat parts such as <code>'\(cat\)\1'</code>. This is shown by the following <code>sed</code> example:</p>
|
||||
<pre><code>$ echo "catcat" | sed -e 's/\(cat\)\1/match/'
|
||||
match</code></pre>
|
||||
<p>Sadly this is apparently not available in Bash – or at least nothing is documented as far as I can find. (There are references to a partial implementation, but this doesn’t seem to be something to rely on).</p>
|
||||
<p>See the <a href="#example-2">example 2</a> below for some experiments with this.</p>
|
||||
<p>The following downloadable example <a href="hpr2679_bash13_ex1.sh">bash13_ex1.sh</a> demonstrates the use of <code>BASH_REMATCH</code>:</p>
|
||||
<pre><code>$ cat bash13_ex1.sh
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Three word regular expression
|
||||
#
|
||||
re='^([a-zA-Z]+) +([a-zA-Z]+) +([a-zA-Z]+) *\.?'
|
||||
|
||||
#
|
||||
# A sentence is expected as the only argument
|
||||
#
|
||||
if [[ $# -ne 1 ]]; then
|
||||
echo "Usage: $0 sentence"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Sentence: $1"
|
||||
if [[ $1 =~ $re ]]; then
|
||||
echo "Matched"
|
||||
for i in {0..3}; do
|
||||
printf '%2d %s\n' $i "${BASH_REMATCH[$i]}"
|
||||
done
|
||||
fi
|
||||
</code></pre>
|
||||
<p>This uses the regular expression discussed above in an <code>if</code> command. If the regular expression matches then a message is output and in a <code>for</code> loop the elements of <code>BASH_REMATCH</code> are printed with the index.</p>
|
||||
<pre><code>$ ./bash13_ex1.sh 'Aardvarks eat ants.'
|
||||
Sentence: Aardvarks eat ants.
|
||||
Matched
|
||||
0 Aardvarks eat ants.
|
||||
1 Aardvarks
|
||||
2 eat
|
||||
3 ants</code></pre>
|
||||
<p>Note that you cannot rewrite the regular expression using repetition with the expectation that the capture groups will behave as they do in the explicit form:</p>
|
||||
<pre><code>re='^(([a-zA-Z]+) *){3}\.?'</code></pre>
|
||||
<p>There are only two capture groups here, one nested inside the other, and the pair is applied three times. Each repetition overwrites what the previous one captured. The result is that the regular expression matches and <code>BASH_REMATCH[0]</code> contains the whole matched string but elements 1 and 2 will contain only the last matching word:</p>
|
||||
<pre><code> 0 Aardvarks eat ants.
|
||||
1 ants
|
||||
2 ants</code></pre>
|
||||
<h2 id="examples">Examples</h2>
|
||||
<h3 id="example-1">Example 1</h3>
|
||||
<p>In this example we enhance Example 4 from the last episode which checks an IP address for validity.</p>
|
||||
<p>The example (<a href="hpr2679_bash13_ex2.sh">bash13_ex2.sh</a>) is downloadable from the HPR site.</p>
|
||||
<pre><code>$ cat bash13_ex2.sh
|
||||
#!/bin/bash
|
||||
|
||||
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
|
||||
# IP Address parsing revisited
|
||||
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
|
||||
#
|
||||
# An IP address looks like this:
|
||||
# 192.168.0.5
|
||||
# Four groups of 1-3 numbers in the range 0..255 separated by dots.
|
||||
#
|
||||
re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
|
||||
|
||||
#
|
||||
# The address is expected as the only argument
|
||||
#
|
||||
if [[ $# -ne 1 ]]; then
|
||||
echo "Usage: $0 IP_address"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#
|
||||
# Validate against the regex
|
||||
#
|
||||
if [[ $1 =~ $re ]]; then
|
||||
#
|
||||
# Look at the components and check they are all in range
|
||||
#
|
||||
errs=0
|
||||
problems=
|
||||
for i in {1..4}; do
|
||||
d="${BASH_REMATCH[$i]}"
|
||||
if [[ $d -lt 0 || $d -gt 255 ]]; then
|
||||
((errs++))
|
||||
problems+="$d "
|
||||
fi
|
||||
done
|
||||
|
||||
#
|
||||
# Report any problems found
|
||||
#
|
||||
if [[ $errs -gt 0 ]]; then
|
||||
problems="${problems:0:-1}"
|
||||
echo "$1 is not a valid IP address; contains ${problems// /, }"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$1 is a valid IP address"
|
||||
else
|
||||
echo "$1 is not a valid IP address"
|
||||
fi</code></pre>
|
||||
<p>The regular expression in this case is:</p>
|
||||
<pre><code>^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$</code></pre>
|
||||
<p>Note how each group of digits is in parentheses making it a capture group. The intervening dots (<code>'.'</code>) are outside the groups.</p>
|
||||
<p>The loop which checks each group steps a value from 1 to 4, saving each element of <code>BASH_REMATCH</code> in a variable <code>'d'</code> for convenience. If there is an error with a value lower than 0 or greater than 255 a variable <code>'errs'</code> is incremented and the failing number is appended to the variable <code>'problems'</code>.</p>
|
||||
<p>The error count is checked once the loop has completed and if greater than zero an error message is produced with the list of problem numbers and the script exits with a <em>false</em> value.</p>
|
||||
<p>Note that <code>'problems="${problems:0:-1}"'</code> removes the last character (a trailing space) from the variable. Also <code>'${problems// /, }'</code> replaces all spaces in the string with a comma and a space to make a readable list.</p>
|
||||
<p>Examples of running the script:</p>
|
||||
<pre><code>$ ./bash13_ex2.sh 192.168.0.
|
||||
192.168.0. is not a valid IP address
|
||||
|
||||
$ ./bash13_ex2.sh 192.168.0.5
|
||||
192.168.0.5 is a valid IP address
|
||||
|
||||
$ ./bash13_ex2.sh 192.168.500.256
|
||||
192.168.500.256 is not a valid IP address; contains 500, 256</code></pre>
|
||||
<h3 id="example-2">Example 2</h3>
|
||||
<p>Although I could not find any official documentation about back references in Bash regular expressions there does seem to be something in the version I am using. This example demonstrates the use of this feature in a simple way.</p>
|
||||
<p>A back reference consists of a backslash (<code>'\'</code>) and a number. The number refers to the capture group, counting from the left of the regular expression.</p>
|
||||
<p>It looks, after testing, as if only a single digit is catered for, so this means capture groups 1-9.</p>
|
||||
<p>This example is downloadable as usual: <a href="hpr2679_bash13_ex3.sh">bash13_ex3.sh</a></p>
|
||||
<pre><code>$ cat bash13_ex3.sh
|
||||
#!/bin/bash
|
||||
|
||||
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
|
||||
# Experimenting with backreferences in Bash regular expressions
|
||||
# =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~ =~
|
||||
|
||||
re='(\<.{1,10}\>) \1'
|
||||
|
||||
if [[ $1 =~ $re ]]; then
|
||||
echo "Matched: $1"
|
||||
else
|
||||
echo "No match: $1"
|
||||
fi
|
||||
</code></pre>
|
||||
<p>The regular expression matches a word of 1-10 characters followed by a space and the same word already captured.</p>
|
||||
<pre><code>$ ./bash13_ex3.sh 'turnip turnip'
|
||||
Matched: turnip turnip</code></pre>
|
||||
<h3 id="example-3">Example 3</h3>
|
||||
<p>This is a moderately complex example which tries to parse a file of email addresses. The format of email addresses is quite complex, and this script does not try to be comprehensive in what it does. A Bash script is not the best way to perform this validation but it should be of interest nevertheless.</p>
|
||||
<p>The formats catered for are:</p>
|
||||
<ul>
|
||||
<li><em>local-part</em>@<em>domain</em> – such as <code>'vim@vim.org'</code></li>
|
||||
<li><em>name</em> <<em>local-part</em>@<em>domain</em>> – such as <code>'HPR List <hpr@hackerpublicradio.org>'</code></li>
|
||||
</ul>
|
||||
<p>There are others, but these are the ones most likely to be encountered.</p>
|
||||
<p>This downloadable example (<a href="hpr2679_bash13_ex4.sh">bash13_ex4.sh</a>) reads data from a file (<a href="hpr2679_bash13_ex4.txt">bash13_ex4.txt</a>) which is also downloadable.</p>
|
||||
<pre><code>$ cat bash13_ex4.sh
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Check that the data file exists
|
||||
#
|
||||
data="bash13_ex4.txt"
|
||||
[ -e "$data" ] || { echo "File $data not found"; exit 1; }
|
||||
|
||||
#
|
||||
# Email addresses can be:
|
||||
# 1. local-part@domain
|
||||
# 2. Name <local-part@domain>
|
||||
#
|
||||
part1='([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)'
|
||||
part2='([^<]+)<([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)>'
|
||||
re="^($part1|$part2)$"
|
||||
|
||||
#
|
||||
# Read and check each line from the file
|
||||
#
|
||||
while read -r line; do
|
||||
#
|
||||
# Does it match the regular expression?
|
||||
#
|
||||
if [[ $line =~ $re ]]; then
|
||||
#declare -p BASH_REMATCH
|
||||
#
|
||||
# Decide which format it is depending on whether element 2 of
|
||||
# BASH_REMATCH is zero length
|
||||
#
|
||||
if [[ -z ${BASH_REMATCH[2]} ]]; then
|
||||
# Type 2
|
||||
name="${BASH_REMATCH[3]}"
|
||||
email="${BASH_REMATCH[4]}"
|
||||
else
|
||||
# Type 1
|
||||
name=
|
||||
email="${BASH_REMATCH[2]}"
|
||||
fi
|
||||
echo "Name: $name"
|
||||
echo "Email: $email"
|
||||
else
|
||||
echo "Not recognised: $line"
|
||||
fi
|
||||
echo
|
||||
done < "$data"</code></pre>
|
||||
<p>This script uses a single regular expression to match either of the formats. For convenience, because it is so long, I have built the variable <code>'re'</code> from the two variables <code>'part1'</code> and <code>'part2'</code>. The two alternative regular expressions are enclosed in parentheses, and separated by a vertical bar <code>'|'</code>. The entire thing is anchored at the start and end of the string. The sub-expressions are:</p>
|
||||
<pre><code>([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)
|
||||
([^<]+)<([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)></code></pre>
|
||||
<p>The first one matches the <em>local-part</em>@<em>domain</em> format and the second matches <em>name</em> <<em>local-part</em>@<em>domain</em>>. Let’s examine them both in detail:</p>
|
||||
<p><code>([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)</code></p>
|
||||
<ul>
|
||||
<li>The first square bracketed part matches any letter, any digit or an underscore. This is because the <em>local-part</em> cannot begin with a dot <code>'.'</code>.</li>
|
||||
<li>The second square bracketed part matches the rest of the <em>local-part</em>. In real life many more characters are allowed, but we’re keeping it simpler here.</li>
|
||||
<li>This is followed by a <code>'@'</code> symbol and the final square bracketed part that matches the <em>domain</em>.</li>
|
||||
<li>The entire sub-expression is enclosed in parentheses as a capture group.</li>
|
||||
</ul>
|
||||
<p><code>([^<]+)<([a-zA-Z0-9_][a-zA-Z0-9_.]+@[a-zA-Z0-9.-]+)></code></p>
|
||||
<ul>
|
||||
<li>Here there are two capture groups. The first contains a square bracketed expression which defines any character that is <u>not</u> a less than sign (<code>'<'</code>). The modifier is a plus sign meaning one to any number of these characters.</li>
|
||||
<li>Between the groups is a less than symbol which we don’t want to capture.</li>
|
||||
<li>The second group is the same as the first sub-expression, and is followed by a greater than sign (<code>'>'</code>).</li>
|
||||
</ul>
|
||||
<p>A <code>while</code> loop with a <code>read</code> command is used to read from the data file which was defined earlier in the script and its existence verified.</p>
|
||||
<p>Inside the loop the regular expression is compared with the line just read from the file. If it doesn’t match then the line is reported as not recognised. If it matches then the script can collect the elements from the <code>BASH_REMATCH</code> array and report them.</p>
|
||||
<p>Because the regular expression is complex the way in which the important capture groups are written to <code>BASH_REMATCH</code> differs according to which sub-expression matched. The script contains a <code>declare -p</code> command which is commented out. Removing the <code>'#'</code> from this activates it; it is a way of displaying the attributes and contents of an array in Bash (as a command which could be used to build the array).</p>
|
||||
<p>Doing this and looking at what happens when the script encounters addresses of the two types shows the following type of thing:</p>
|
||||
<pre><code>declare -ar BASH_REMATCH=([0]="kawasaki@me.com" [1]="kawasaki@me.com" [2]="kawasaki@me.com" [3]="" [4]="")
|
||||
Name:
|
||||
Email: kawasaki@me.com
|
||||
|
||||
declare -ar BASH_REMATCH=([0]="S Meir <smeier@yahoo.com>" [1]="S Meir <smeier@yahoo.com>" [2]="" [3]="S Meir " [4]="smeier@yahoo.com")
|
||||
Name: S Meir
|
||||
Email: smeier@yahoo.com</code></pre>
|
||||
<p>The first address <code>kawasaki@me.com</code> matches the first sub-expression.</p>
|
||||
<ul>
|
||||
<li>Remember that element zero of <code>BASH_REMATCH</code> contains everything matched by the regular expression, so we can ignore that.</li>
|
||||
<li>Element one also matches everything because we have created an extra capture group by enclosing the two alternative sub-expressions in parentheses. This can also be ignored.</li>
|
||||
<li>If the address matches the first sub-expression it will be written to the second element of <code>BASH_REMATCH</code> because this is the second capture group.</li>
|
||||
<li>The third and fourth capture groups in the second sub-expression are not matched in this case so these elements of <code>BASH_REMATCH</code> are empty.</li>
|
||||
</ul>
|
||||
<p>The second address <code>S Meir <smeier@yahoo.com></code> matches the second sub-expression in the regular expression.</p>
|
||||
<ul>
|
||||
<li>We can ignore <code>BASH_REMATCH</code> elements zero and one for the same reason as before.</li>
|
||||
<li>Element 2 is empty because the address does not match the second capture group.</li>
|
||||
<li>Elements three and four match the third and fourth capture groups.</li>
|
||||
</ul>
|
||||
<p>The script uses the fact that element two of <code>BASH_REMATCH</code> is zero length (<code>'-z'</code>) to determine which type of address was matched and to report the name and email address details accordingly.</p>
|
||||
<p>Here is an excerpt from what is displayed when the script is run (with the <code>declare</code> command commented out):</p>
|
||||
<pre><code>Name: A Feldspar
|
||||
Email: afeldspar@yahoo.ca
|
||||
|
||||
Name:
|
||||
Email: mcrawfor@live.com
|
||||
|
||||
Not recognised: .42@unknown.mars
|
||||
...</code></pre>
|
||||
<p>Note: these are dummy addresses.</p>
|
||||
<h2 id="links">Links</h2>
|
||||
<ul>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html">"<em>GNU BASH Reference Manual</em>"</a>
|
||||
<ul>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs">"Bash Conditional Constructs"</a></li>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html#Bash-Conditional-Expressions">"Bash Conditional Expressions"</a></li>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html#Bourne-Shell-Builtins">"Bourne Shell Builtins"</a></li>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html#The-Set-Builtin">"The <code>set</code> Builtin"</a></li>
|
||||
<li><a href="https://www.gnu.org/software/bash/manual/bash.html#Pattern-Matching">"Bash Pattern Matching"</a></li>
|
||||
</ul></li>
|
||||
<li><p><a href="http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html">"<em>POSIX Shell Command Language</em>"</a> - documentation of all of the POSIX features mentioned in this series.</p></li>
|
||||
<li><p><a href="http://hackerpublicradio.org/series.php?id=42">HPR series: <em>Bash Scripting</em></a></p></li>
|
||||
<li>Previous episodes under the heading <em>Bash Tips</em>:
|
||||
<ol>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr1648">HPR episode 1648 "<em>Bash parameter manipulation</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr1843">HPR episode 1843 "<em>Some Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr1884">HPR episode 1884 "<em>Some more Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr1903">HPR episode 1903 "<em>Some further Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr1951">HPR episode 1951 "<em>Some additional Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2045">HPR episode 2045 "<em>Some other Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2278">HPR episode 2278 "<em>Some supplementary Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2293">HPR episode 2293 "<em>More supplementary Bash tips</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2639">HPR episode 2639 "<em>Some ancillary Bash tips - 9</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2649">HPR episode 2649 "<em>More ancillary Bash tips - 10</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2659">HPR episode 2659 "<em>Further ancillary Bash tips - 11</em>"</a></li>
|
||||
<li><a href="http://hackerpublicradio.org/eps/hpr2669">HPR episode 2669 "<em>Additional ancillary Bash tips - 12</em>"</a></li>
|
||||
</ol></li>
|
||||
<li>Resources:
|
||||
<ul>
|
||||
<li>Examples: <a href="hpr2679_bash13_ex1.sh">bash13_ex1.sh</a>, <a href="hpr2679_bash13_ex2.sh">bash13_ex2.sh</a>, <a href="hpr2679_bash13_ex3.sh">bash13_ex3.sh</a>, <a href="hpr2679_bash13_ex4.sh">bash13_ex4.sh</a>, <a href="hpr2679_bash13_ex4.txt">bash13_ex4.txt</a></li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
</article>
|
||||
</main>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user