lars
ead2c41bb3
* fixed parsing of number of inhabitants with html entities * fixed wikipedia name of Grabow
109 lines
3.5 KiB
Bash
Executable file
109 lines
3.5 KiB
Bash
Executable file
#!/bin/sh
|
|
#
|
|
# Add a calculation of "inhabitants per node" to Gary68's quality evaluation
|
|
# data.
|
|
# See http://wiki.openstreetmap.org/wiki/Mapping_Quality
|
|
#
|
|
# Copyright 2009 Lars Kruse
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; version 3 dated June, 2007.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
|
|
# MA 02110-1301, USA.
|
|
#
|
|
|
|
# URL of the quality evaluation page that provides the original data
QUALITY_URL="http://www.gary68.de/osm/qa/unmapped/mecklenburg-vorpommern.htm"
# base URL of the wiki pages used for looking up the number of inhabitants
WIKIPEDIA_URL="http://de.wikipedia.org/wiki"
# extended regular expression matching the place categories to be processed
PLACE_TYPE="(town|city)"
# include this header file before the content
HEADER_TEMPLATE_FILE="$(dirname "$0")/mv_sparsely_mapped_towns_header.template"
# Some wikipedia pages are named differently than the corresponding place.
# The table consists of one "<key><TAB><wikipedia page name>" line per place.
# The key (first column) is the place name with ALL characters removed that
# are not 7-bit letters (e.g. "Dömitz" -> "Dmitz").
# The separating tab is emitted by printf instead of being typed literally:
# a literal tab is invisible and is easily turned into spaces by an editor,
# which would silently break the column based lookup.
PLACE_NAME_MAPPING="$(printf '%s\t%s\n' \
  'Feldberg' 'Feldberger_Seenlandschaft' \
  'BoizenburgElbe' 'Boizenburg' \
  'Dmitz' 'D%C3%B6mitz' \
  'Friedland' 'Friedland_(Mecklenburg)' \
  'FrstenbergHavel' 'F%C3%BCrstenberg/Havel' \
  'Goldberg' 'Goldberg_(Mecklenburg)' \
  'Grabow' 'Grabow_(Elde)' \
  'Malchow' 'Malchow_(Mecklenburg)' \
  'OstseebadRerik' 'Rerik' \
  'RbelMritz' 'R%C3%B6bel' \
  'Schnberg' 'Sch%C3%B6nberg_(Mecklenburg)' \
  'Strasburg' 'Strasburg_(Uckermark)' \
  'Tessin' 'Tessin_(bei_Rostock)' \
  'Wesenberg' 'Wesenberg_(Mecklenburg)' \
  'Zarrentin' 'Amt_Zarrentin')"
|
|
|
|
|
|
# Download the quality evaluation page and print the table rows of all places
# whose category matches PLACE_TYPE.
# Globals:   QUALITY_URL (read), PLACE_TYPE (read)
# Outputs:   for every matching place the category line together with the
#            two lines before and the four lines after it (stdout)
# Returns:   1 (with a message on stderr) if the download failed, otherwise
#            the exit status of the final filter
get_place_data() {
  local page
  # fetch the page separately: a failing download inside of a pipeline would
  # go unnoticed and silently result in an empty table
  if ! page="$(wget --quiet --output-document - "$QUALITY_URL")"; then
    echo "Failed to download the quality data: $QUALITY_URL" >&2
    return 1
  fi
  # - only the part up to the "Details all information" line is relevant
  # - "grep" separates non-adjacent groups of context lines with "--": these
  #   separators are removed
  printf '%s\n' "$page" \
    | sed -n "1,/Details all information/p" \
    | grep -E -A 4 -B 2 "^<td>$PLACE_TYPE</td>$" \
    | grep -v "^--$"
}
|
|
|
|
|
|
# Print the number of inhabitants of a place according to its wiki page.
# Globals:   WIKIPEDIA_URL (read)
# Arguments: $1 - name of the wiki page of the place
# Outputs:   the number of inhabitants (digits only) on stdout; the output is
#            empty if the page could not be retrieved or if it contains no
#            "Einwohner:" table row
get_place_size() {
  # - call "zcat -f" to uncompress a page if necessary
  # - the number is taken from the line following the "Einwohner:" cell: the
  #   text between the first ">" and the next "<"
  # - HTML entities used as thousands separators are removed completely
  #   (decimal "&#160;"/"&#8201;", hexadecimal "&#x2009;" or named "&nbsp;"),
  #   otherwise the digits of the entity would become part of the number
  # - finally ignore stuff like " " (space) or "." within the number
  # - the C locale forces "sed" to filter byte-wise
  wget --quiet --output-document - "$WIKIPEDIA_URL/$1" \
    | zcat -f \
    | grep -A 1 "^<td>Einwohner:</td>$" \
    | tail -n 1 \
    | cut -d ">" -f 2 \
    | cut -d "<" -f 1 \
    | LC_ALL=C sed -E -e 's/&#?[0-9A-Za-z]+;//g' -e 's/[^0-9]//g'
}
|
|
|
|
|
|
# Translate the name of a place into the name of its wiki page.
# Globals:   PLACE_NAME_MAPPING (read) - one "<key> <page name>" pair per
#            line; the columns may be separated by tabs or spaces
# Arguments: $1 - name of the place
# Outputs:   the mapped page name if the letters-only form of the place name
#            is equal to a key of the mapping, otherwise the unchanged name
normalize_place_name() {
  local simple_name
  local new_name
  # reduce the name to its 7-bit letters (the C locale makes sure that
  # umlauts are removed regardless of the environment of the caller)
  simple_name="$(printf '%s\n' "$1" | LC_ALL=C sed 's/[^a-zA-Z]//g')"
  new_name=""
  # An empty key must not be looked up. The key is compared with the complete
  # first column (a prefix is not sufficient) and only the first match is
  # used - thus the result never consists of more than one name.
  if test -n "$simple_name"; then
    new_name="$(printf '%s\n' "$PLACE_NAME_MAPPING" \
      | awk -v key="$simple_name" '$1 == key { print $2; exit }')"
  fi
  if test -z "$new_name"; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "$new_name"
  fi
}
|
|
|
|
|
|
# Filter that appends two cells to every table row read from stdin: the
# number of inhabitants of the place and the ratio "inhabitants per node".
# Globals:   PLACE_TYPE (read)
# Inputs:    the rows as printed by get_place_data (stdin); the place name is
#            expected in the line before the category line and the number of
#            nodes in the line before the closing "</tr>"
# Outputs:   all input lines (surrounding whitespace is stripped) with both
#            cells inserted in front of each "</tr>" line (stdout); the ratio
#            cell stays empty if one of the numbers is missing, not numeric
#            or if the number of nodes is zero
get_place_inhabitants() {
  local line
  local previous_line
  local place_name
  local place_size
  local place_nodes
  local ratio
  previous_line=""
  place_size=""
  # "-r" keeps backslashes untouched; IFS is deliberately not cleared: the
  # comparison with "</tr>" below relies on stripped whitespace
  while read -r line; do
    if printf '%s\n' "$line" | grep -q -E "<td>$PLACE_TYPE</td>"; then
      # the line before the category contains the name of the place
      place_name="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
      place_name="$(normalize_place_name "$place_name")"
      place_size="$(get_place_size "$place_name")"
    elif test "$line" = "</tr>"; then
      printf '%s\n' "<td>$place_size</td>"
      # the line before the end of the row contains the number of nodes
      place_nodes="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
      ratio=""
      # call "bc" only with a valid dividend and a positive divisor - this
      # avoids syntax and "divide by zero" errors
      if printf '%s\n' "$place_size" | grep -q -E '^[0-9]+$' \
        && printf '%s\n' "$place_nodes" | grep -q -E '^0*[1-9][0-9]*$'; then
        ratio="$(echo "scale=2;$place_size/$place_nodes" | bc)"
      fi
      printf '%s\n' "<td>$ratio</td>"
    fi
    printf '%s\n' "$line"
    previous_line="$line"
  done
}
|
|
|
|
|
|
# We need the default ("C") locale - otherwise 'sed' will not filter umlauts.
# LC_ALL takes precedence over LANG and over all other LC_* variables: thus
# it must be set as well, since clearing only LANG has no effect if the
# environment of the caller defines LC_ALL or LC_CTYPE.
LANG=C
LC_ALL=C
export LANG LC_ALL

# print the page: the header (with the current date filled in) followed by
# the table of places
sed "s/__DATE__/$(date +%d.%m.%Y)/" "$HEADER_TEMPLATE_FILE"
echo '<table border="1">'
echo '<tr><th>Ort</th><th>Kategory</th><th></th><th>Qualität</th><th>Anzahl der Nodes</th><th>Einwohner</th><th>Einwohner pro Node</th></tr>'
get_place_data | get_place_inhabitants
echo '</table></body></html>'
|
|
|