codekasten/osm/qa/sparsely_mapped/generate_unmapped_places.sh

#!/bin/sh
#
# Add a calculation of "inhabitants per node" to Gary68's quality evaluation
# data.
# See http://wiki.openstreetmap.org/wiki/Mapping_Quality
#
# Copyright 2009 Lars Kruse
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; version 3 dated June, 2007.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program;  if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#

# URL of the original data (the quality evaluation page that is extended here)
QUALITY_URL="http://www.gary68.de/osm/qa/unmapped/mecklenburg-vorpommern.htm"
# base URL of the wiki pages from which the number of inhabitants is read;
# the page name of a place is appended after a slash
WIKIPEDIA_URL="http://de.wikipedia.org/wiki"
# extended regular expression matching the place types that are processed
PLACE_TYPE="(town|city)"
# this header file is printed before the generated content
HEADER_TEMPLATE_FILE="$(dirname "$0")/mv_sparsely_mapped_towns_header.template"
# Some wikipedia pages are named differently than the place in the quality
# data. Each line below maps a place (first column) to the name of its
# wikipedia page (second column); the columns are separated by one tab.
# The first column must contain the place name with ALL characters except the
# 7-bit letters (a-z, A-Z) removed, e.g. the key "Dmitz" for the page
# "D%C3%B6mitz".
PLACE_NAME_MAPPING="Feldberg	Feldberger_Seenlandschaft
BoizenburgElbe	Boizenburg
Dmitz	D%C3%B6mitz
Friedland	Friedland_(Mecklenburg)
FrstenbergHavel	F%C3%BCrstenberg/Havel
Goldberg	Goldberg_(Mecklenburg)
Malchow	Malchow_(Mecklenburg)
OstseebadRerik	Rerik
RbelMritz	R%C3%B6bel
Schnberg	Sch%C3%B6nberg_(Mecklenburg)
Strasburg	Strasburg_(Uckermark)
Tessin	Tessin_(bei_Rostock)
Wesenberg	Wesenberg_(Mecklenburg)
Zarrentin	Amt_Zarrentin"


# Download the quality page and print the table rows of all places whose type
# matches $PLACE_TYPE.
# Globals:   QUALITY_URL (read), PLACE_TYPE (read)
# Arguments: none
# Outputs:   for every matching type cell the two lines in front of it, the
#            cell itself and the four lines following it
get_place_data() {
	local type_cell="^<td>$PLACE_TYPE</td>$"
	# only the part of the page up to the first line containing
	# "Details all information" is searched
	wget -q -O - "$QUALITY_URL" |
		sed -n "1,/Details all information/p" |
		grep -E -B 2 -A 4 "$type_cell" |
		grep -v "^--$"	# drop the "--" lines grep prints between context groups
}


# Print the number of inhabitants of a place as found on its wiki page.
# Globals:   WIKIPEDIA_URL (read)
# Arguments: $1 - page name of the place; it is appended to $WIKIPEDIA_URL
# Outputs:   the content of the cell in the line following
#            "<td>Einwohner:</td>", reduced to its digits; nothing if no page
#            name was given or no such cell was found
get_place_size() {
	# without a page name only the base URL of the wiki would be downloaded
	test -n "$1" || return 0
	# call "zcat -f" to uncompress a page if necessary
	wget --quiet --output-document - "$WIKIPEDIA_URL/$1" \
			| zcat -f \
			| grep -A 1 "^<td>Einwohner:</td>$" \
			| tail -n 1 | cut -d ">" -f 2 | cut -d "<" -f 1 \
			| sed 's/[^0-9]//g'
	# the sed expression is quoted: unquoted it would be subject to pathname
	# expansion by the shell
}


# Translate the name of a place into the name of its wiki page.
# Globals:   PLACE_NAME_MAPPING (read) - lines of "key<TAB>page name"
# Arguments: $1 - name of the place
# Outputs:   the mapped page name if the name (reduced to its 7-bit letters)
#            is equal to a key of the mapping, otherwise $1 unchanged
normalize_place_name() {
	local simple_name
	local new_name
	# the "C" locale makes sure that every byte of a non-ASCII character
	# (e.g. an umlaut) is removed - independent of the caller's locale
	simple_name="$(printf '%s\n' "$1" | LC_ALL=C sed 's/[^a-zA-Z]//g')"
	# Compare against the complete first column (key followed by a tab).
	# A plain prefix match would map "Gold" to the page of "Goldberg" and an
	# empty key would match every line of the mapping.
	new_name="$(printf '%s\n' "$PLACE_NAME_MAPPING" \
			| awk -F '\t' -v key="$simple_name" \
				'index($0, key "\t") == 1 { print $2; exit }')"
	if test -n "$new_name"; then
		printf '%s\n' "$new_name"
	else
		printf '%s\n' "$1"
	fi
}


# Succeed if $1 is not empty and consists of decimal digits only.
_is_unsigned_integer() {
	case "$1" in
		''|*[!0-9]*) return 1 ;;
	esac
	return 0
}

# Copy the table rows read from stdin to stdout and insert two additional
# cells in front of every "</tr>" line: the number of inhabitants of the place
# and the number of inhabitants per node.
# Globals:   PLACE_TYPE (read)
# Calls:     normalize_place_name, get_place_size
# Arguments: none
# Outputs:   the input lines (leading and trailing whitespace is stripped by
#            "read") plus the two cells per row; the "per node" cell stays
#            empty if the number of inhabitants is unknown or the number of
#            nodes is not a positive integer
get_place_inhabitants() {
	local line
	local previous_line
	local place_name
	local place_size
	local place_nodes
	local ratio
	# "-r" keeps backslashes untouched; the second test handles a final line
	# without a trailing newline
	while read -r line || test -n "$line"; do
		if printf '%s\n' "$line" | grep -q -E "<td>$PLACE_TYPE</td>"; then
			# the line in front of the type cell contains the name of the place
			place_name="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
			place_name="$(normalize_place_name "$place_name")"
			place_size="$(get_place_size "$place_name")"
		elif test "$line" = "</tr>"; then
			# the line in front of the end of the row contains the node count
			place_nodes="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
			ratio=""
			# do not feed "bc" with an empty operand or a zero divisor - it
			# would only print an error message
			if _is_unsigned_integer "$place_size" \
					&& _is_unsigned_integer "$place_nodes"; then
				case "$place_nodes" in
					*[1-9]*) ratio="$(echo "scale=2;$place_size/$place_nodes" | bc)" ;;
				esac
			fi
			printf '%s\n' "<td>$place_size</td>" "<td>$ratio</td>"
		fi
		printf '%s\n' "$line"
		previous_line="$line"
	done
}


# We need the default ("C") locale - otherwise 'sed' will not filter umlauts.
# Clearing LANG alone is not sufficient: LC_ALL (or a single LC_* variable) set
# in the caller's environment takes precedence over LANG, thus LC_ALL is
# forced, too.
export LANG=
export LC_ALL=C

# print the page: header template (with the current date filled in), the
# extended table and the closing tags
sed "s/__DATE__/$(date +%d.%m.%Y)/" "$HEADER_TEMPLATE_FILE"
echo '<table border="1">'
echo '<tr><th>Ort</th><th>Kategory</th><th></th><th>Qualit&auml;t</th><th>Anzahl der Nodes</th><th>Einwohner</th><th>Einwohner pro Node</th></tr>'
get_place_data | get_place_inhabitants
echo '</table></body></html>'