#!/bin/sh
# Page that get_place_data downloads to obtain the table of places.
QUALITY_URL="http://www.gary68.de/osm/qa/unmapped/mecklenburg-vorpommern.htm"
# Base URL of the wiki; get_place_size appends the page name to it in order
# to look up the number of inhabitants.
WIKIPEDIA_URL="http://de.wikipedia.org/wiki"
# Extended regular expression (used with grep -E) selecting the place types.
PLACE_TYPE="(town|city)"
# Some wikipedia pages are named differently from the place itself.
# One entry per line, two TAB-separated columns:
#   1. the place name with every character outside A-Z/a-z removed
#      (umlauts, blanks and punctuation are dropped)
#   2. the wikipedia page name, percent-encoded where it is not plain ASCII
# The table is built with printf so that the separator is guaranteed to be a
# TAB, which is what "cut -f" splits on by default.
PLACE_NAME_MAPPING="$(printf '%s\t%s\n' \
  'Feldberg' 'Feldberger_Seenlandschaft' \
  'BoizenburgElbe' 'Boizenburg' \
  'FrstenbergHavel' 'F%C3%BCrstenberg/Havel' \
  'Goldberg' 'Goldberg_(Mecklenburg)' \
  'Malchow' 'Malchow_(Mecklenburg)' \
  'OstseebadRerik' 'Rerik' \
  'RbelMritz' 'R%C3%B6bel' \
  'Strasburg' 'Strasburg_(Uckermark)' \
  'Tessin' 'Tessin_(bei_Rostock)' \
  'Wesenberg' 'Wesenberg_(Mecklenburg)' \
  'Zarrentin' 'Amt_Zarrentin')"
# Print the relevant excerpt of the quality page on stdout.
#
# Globals: QUALITY_URL (read), PLACE_TYPE (read)
# Outputs: two newlines, the filtered page lines, two newlines
#
# The page is cut off after the line containing "Details all information".
# Of the remainder only the lines matching the place pattern are kept, each
# together with the 2 lines before and the 4 lines after it.
# NOTE(review): with grep -E the unescaped "|" in the pattern acts as an
# alternation, so every line ending in a blank matches as well - confirm that
# this is intended.
get_place_data() {
  local row_pattern="^$PLACE_TYPE | $"

  printf '\n\n'
  wget --quiet --output-document - "$QUALITY_URL" |
    sed -n '1,/Details all information/p' |
    grep -E -B 2 -A 4 "$row_pattern" |
    # drop the "--" separators grep prints between context groups
    grep -v '^--$'
  printf '\n\n'
}
# Print the number of inhabitants found on a wiki page.
#
# Globals:   WIKIPEDIA_URL (read)
# Arguments: $1 - page name, appended to WIKIPEDIA_URL as-is
# Outputs:   the digits of the value on stdout; an empty line if the page
#            could not be fetched or contains no matching line
#
# The value is taken from the line following the last "Einwohner" line: the
# text between the first ">" and the next "<", reduced to its digits.
get_place_size() {
  wget --quiet --output-document - "$WIKIPEDIA_URL/$1" \
    | grep -A 1 "^Einwohner: | $" \
    | tail -n 1 \
    | cut -d ">" -f 2 \
    | cut -d "<" -f 1 \
    | LC_ALL=C sed 's/[^0-9]//g'
  # The sed script is quoted so that the shell cannot mistake it for a file
  # name pattern; LC_ALL=C makes sed drop every non-digit byte (thousands
  # separators, non-ASCII blanks) whatever locale the caller uses.
}
# Translate a place name into the name of its wiki page.
#
# Globals:   PLACE_NAME_MAPPING (read) - one "key page" pair per line,
#            separated by blanks or TABs
# Arguments: $1 - place name as found on the quality page
# Outputs:   the mapped page name if the table has an entry for the place,
#            otherwise $1 unchanged
#
# The lookup key is $1 reduced to the ASCII letters A-Z/a-z. It has to equal
# the first column of an entry exactly; an empty key never matches.
normalize_place_name() {
  local simple_name
  local key
  local page
  # LC_ALL=C: treat the input as bytes so that umlauts are removed no matter
  # which locale the caller runs in.
  simple_name="$(printf '%s\n' "$1" | LC_ALL=C sed 's/[^a-zA-Z]//g')"
  if test -n "$simple_name"; then
    # "read" splits on blanks as well as TABs, so both table layouts work.
    while read -r key page; do
      if test "$key" = "$simple_name" && test -n "$page"; then
        printf '%s\n' "$page"
        return 0
      fi
    done <<EOF
$PLACE_NAME_MAPPING
EOF
  fi
  printf '%s\n' "$1"
}
# Copy stdin to stdout, inserting inhabitant figures for every place.
#
# Globals: PLACE_TYPE (read)
# Calls:   normalize_place_name, get_place_size
# Input:   the lines produced by get_place_data on stdin
# Outputs: every input line (leading/trailing blanks are trimmed by "read"),
#          plus two extra lines in front of each empty line:
#            "<inhabitants> | " and "<inhabitants per node> | "
#
# A line matching the place type pattern starts a new place: its name is the
# text between the first ">" and the next "<" of the PREVIOUS line. On an
# empty line the same extraction from the previous line yields the number of
# nodes, and inhabitants/nodes is printed with two decimals (bc, scale=2).
# The ratio is left empty when the number of inhabitants is unknown or the
# node count contains no non-zero digit, instead of feeding bc a broken or
# zero divisor.
# NOTE(review): with grep -E the unescaped "|" in the pattern acts as an
# alternation, so any line containing a blank is treated as a place type
# line - confirm that this is intended.
get_place_inhabitants() {
  local line
  local previous_line
  local place_name
  local place_size
  local place_nodes
  local ratio
  # -r keeps backslashes literal; the second test handles a final line that
  # lacks a terminating newline.
  while read -r line || test -n "$line"; do
    if printf '%s\n' "$line" | grep -q -E "$PLACE_TYPE | "; then
      place_name="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
      place_name="$(normalize_place_name "$place_name")"
      place_size="$(get_place_size "$place_name")"
    elif test -z "$line"; then
      printf '%s\n' "$place_size | "
      place_nodes="$(printf '%s\n' "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
      ratio=""
      case "$place_nodes" in
        *[1-9]*)
          if test -n "$place_size"; then
            ratio="$(echo "scale=2;$place_size/$place_nodes" | bc)"
          fi
          ;;
      esac
      printf '%s\n' "$ratio | "
    fi
    printf '%s\n' "$line"
    previous_line="$line"
  done
}
# Run every tool in the byte-oriented "C" locale - otherwise 'sed' will not
# filter umlauts. LC_ALL is used because it takes precedence over LANG and
# over any LC_* variable inherited from the caller; clearing LANG alone has
# no effect when one of those is set.
export LC_ALL=C
get_place_data | get_place_inhabitants