OSM scripts:

* moved the bulk mailing script to a different location
* added a script to improve the output of the OSM-QA data for MV
2009-01-06 10:46:00 +00:00 · 2009-01-06 10:46:00 +00:00 · f3c33396cc
commit f3c33396cc
parent c61ecb1c0e
6 changed files with 211 additions and 0 deletions
--- a/osm/qa/generate_unmapped_places.sh
+++ b/osm/qa/generate_unmapped_places.sh
@ -0,0 +1,77 @@
+#!/bin/sh
+
+# original data (QA page listing the places to be checked)
+QUALITY_URL="http://www.gary68.de/osm/qa/unmapped/mecklenburg-vorpommern.htm"
+# where to get the number of inhabitants
+WIKIPEDIA_URL="http://de.wikipedia.org/wiki"
+# extended regular expression selecting the place types to keep
+PLACE_TYPE="(town|city)"
+# Some wikipedia pages have different names than the places.
+# Format: one entry per line, two columns separated by a single tab.
+#   column 1: the place name with ALL characters except the 7-bit letters
+#             (a-z, A-Z) removed, e.g. "FrstenbergHavel"
+#   column 2: the page name that is appended to WIKIPEDIA_URL; non-ASCII
+#             characters are written percent-encoded (UTF-8), since the
+#             script runs with the default locale
+PLACE_NAME_MAPPING="Feldberg	Feldberger_Seenlandschaft
+BoizenburgElbe	Boizenburg
+FrstenbergHavel	F%C3%BCrstenberg/Havel
+Goldberg	Goldberg_(Mecklenburg)
+Malchow	Malchow_(Mecklenburg)
+OstseebadRerik	Rerik
+RbelMritz	R%C3%B6bel
+Strasburg	Strasburg_(Uckermark)
+Tessin	Tessin_(bei_Rostock)
+Wesenberg	Wesenberg_(Mecklenburg)
+Zarrentin	Amt_Zarrentin"
+
+
+get_place_data() {
+	echo '<html><body><table border="1">'
+	wget --quiet --output-document - "$QUALITY_URL" \
+			| sed -n "1,/Details all information/p" \
+			| grep -E -A 4 -B 2 "^<td>$PLACE_TYPE</td>$" \
+			| grep -v "^--$"
+	echo '</table></body></html>'
+}
+
+
+get_place_size() {
+	wget --quiet --output-document - "$WIKIPEDIA_URL/$1" \
+			| grep -A 1 "^<td>Einwohner:</td>$" \
+			| tail -1 | cut -d ">" -f 2 | cut -d "<" -f 1 \
+			| sed s/[^0-9]//g
+}
+
+
+normalize_place_name() {
+	local simple_name="$(echo "$1" | sed s/[^a-zA-Z]//g)"
+	local new_name="$(echo "$PLACE_NAME_MAPPING" | grep "^$simple_name" | cut -f 2)"
+	if test -z "$new_name"
+	  then	echo "$1"
+	  else	echo "$new_name"
+	 fi
+}
+
+
+get_place_inhabitants() {
+	local previous_line
+	local place_name
+	local place_size
+	local place_nodes
+	while read line
+	  do	if echo "$line" | grep -q -E "<td>$PLACE_TYPE</td>"
+			  then	place_name="$(echo "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
+					place_name="$(normalize_place_name "$place_name")"
+					place_size="$(get_place_size "$place_name")"
+			  else	if echo "$line" | grep -q "^</tr>$"
+					  then	echo "<td>$place_size</td>"
+							place_nodes="$(echo "$previous_line" | cut -d ">" -f 2 | cut -d "<" -f 1)"
+							echo "<td>$(echo "scale=2;$place_size/$place_nodes" | bc)</td>"
+					 fi
+			 fi
+			echo "$line"
+			previous_line="$line"
+	 done
+}
+
+
+# we need the default locales - otherwise 'sed' will not filter umlauts
+export LANG=
+
+get_place_data | get_place_inhabitants
+