encoding issues fixed

fewer warnings
This commit is contained in:
lars 2011-03-08 22:01:10 +00:00
parent cc83ebd691
commit 38b7268e27

View file

@ -32,10 +32,12 @@ import time
import re import re
import locale import locale
IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh" IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh"
EXPORT_FILE = "events.gml" EXPORT_FILE = "events.gml"
ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png" ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png"
INPUT_ENCODING = 'utf-8' INPUT_ENCODING = 'utf-8'
DATE_ENCODING = 'iso8859-15'
# number of additional events to be displayed for the same location # number of additional events to be displayed for the same location
MAX_EXTRA_EVENTS = 3 MAX_EXTRA_EVENTS = 3
@ -102,6 +104,23 @@ TIME_OFFSET_HOURS = {
} }
def htmlentitydecode(s):
    """Replace named HTML entities (e.g. "&amp;") in s with their characters.

    Only entity names listed in htmlentitydefs.name2codepoint are handled;
    text that does not match "&<known name>;" is returned unchanged.
    (recipe taken from: http://wiki.python.org/moin/EscapingHtml)
    """
    codepoints = htmlentitydefs.name2codepoint
    # a single alternation matching any known entity name between "&" and ";"
    entity_pattern = '&(%s);' % '|'.join(codepoints)

    def replace_entity(match):
        # group(1) is the bare entity name without "&" and ";"
        return unichr(codepoints[match.group(1)])

    return re.sub(entity_pattern, replace_entity, s)
def htmlentityencode(s):
    """Replace every non-ASCII character of s with a numeric HTML entity.

    Characters whose ordinal is above 127 become "&#<ordinal>;"; all other
    characters are kept as they are. The pieces are joined with a unicode
    separator, so the result is a unicode string.
    (recipe taken from: http://wiki.python.org/moin/EscapingHtml)
    """
    def as_entity(char):
        ordinal = ord(char)
        if ordinal > 127:
            return "&#%d;" % ordinal
        return char

    return u"".join([as_entity(char) for char in s])
class EventParser(HTMLParser.HTMLParser, object): class EventParser(HTMLParser.HTMLParser, object):
""" This parser extracts the input from the the event page generated by drupal. """ This parser extracts the input from the the event page generated by drupal.
BEWARE: the drupal page MUST be configured as a table layout with the above order BEWARE: the drupal page MUST be configured as a table layout with the above order
@ -183,7 +202,7 @@ class EventParser(HTMLParser.HTMLParser, object):
pass pass
elif self.current_attribute == COLUMNS['title']: elif self.current_attribute == COLUMNS['title']:
# maybe the title is splitted by an ampersand entity # maybe the title is splitted by an ampersand entity
event['title'] = event.get('title', '') + data event['title'] = event.get('title', '') + htmlentityencode(data)
elif self.current_attribute == COLUMNS['time']: elif self.current_attribute == COLUMNS['time']:
if event.has_key("time"): if event.has_key("time"):
# the first "time" field is the start # the first "time" field is the start
@ -204,13 +223,13 @@ class EventParser(HTMLParser.HTMLParser, object):
elif self.current_attribute == COLUMNS['category']: elif self.current_attribute == COLUMNS['category']:
event['category'] = data.strip() event['category'] = data.strip()
elif self.current_attribute == COLUMNS['place']: elif self.current_attribute == COLUMNS['place']:
event['place'] = event.get('place', '') + data event['place'] = event.get('place', '') + htmlentityencode(data)
elif self.current_attribute == COLUMNS['latitude']: elif self.current_attribute == COLUMNS['latitude']:
event['latitude'] = data.strip() event['latitude'] = data.strip()
elif self.current_attribute == COLUMNS['longitude']: elif self.current_attribute == COLUMNS['longitude']:
event['longitude'] = data.strip() event['longitude'] = data.strip()
elif self.current_attribute == COLUMNS['organizer']: elif self.current_attribute == COLUMNS['organizer']:
event['organizer'] = event.get('organizer', '') + data event['organizer'] = event.get('organizer', '') + htmlentityencode(data)
else: else:
sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING))) sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING)))
@ -249,7 +268,7 @@ def get_date_string(timestamp):
sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \ sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \
+ " Maybe you should run 'aptitude install locales-all' on debian.\n") + " Maybe you should run 'aptitude install locales-all' on debian.\n")
localtime = time.localtime(timestamp) localtime = time.localtime(timestamp)
return time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime) return htmlentityencode(time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime))
def html_escape(text): def html_escape(text):
@ -277,8 +296,10 @@ def filtered_events(events):
if not event.has_key('title'): if not event.has_key('title'):
sys.stderr.write("Skipping event without a title\n"); sys.stderr.write("Skipping event without a title\n");
elif not (event.has_key('longitude') and event.has_key('latitude')): elif not (event.has_key('longitude') and event.has_key('latitude')):
sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \ # no error messages -> too many mails sent ...
% event).encode(INPUT_ENCODING)) #sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
# % event).encode(INPUT_ENCODING))
pass
elif not event.has_key('time'): elif not event.has_key('time'):
sys.stderr.write(("Skipping event without date/time: %(title)s\n" \ sys.stderr.write(("Skipping event without date/time: %(title)s\n" \
% event).encode(INPUT_ENCODING)) % event).encode(INPUT_ENCODING))
@ -360,7 +381,8 @@ def get_gml_from_data(data):
description += '<li style="list-style-image:url(%s)">' \ description += '<li style="list-style-image:url(%s)">' \
% get_icon_url(other_event["category"], None) % get_icon_url(other_event["category"], None)
description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \ description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \
% (get_date_string(other_event["time"]), other_event["event_url"], html_escape(other_event["title"])) % (html_escape(get_date_string(other_event["time"]).decode(DATE_ENCODING)),
other_event["event_url"], html_escape(other_event["title"]))
description += '</ul></li>' description += '</ul></li>'
description += '</ul>' description += '</ul>'
items.append(description) items.append(description)
@ -371,7 +393,7 @@ def get_gml_from_data(data):
# size of the icon # size of the icon
items.append('%d,%d' % (size, size)) items.append('%d,%d' % (size, size))
# offset of the middle of the icon # offset of the middle of the icon
items.append('%d,%d' % (-size/2, -size/2)) items.append('%d,%d' % (0, -size))
# turn the array into a string # turn the array into a string
return '\n'.join(['\t'.join(event) for event in result]) return '\n'.join(['\t'.join(event) for event in result])