encoding issues fixed

fewer warnings
This commit is contained in:
lars 2011-03-08 22:01:10 +00:00
parent cc83ebd691
commit 38b7268e27

View file

@ -32,10 +32,12 @@ import time
import re
import locale
IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh"
EXPORT_FILE = "events.gml"
ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png"
INPUT_ENCODING = 'utf-8'
DATE_ENCODING = 'iso8859-15'
# number of additional events to be displayed for the same location
MAX_EXTRA_EVENTS = 3
@ -102,6 +104,23 @@ TIME_OFFSET_HOURS = {
}
def htmlentitydecode(s):
    """Replace named HTML entities in *s* with their unicode characters.

    Only entity names listed in ``htmlentitydefs.name2codepoint`` are
    recognized (e.g. ``&amp;`` becomes ``&``); all other text, including
    unknown names and numeric references such as ``&#228;``, is returned
    unchanged.

    Approach taken from: http://wiki.python.org/moin/EscapingHtml
    """
    codepoints = htmlentitydefs.name2codepoint
    # a single alternation over every known entity name, captured as group 1
    entity_pattern = '&(%s);' % '|'.join(codepoints)

    def to_character(match):
        # look up the code point of the matched entity name
        return unichr(codepoints[match.group(1)])

    return re.sub(entity_pattern, to_character, s)
def htmlentityencode(s):
    """Return *s* with every non-ASCII character written as an HTML entity.

    Characters whose ordinal is greater than 127 are replaced by a decimal
    numeric character reference (``&#NNN;``); all other characters are
    copied unchanged. The pieces are joined into a unicode string.

    Approach taken from: http://wiki.python.org/moin/EscapingHtml
    """
    def encode_char(char):
        codepoint = ord(char)
        if codepoint <= 127:
            return char
        return "&#%d;" % codepoint

    return u"".join(encode_char(char) for char in s)
class EventParser(HTMLParser.HTMLParser, object):
""" This parser extracts the input from the the event page generated by drupal.
BEWARE: the drupal page MUST be configured as a table layout with the above order
@ -183,7 +202,7 @@ class EventParser(HTMLParser.HTMLParser, object):
pass
elif self.current_attribute == COLUMNS['title']:
# maybe the title is split by an ampersand entity
event['title'] = event.get('title', '') + data
event['title'] = event.get('title', '') + htmlentityencode(data)
elif self.current_attribute == COLUMNS['time']:
if event.has_key("time"):
# the first "time" field is the start
@ -204,13 +223,13 @@ class EventParser(HTMLParser.HTMLParser, object):
elif self.current_attribute == COLUMNS['category']:
event['category'] = data.strip()
elif self.current_attribute == COLUMNS['place']:
event['place'] = event.get('place', '') + data
event['place'] = event.get('place', '') + htmlentityencode(data)
elif self.current_attribute == COLUMNS['latitude']:
event['latitude'] = data.strip()
elif self.current_attribute == COLUMNS['longitude']:
event['longitude'] = data.strip()
elif self.current_attribute == COLUMNS['organizer']:
event['organizer'] = event.get('organizer', '') + data
event['organizer'] = event.get('organizer', '') + htmlentityencode(data)
else:
sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING)))
@ -249,7 +268,7 @@ def get_date_string(timestamp):
sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \
+ " Maybe you should run 'aptitude install locales-all' on debian.\n")
localtime = time.localtime(timestamp)
return time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime)
return htmlentityencode(time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime))
def html_escape(text):
@ -277,8 +296,10 @@ def filtered_events(events):
if not event.has_key('title'):
sys.stderr.write("Skipping event without a title\n");
elif not (event.has_key('longitude') and event.has_key('latitude')):
sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
% event).encode(INPUT_ENCODING))
# no error messages -> too many mails sent ...
#sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
# % event).encode(INPUT_ENCODING))
pass
elif not event.has_key('time'):
sys.stderr.write(("Skipping event without date/time: %(title)s\n" \
% event).encode(INPUT_ENCODING))
@ -360,7 +381,8 @@ def get_gml_from_data(data):
description += '<li style="list-style-image:url(%s)">' \
% get_icon_url(other_event["category"], None)
description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \
% (get_date_string(other_event["time"]), other_event["event_url"], html_escape(other_event["title"]))
% (html_escape(get_date_string(other_event["time"]).decode(DATE_ENCODING)),
other_event["event_url"], html_escape(other_event["title"]))
description += '</ul></li>'
description += '</ul>'
items.append(description)
@ -371,7 +393,7 @@ def get_gml_from_data(data):
# size of the icon
items.append('%d,%d' % (size, size))
# offset of the middle of the icon
items.append('%d,%d' % (-size/2, -size/2))
items.append('%d,%d' % (0, -size))
# turn the array into a string
return '\n'.join(['\t'.join(event) for event in result])