encoding issues fixed
fewer warnings
This commit is contained in:
parent
cc83ebd691
commit
38b7268e27
1 changed files with 30 additions and 8 deletions
|
@ -32,10 +32,12 @@ import time
|
|||
import re
|
||||
import locale
|
||||
|
||||
|
||||
IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh"
|
||||
EXPORT_FILE = "events.gml"
|
||||
ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png"
|
||||
INPUT_ENCODING = 'utf-8'
|
||||
DATE_ENCODING = 'iso8859-15'
|
||||
# number of additional events to be displayed for the same location
|
||||
MAX_EXTRA_EVENTS = 3
|
||||
|
||||
|
@ -102,6 +104,23 @@ TIME_OFFSET_HOURS = {
|
|||
}
|
||||
|
||||
|
||||
def htmlentitydecode(s):
    """Replace HTML character references in ``s`` with the characters they denote.

    Based on http://wiki.python.org/moin/EscapingHtml, extended to cover
    numeric references so that the output of ``htmlentityencode`` (which
    emits ``&#NNN;``) can be decoded again.

    Handled forms:
      * named entities known to ``htmlentitydefs.name2codepoint`` (``&auml;``)
      * decimal references (``&#228;``)
      * hexadecimal references (``&#xe4;`` / ``&#XE4;``)

    Anything else (unknown names, malformed references, code points that
    cannot be represented as a character) is left untouched.

    Returns the decoded string.
    """
    def _decode_reference(match):
        name, decimal, hexadecimal = match.group(1, 2, 3)
        try:
            if name:
                return unichr(htmlentitydefs.name2codepoint[name])
            if decimal:
                return unichr(int(decimal))
            return unichr(int(hexadecimal, 16))
        except (ValueError, OverflowError):
            # code point outside the supported range: keep the original text
            return match.group(0)

    # the pattern only matches names that exist in name2codepoint, so an
    # unknown "&foo;" never reaches the lookup above
    pattern = '&(?:(%s)|#([0-9]+)|#[xX]([0-9a-fA-F]+));' \
            % '|'.join(htmlentitydefs.name2codepoint)
    return re.sub(pattern, _decode_reference, s)
|
||||
|
||||
|
||||
def htmlentityencode(s):
    """Return ``s`` with every character above code point 127 replaced by a
    decimal HTML character reference (``&#NNN;``).

    Characters with a code point of 127 or below are copied unchanged.
    The result is always a unicode string.
    (taken from: http://wiki.python.org/moin/EscapingHtml)
    """
    def _escape(symbol):
        code_point = ord(symbol)
        if code_point <= 127:
            return symbol
        return "&#%d;" % code_point

    return u"".join(map(_escape, s))
|
||||
|
||||
|
||||
class EventParser(HTMLParser.HTMLParser, object):
|
||||
""" This parser extracts the input from the the event page generated by drupal.
|
||||
BEWARE: the drupal page MUST be configured as a table layout with the above order
|
||||
|
@ -183,7 +202,7 @@ class EventParser(HTMLParser.HTMLParser, object):
|
|||
pass
|
||||
elif self.current_attribute == COLUMNS['title']:
|
||||
# maybe the title is split by an ampersand entity
|
||||
event['title'] = event.get('title', '') + data
|
||||
event['title'] = event.get('title', '') + htmlentityencode(data)
|
||||
elif self.current_attribute == COLUMNS['time']:
|
||||
if event.has_key("time"):
|
||||
# the first "time" field is the start
|
||||
|
@ -204,13 +223,13 @@ class EventParser(HTMLParser.HTMLParser, object):
|
|||
elif self.current_attribute == COLUMNS['category']:
|
||||
event['category'] = data.strip()
|
||||
elif self.current_attribute == COLUMNS['place']:
|
||||
event['place'] = event.get('place', '') + data
|
||||
event['place'] = event.get('place', '') + htmlentityencode(data)
|
||||
elif self.current_attribute == COLUMNS['latitude']:
|
||||
event['latitude'] = data.strip()
|
||||
elif self.current_attribute == COLUMNS['longitude']:
|
||||
event['longitude'] = data.strip()
|
||||
elif self.current_attribute == COLUMNS['organizer']:
|
||||
event['organizer'] = event.get('organizer', '') + data
|
||||
event['organizer'] = event.get('organizer', '') + htmlentityencode(data)
|
||||
else:
|
||||
sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING)))
|
||||
|
||||
|
@ -249,7 +268,7 @@ def get_date_string(timestamp):
|
|||
sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \
|
||||
+ " Maybe you should run 'aptitude install locales-all' on debian.\n")
|
||||
localtime = time.localtime(timestamp)
|
||||
return time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime)
|
||||
return htmlentityencode(time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime))
|
||||
|
||||
|
||||
def html_escape(text):
|
||||
|
@ -277,8 +296,10 @@ def filtered_events(events):
|
|||
if not event.has_key('title'):
|
||||
sys.stderr.write("Skipping event without a title\n");
|
||||
elif not (event.has_key('longitude') and event.has_key('latitude')):
|
||||
sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
|
||||
% event).encode(INPUT_ENCODING))
|
||||
# no error messages -> too many mails sent ...
|
||||
#sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
|
||||
# % event).encode(INPUT_ENCODING))
|
||||
pass
|
||||
elif not event.has_key('time'):
|
||||
sys.stderr.write(("Skipping event without date/time: %(title)s\n" \
|
||||
% event).encode(INPUT_ENCODING))
|
||||
|
@ -360,7 +381,8 @@ def get_gml_from_data(data):
|
|||
description += '<li style="list-style-image:url(%s)">' \
|
||||
% get_icon_url(other_event["category"], None)
|
||||
description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \
|
||||
% (get_date_string(other_event["time"]), other_event["event_url"], html_escape(other_event["title"]))
|
||||
% (html_escape(get_date_string(other_event["time"]).decode(DATE_ENCODING)),
|
||||
other_event["event_url"], html_escape(other_event["title"]))
|
||||
description += '</ul></li>'
|
||||
description += '</ul>'
|
||||
items.append(description)
|
||||
|
@ -371,7 +393,7 @@ def get_gml_from_data(data):
|
|||
# size of the icon
|
||||
items.append('%d,%d' % (size, size))
|
||||
# offset of the middle of the icon
|
||||
items.append('%d,%d' % (-size/2, -size/2))
|
||||
items.append('%d,%d' % (0, -size))
|
||||
# turn the array into a string
|
||||
return '\n'.join(['\t'.join(event) for event in result])
|
||||
|
||||
|
|
Loading…
Reference in a new issue