encoding issues fixed
fewer warnings
This commit is contained in:
parent
cc83ebd691
commit
38b7268e27
1 changed files with 30 additions and 8 deletions
|
@ -32,10 +32,12 @@ import time
|
||||||
import re
|
import re
|
||||||
import locale
|
import locale
|
||||||
|
|
||||||
|
|
||||||
IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh"
|
IMPORT_URL = "http://stadtgestalten.org/?q=termin-roh"
|
||||||
EXPORT_FILE = "events.gml"
|
EXPORT_FILE = "events.gml"
|
||||||
ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png"
|
ICON_URL_PREFIX = "http://stadtgestalten.org/event_map/icons/png"
|
||||||
INPUT_ENCODING = 'utf-8'
|
INPUT_ENCODING = 'utf-8'
|
||||||
|
DATE_ENCODING = 'iso8859-15'
|
||||||
# number of additional events to be displayed for the same location
|
# number of additional events to be displayed for the same location
|
||||||
MAX_EXTRA_EVENTS = 3
|
MAX_EXTRA_EVENTS = 3
|
||||||
|
|
||||||
|
@ -102,6 +104,23 @@ TIME_OFFSET_HOURS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def htmlentitydecode(s):
    """Return ``s`` with named HTML entities replaced by unicode characters.

    Every ``&name;`` sequence whose name is a key of
    ``htmlentitydefs.name2codepoint`` is substituted with the matching
    character; all other text is returned unchanged.
    (taken from: http://wiki.python.org/moin/EscapingHtml)
    """
    entity_names = '|'.join(htmlentitydefs.name2codepoint)
    entity_pattern = '&(%s);' % entity_names

    def to_unichr(match):
        # group(1) is the entity name without the surrounding '&' and ';'
        return unichr(htmlentitydefs.name2codepoint[match.group(1)])

    return re.sub(entity_pattern, to_unichr, s)
|
||||||
|
|
||||||
|
|
||||||
|
def htmlentityencode(s):
    """Replace every non-ASCII character of ``s`` with a numeric HTML entity.

    Characters whose ordinal is above 127 become ``&#<ordinal>;``; all other
    characters are copied unchanged. Note that ``&``, ``<`` and ``>`` are NOT
    escaped by this function.
    (taken from: http://wiki.python.org/moin/EscapingHtml)

    Returns a unicode string (the empty string for empty input).
    """
    return u"".join(
        "&#%d;" % ord(char) if ord(char) > 127 else char
        for char in s)
|
||||||
|
|
||||||
|
|
||||||
class EventParser(HTMLParser.HTMLParser, object):
|
class EventParser(HTMLParser.HTMLParser, object):
|
||||||
""" This parser extracts the input from the event page generated by drupal.
|
""" This parser extracts the input from the event page generated by drupal.
|
||||||
BEWARE: the drupal page MUST be configured as a table layout with the above order
|
BEWARE: the drupal page MUST be configured as a table layout with the above order
|
||||||
|
@ -183,7 +202,7 @@ class EventParser(HTMLParser.HTMLParser, object):
|
||||||
pass
|
pass
|
||||||
elif self.current_attribute == COLUMNS['title']:
|
elif self.current_attribute == COLUMNS['title']:
|
||||||
# maybe the title is split by an ampersand entity
|
# maybe the title is split by an ampersand entity
|
||||||
event['title'] = event.get('title', '') + data
|
event['title'] = event.get('title', '') + htmlentityencode(data)
|
||||||
elif self.current_attribute == COLUMNS['time']:
|
elif self.current_attribute == COLUMNS['time']:
|
||||||
if event.has_key("time"):
|
if event.has_key("time"):
|
||||||
# the first "time" field is the start
|
# the first "time" field is the start
|
||||||
|
@ -204,13 +223,13 @@ class EventParser(HTMLParser.HTMLParser, object):
|
||||||
elif self.current_attribute == COLUMNS['category']:
|
elif self.current_attribute == COLUMNS['category']:
|
||||||
event['category'] = data.strip()
|
event['category'] = data.strip()
|
||||||
elif self.current_attribute == COLUMNS['place']:
|
elif self.current_attribute == COLUMNS['place']:
|
||||||
event['place'] = event.get('place', '') + data
|
event['place'] = event.get('place', '') + htmlentityencode(data)
|
||||||
elif self.current_attribute == COLUMNS['latitude']:
|
elif self.current_attribute == COLUMNS['latitude']:
|
||||||
event['latitude'] = data.strip()
|
event['latitude'] = data.strip()
|
||||||
elif self.current_attribute == COLUMNS['longitude']:
|
elif self.current_attribute == COLUMNS['longitude']:
|
||||||
event['longitude'] = data.strip()
|
event['longitude'] = data.strip()
|
||||||
elif self.current_attribute == COLUMNS['organizer']:
|
elif self.current_attribute == COLUMNS['organizer']:
|
||||||
event['organizer'] = event.get('organizer', '') + data
|
event['organizer'] = event.get('organizer', '') + htmlentityencode(data)
|
||||||
else:
|
else:
|
||||||
sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING)))
|
sys.stderr.write("UNKNOWN ATTRIBUTE: %d (%s)\n" % (self.current_attribute, data.encode(INPUT_ENCODING)))
|
||||||
|
|
||||||
|
@ -249,7 +268,7 @@ def get_date_string(timestamp):
|
||||||
sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \
|
sys.stderr.write("Locales (%s) not found: %s\n" % (LOCALE, err_msg) \
|
||||||
+ " Maybe you should run 'aptitude install locales-all' on debian.\n")
|
+ " Maybe you should run 'aptitude install locales-all' on debian.\n")
|
||||||
localtime = time.localtime(timestamp)
|
localtime = time.localtime(timestamp)
|
||||||
return time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime)
|
return htmlentityencode(time.strftime(locale.nl_langinfo(locale.D_T_FMT), localtime))
|
||||||
|
|
||||||
|
|
||||||
def html_escape(text):
|
def html_escape(text):
|
||||||
|
@ -277,8 +296,10 @@ def filtered_events(events):
|
||||||
if not event.has_key('title'):
|
if not event.has_key('title'):
|
||||||
sys.stderr.write("Skipping event without a title\n");
|
sys.stderr.write("Skipping event without a title\n");
|
||||||
elif not (event.has_key('longitude') and event.has_key('latitude')):
|
elif not (event.has_key('longitude') and event.has_key('latitude')):
|
||||||
sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
|
# no error messages -> too many mails sent ...
|
||||||
% event).encode(INPUT_ENCODING))
|
#sys.stderr.write(("Skipping event without long/lat: %(title)s\n" \
|
||||||
|
# % event).encode(INPUT_ENCODING))
|
||||||
|
pass
|
||||||
elif not event.has_key('time'):
|
elif not event.has_key('time'):
|
||||||
sys.stderr.write(("Skipping event without date/time: %(title)s\n" \
|
sys.stderr.write(("Skipping event without date/time: %(title)s\n" \
|
||||||
% event).encode(INPUT_ENCODING))
|
% event).encode(INPUT_ENCODING))
|
||||||
|
@ -360,7 +381,8 @@ def get_gml_from_data(data):
|
||||||
description += '<li style="list-style-image:url(%s)">' \
|
description += '<li style="list-style-image:url(%s)">' \
|
||||||
% get_icon_url(other_event["category"], None)
|
% get_icon_url(other_event["category"], None)
|
||||||
description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \
|
description += '%s: <a href="%s" title="Details" target="_blank">%s</a></li>' \
|
||||||
% (get_date_string(other_event["time"]), other_event["event_url"], html_escape(other_event["title"]))
|
% (html_escape(get_date_string(other_event["time"]).decode(DATE_ENCODING)),
|
||||||
|
other_event["event_url"], html_escape(other_event["title"]))
|
||||||
description += '</ul></li>'
|
description += '</ul></li>'
|
||||||
description += '</ul>'
|
description += '</ul>'
|
||||||
items.append(description)
|
items.append(description)
|
||||||
|
@ -371,7 +393,7 @@ def get_gml_from_data(data):
|
||||||
# size of the icon
|
# size of the icon
|
||||||
items.append('%d,%d' % (size, size))
|
items.append('%d,%d' % (size, size))
|
||||||
# offset of the middle of the icon
|
# offset of the middle of the icon
|
||||||
items.append('%d,%d' % (-size/2, -size/2))
|
items.append('%d,%d' % (0, -size))
|
||||||
# turn the array into a string
|
# turn the array into a string
|
||||||
return '\n'.join(['\t'.join(event) for event in result])
|
return '\n'.join(['\t'.join(event) for event in result])
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue