Fixed input encoding (by decoding the input with the charset announced by the website)

This commit is contained in:
lars 2012-04-30 11:09:40 +00:00
parent 356f4cf279
commit fd653868ab

View file

@@ -189,7 +189,6 @@ class EventParser(HTMLParser.HTMLParser, object):
See "handle_charref" and "handle_entityref" below
"""
if self.in_attribute:
data = data.decode(INPUT_ENCODING)
event = self.events[-1]
if not data.strip():
# ignore empty strings
@@ -250,6 +249,8 @@ def read_from_url(url):
except IOError, errmsg:
sys.stderr.write("Failed to read from input (%s): %s\n" % (url, errmsg))
sys.exit(2)
encoding = con.headers.getparam('charset')
data = data.decode(encoding)
con.close()
return data