Fixed input encoding (by applying the encoding announced by the website)
This commit is contained in:
parent
356f4cf279
commit
fd653868ab
1 changed file with 2 additions and 1 deletion
|
@@ -189,7 +189,6 @@ class EventParser(HTMLParser.HTMLParser, object):
|
|||
See "handle_charref" and "handle_entityref" below
|
||||
"""
|
||||
if self.in_attribute:
|
||||
data = data.decode(INPUT_ENCODING)
|
||||
event = self.events[-1]
|
||||
if not data.strip():
|
||||
# ignore empty strings
|
||||
|
@@ -250,6 +249,8 @@ def read_from_url(url):
|
|||
except IOError, errmsg:
|
||||
sys.stderr.write("Failed to read from input (%s): %s\n" % (url, errmsg))
|
||||
sys.exit(2)
|
||||
encoding = con.headers.getparam('charset')
|
||||
data = data.decode(encoding)
|
||||
con.close()
|
||||
return data
|
||||
|
||||
|
|
Loading…
Reference in a new issue