fixed input encoding (by applying the announced encoding of the website)
This commit is contained in:
parent
356f4cf279
commit
fd653868ab
1 changed file with 2 additions and 1 deletion
|
@@ -189,7 +189,6 @@ class EventParser(HTMLParser.HTMLParser, object):
|
||||||
See "handle_charref" and "handle_entityref" below
|
See "handle_charref" and "handle_entityref" below
|
||||||
"""
|
"""
|
||||||
if self.in_attribute:
|
if self.in_attribute:
|
||||||
data = data.decode(INPUT_ENCODING)
|
|
||||||
event = self.events[-1]
|
event = self.events[-1]
|
||||||
if not data.strip():
|
if not data.strip():
|
||||||
# ignore empty strings
|
# ignore empty strings
|
||||||
|
@@ -250,6 +249,8 @@ def read_from_url(url):
|
||||||
except IOError, errmsg:
|
except IOError, errmsg:
|
||||||
sys.stderr.write("Failed to read from input (%s): %s\n" % (url, errmsg))
|
sys.stderr.write("Failed to read from input (%s): %s\n" % (url, errmsg))
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
encoding = con.headers.getparam('charset')
|
||||||
|
data = data.decode(encoding)
|
||||||
con.close()
|
con.close()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue