improved link parser (works even without "http://")

reject submissions with suspicious usernames or content
This commit is contained in:
lars 2012-03-31 13:54:36 +00:00
parent 944b6b781c
commit 683941a842

View file

@ -296,8 +296,17 @@ def get_markup_with_links(text):
# only take the TLD part of the url # only take the TLD part of the url
short_name = url.split("/")[2] short_name = url.split("/")[2]
return """%s<a href="%s" rel="nofollow">%s</a>%s""" % (prefix, url, short_name, suffix) return """%s<a href="%s" rel="nofollow">%s</a>%s""" % (prefix, url, short_name, suffix)
def expand_protocol(match):
    """Substitution callback that adds "http://" to scheme-less addresses.

    The match must provide exactly three groups: the text preceding the
    address, the address itself and the text following it.  The scheme is
    inserted only if the address starts with "www" (case-insensitive) or
    if the text following it starts with a slash; otherwise the matched
    text is returned unchanged.
    """
    lead, address, trail = match.groups()
    looks_like_url = address.lower().startswith("www") or trail.startswith("/")
    # an empty scheme leaves the matched text exactly as it was
    scheme = "http://" if looks_like_url else ""
    return lead + scheme + address + trail
# surround all urls with html markup # surround all urls with html markup
text = genshi.escape(text) text = genshi.escape(text)
text = re.sub(r"(\A|\s|\()([a-zA-Z_\-\.]+\.[a-zA-Z]{2,4})(/|\)|\s|\Z)",
expand_protocol, text)
text = re.sub(r"(\A|\s|\()(https?://[\w/\?\.\#=;,_\-\~&]*)(\)|\s|\Z)", text = re.sub(r"(\A|\s|\()(https?://[\w/\?\.\#=;,_\-\~&]*)(\)|\s|\Z)",
get_link_markup, text) get_link_markup, text)
return get_markup_with_formatted_linebreaks(text, "<br />") return get_markup_with_formatted_linebreaks(text, "<br />")
@ -527,6 +536,16 @@ def publish_twitter_alert(text, key, secret,access_key,access_secret):
except urllib2.URLError, e: except urllib2.URLError, e:
print e.reason print e.reason
def check_spam_submitter_name(name):
    """Return True if the submitter name matches the spam heuristic.

    A name is considered suspicious if all of the following hold:
      * it consists solely of the ASCII letters a-z and A-Z
      * it contains more than three lowercase and more than three
        uppercase letters (and is thus at least eight characters long)
      * its uppercase letters do not form the beginning of the name
        (i.e. capitals are mixed into the rest of the name)
    """
    capitals = "".join(re.findall("[A-Z]", name))
    lowercase_count = len(re.findall("[a-z]", name))
    # any character besides ASCII letters rules out the pattern
    if lowercase_count + len(capitals) != len(name):
        return False
    if lowercase_count <= 3 or len(capitals) <= 3:
        return False
    if len(name) < 8:
        return False
    # capitals only at the very start of the name are not suspicious
    return not name.startswith(capitals)
def check_spam_content(text):
    """Return True if the text seems to contain an HTML link.

    The lowercased text is searched for an opening anchor tag ("<a"
    followed by whitespace), an "href=" attribute preceded by whitespace
    or a closing "</a>" tag.
    """
    lowered = text.lower()
    hit = re.search(r"(<a\s|\shref=|</a>)", lowered)
    return hit is not None
@bobo.query('/profile/logout') @bobo.query('/profile/logout')
def user_logout(bobo_request): def user_logout(bobo_request):
# default start page # default start page
@ -705,18 +724,30 @@ def new_poll(bobo_request, submit=None, cancel=None, author=None, title=None,
@bobo.query('/:hash_key/submit') @bobo.query('/:hash_key/submit')
def submit_content(bobo_request, hash_key=None, submitter=None, content=None): def submit_content(bobo_request, hash_key=None, submitter=None, content=None):
value_dict = get_default_values(bobo_request) value_dict = get_default_values(bobo_request)
data = {"submitter": submitter, "content": content} value_dict["errors"] = {}
data = {}
if content and check_spam_content(content):
value_dict["errors"]["content"] = \
"Spam-Verdacht: Inhalt darf keine HTML-Tags enthalten"
else:
data["content"] = content
if submitter and check_spam_submitter_name(submitter):
value_dict["errors"]["submitter"] = \
"Spam-Verdacht: bitte den Namen korrigieren"
else:
data["submitter"] = submitter
poll_id = get_poll_id(hash_key) poll_id = get_poll_id(hash_key)
if not poll_id is None: if not poll_id is None:
poll = Poll.get(poll_id) poll = Poll.get(poll_id)
value_dict["poll"] = poll value_dict["poll"] = poll
errors = {}
try: try:
data = forms.SubmitForm.to_python(data) data = forms.SubmitForm.to_python(data)
except formencode.Invalid, errors_packed: except formencode.Invalid, errors_packed:
# merge errors with previous ones - but never overwrite existing ones
errors = errors_packed.unpack_errors() errors = errors_packed.unpack_errors()
if errors or poll.is_closed(): errors.update(value_dict["errors"])
value_dict["errors"] = errors value_dict["errors"] = errors
if value_dict["errors"] or poll.is_closed():
return render("poll_details.html", input_data=data, **value_dict) return render("poll_details.html", input_data=data, **value_dict)
else: else:
# create the new submission content # create the new submission content