From 683941a8421c71f9dc18f9f7c5b61c93b9d13ae8 Mon Sep 17 00:00:00 2001
From: lars
Date: Sat, 31 Mar 2012 13:54:36 +0000
Subject: [PATCH] improved link parser (works even without "http://")
 reject submissions with suspicious usernames or content

---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

* Layout: this patch arrived with its line breaks and indentation
  collapsed.  Lines were split again according to the hunk line counts
  and the indentation was inferred from the Python block structure, so
  leading whitespace may differ from the target file.
* expand_protocol() prepends "http://" to a matched host name that starts
  with "www" or is directly followed by "/"; every other match is
  returned unchanged.  It runs before the http(s) link substitution.
* check_spam_submitter_name() returns True for a name that consists only
  of ASCII letters with more than three lower-case and more than three
  upper-case ones, unless all upper-case letters form the start of the
  name.
* check_spam_content(): the pattern as received was r"()", which matches
  every string, so any non-empty content would have been rejected.  It now
  matches an opening or closing HTML tag, in line with the error message
  used in submit_content().  NOTE(review): the original pattern may have
  been lost before this text reached review - please confirm.
* NOTE(review): two context lines of the first hunk look damaged (a format
  string with three "%s" for four values, and a string literal split
  across two lines).  They are reproduced as received; check them
  against the target file before applying.
* NOTE(review): one added line differs from the recorded commit, so the
  ids on the "From" and "index" lines no longer describe this content.

 wortschlucker/src/wortschlucker.py | 37 +++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/wortschlucker/src/wortschlucker.py b/wortschlucker/src/wortschlucker.py
index 58f565c..25f5e14 100755
--- a/wortschlucker/src/wortschlucker.py
+++ b/wortschlucker/src/wortschlucker.py
@@ -296,8 +296,17 @@ def get_markup_with_links(text):
         # only take the TLD part of the url
         short_name = url.split("/")[2]
         return """%s%s%s""" % (prefix, url, short_name, suffix)
+    def expand_protocol(match):
+        prefix, url, suffix = match.groups()
+        if url.lower().startswith("www") or suffix.startswith("/"):
+            # just prepend "http://"
+            return prefix + "http://" + url + suffix
+        else:
+            return prefix + url + suffix
     # surround all urls with html markup
     text = genshi.escape(text)
+    text = re.sub(r"(\A|\s|\()([a-zA-Z_\-\.]+\.[a-zA-Z]{2,4})(/|\)|\s|\Z)",
+            expand_protocol, text)
     text = re.sub(r"(\A|\s|\()(https?://[\w/\?\.\#=;,_\-\~&]*)(\)|\s|\Z)",
             get_link_markup, text)
     return get_markup_with_formatted_linebreaks(text, "
")
@@ -527,6 +536,16 @@ def publish_twitter_alert(text, key, secret,access_key,access_secret):
     except urllib2.URLError, e:
         print e.reason
 
+def check_spam_submitter_name(name):
+    lower_text = re.sub("[^a-z]", "", name)
+    upper_text = re.sub("[^A-Z]", "", name)
+    return (len(lower_text) + len(upper_text) == len(name)) and \
+            (len(lower_text) > 3) and (len(upper_text) > 3) and \
+            (len(name) >= 8) and (not name.startswith(upper_text))
+
+def check_spam_content(text):
+    return bool(re.search(r"</?[a-z][a-z0-9]*(\s[^>]*)?/?>", text.lower()))
+
 @bobo.query('/profile/logout')
 def user_logout(bobo_request):
     # default start page
@@ -705,18 +724,30 @@ def new_poll(bobo_request, submit=None, cancel=None, author=None, title=None,
 @bobo.query('/:hash_key/submit')
 def submit_content(bobo_request, hash_key=None, submitter=None, content=None):
     value_dict = get_default_values(bobo_request)
-    data = {"submitter": submitter, "content": content}
+    value_dict["errors"] = {}
+    data = {}
+    if content and check_spam_content(content):
+        value_dict["errors"]["content"] = \
+                "Spam-Verdacht: Inhalt darf keine HTML-Tags enthalten"
+    else:
+        data["content"] = content
+    if submitter and check_spam_submitter_name(submitter):
+        value_dict["errors"]["submitter"] = \
+                "Spam-Verdacht: bitte den Namen korrigieren"
+    else:
+        data["submitter"] = submitter
     poll_id = get_poll_id(hash_key)
     if not poll_id is None:
         poll = Poll.get(poll_id)
         value_dict["poll"] = poll
-        errors = {}
         try:
             data = forms.SubmitForm.to_python(data)
         except formencode.Invalid, errors_packed:
+            # merge errors with previous ones - but never overwrite existing ones
             errors = errors_packed.unpack_errors()
-        if errors or poll.is_closed():
+            errors.update(value_dict["errors"])
             value_dict["errors"] = errors
+        if value_dict["errors"] or poll.is_closed():
             return render("poll_details.html", input_data=data, **value_dict)
         else:
             # create the new submission content