codekasten/rss_fetcher.py

#!/usr/bin/env python

import os, re, sys, string
try:
    import feedparser
except ImportError:
	sys.stderr.write("Could not load python module 'feedparser'!\n")
	sys.stderr.write("Maybe you should run 'apt-get install python-feedparser.\n")
	sys.exit(1)

class FeedGrepper:
    """Print feed entries whose title or link mentions a watched keyword."""

    def work_on(self, url, titles, parse=None):
        """Fetch the feed at ``url`` and print every entry matching ``titles``.

        Each entry is rendered as "<title> <link>" and compared
        case-insensitively against the keywords. A matching entry is printed
        exactly once, no matter how many keywords it contains or where in
        the text the keyword occurs.

        Args:
            url: Address of the feed; passed unchanged to the parser.
            titles: Iterable of keyword strings to search for.
            parse: Optional callable taking ``url`` and returning a mapping
                with an ``entries`` list (and optionally ``status``).
                Defaults to ``feedparser.parse``.

        Returns:
            0 when the feed contained entries, otherwise a message string
            saying why nothing was processed (HTTP 404 or no entries).
        """
        if parse is None:
            parse = feedparser.parse
        feed = parse(url)

        # Use .get(): a result without a "status" key must not abort the run.
        if feed.get("status") == 404:
            return 'The url: '+ url + ' is currently 404.'

        entries = feed.get("entries")
        if not entries:
            return "No feed entries found in: " + url

        # Lower-case the keywords once so the comparison below is
        # case-insensitive on both sides.
        keywords = [title.lower() for title in titles]
        for entry in entries:
            # Missing fields become empty strings instead of raising KeyError.
            item = "%s %s" % (entry.get("title", ""), entry.get("link", ""))
            haystack = item.lower()
            # Substring test (not "find(...) > 0") so that a keyword at the
            # very start of the text counts as a match too.
            if any(keyword in haystack for keyword in keywords):
                print(item)
        return 0


if __name__ == "__main__":
    # Grep the news feeds below for entries that mention any of these
    # keywords.
    titles = [
        "squirrelmail", "sqwebmail", "apache", "moinmoin",
        "trac", "ezmlm", "qmail", "nagios",
        "joomla", "expose", "fireboard", "arcade", "smf", "simple machines",
        "wordpress", "typo", "drupal",
    ]

    urls = [
        "http://www.milw0rm.com/rss.php",
        "http://www.securityfocus.com/rss/vulnerabilities.xml",
    ]

    for url in urls:
        result = FeedGrepper().work_on(url, titles)
        # work_on returns 0 when it processed entries and a message string
        # otherwise; report the message instead of silently dropping it.
        if result != 0:
            sys.stderr.write("%s\n" % result)
2008-07-24 08:12:52 +02:00			`#!/usr/bin/env python`

			`import os, re, sys, string`
			`try:`
			`import feedparser`
			`except ImportError:`
			`sys.stderr.write("Could not load python module 'feedparser'!\n")`
			`sys.stderr.write("Maybe you should run 'apt-get install python-feedparser.\n")`
			`sys.exit(1)`

			`class FeedGrepper:`
			`def work_on(self, url, titles):`
			`#entry_html = r'<li><p class="date">%(updated)s</p><a href="%(link)s">%(title)s</a>%(summary)s</li>'`
			`#entry_html = r'%(title)s %(updated)s \n %(link)s'`
			`#print "working on: " + url`
			`entry_html = r'%(title)s %(link)s'`
			`feed = feedparser.parse(url)`
			`if feed.status == 404:`
			`return 'The url: '+ url + ' is currently 404.'`
			`if feed["entries"]:`
			`html_items = [ entry_html % e for e in feed["entries"] ]`
			`#print '<html><body><div>'`
			`#print '\n'.join(html_items)`
			`for item in html_items:`
			`for title in titles:`
			`string.find(item.lower(), title)`
			`if string.find(item.lower(), title) > 0:`
			`print item`
			`#print '\n</div></body></html>'`
			`return 0`
			`else:`
			`return "No feed entries found in: " + url`


			`if __name__ == "__main__":`
			`'''`
			`grep some titles from news feeds`
			`'''`
			`titles = [ "squirrelmail", "sqwebmail", "apache", "moinmoin",`
			`"trac", "ezmlm", "qmail", "nagios",`
			`"joomla", "expose", "fireboard", "arcade", "smf", "simple machines",`
			`"wordpress", "typo", "drupal" ]`

			`urls = [ "http://www.milw0rm.com/rss.php",`
			`"http://www.securityfocus.com/rss/vulnerabilities.xml" ]`

			`for url in urls:`
			`FeedGrepper().work_on(url, titles)`