codekasten/rss_fetcher.py
2008-07-24 06:12:52 +00:00

50 lines
1.7 KiB
Python
Executable file

#!/usr/bin/env python
import os, re, sys, string
# feedparser is a third-party module; fail with an actionable hint instead of
# a bare traceback when it is not installed.
try:
    import feedparser
except ImportError:
    sys.stderr.write("Could not load python module 'feedparser'!\n")
    sys.stderr.write("Maybe you should run 'apt-get install python-feedparser'.\n")
    sys.exit(1)
class FeedGrepper:
    """Print the entries of a news feed that mention a watched keyword."""

    @staticmethod
    def _matches(item, titles):
        """Return True if any non-empty keyword in *titles* occurs in *item*.

        The comparison is case-insensitive. A keyword found at the very
        start of *item* counts as a match; empty keywords are ignored so
        they cannot match every item.
        """
        haystack = item.lower()
        return any(title and title.lower() in haystack for title in titles)

    def work_on(self, url, titles):
        """Fetch the feed at *url* and print every entry matching *titles*.

        Each entry is rendered as "<title> <link>" and printed once if at
        least one keyword from *titles* occurs in that rendered text.

        Returns 0 when the feed contained entries (whether or not any of
        them matched), otherwise a message string describing the problem
        (HTTP 404 or no entries).
        """
        feed = feedparser.parse(url)
        # "status" is only present when the feed was retrieved over HTTP,
        # so look it up with .get() instead of attribute access.
        if feed.get("status") == 404:
            return 'The url: '+ url + ' is currently 404.'
        entries = feed.get("entries")
        if not entries:
            return "No feed entries found in: " + url
        for entry in entries:
            # Entries are not guaranteed to carry both fields; fall back to
            # an empty string rather than failing on a missing key.
            item = "%s %s" % (entry.get("title", ""), entry.get("link", ""))
            if self._matches(item, titles):
                # A parenthesised single argument prints identically under
                # Python 2 and Python 3.
                print(item)
        return 0
if __name__ == "__main__":
    # Grep some titles from news feeds.
    titles = [ "squirrelmail", "sqwebmail", "apache", "moinmoin",
               "trac", "ezmlm", "qmail", "nagios",
               "joomla", "expose", "fireboard", "arcade", "smf", "simple machines",
               "wordpress", "typo", "drupal" ]
    urls = [ "http://www.milw0rm.com/rss.php",
             "http://www.securityfocus.com/rss/vulnerabilities.xml" ]
    grepper = FeedGrepper()
    for url in urls:
        result = grepper.work_on(url, titles)
        # work_on returns 0 on success and a message string otherwise;
        # report the message on stderr instead of silently dropping it.
        if result:
            sys.stderr.write("%s\n" % result)