FilmBar - the first usable version of a filmlist conversion/management tool

This commit is contained in:
age 2004-08-06 20:39:01 +00:00
parent 8296087a89
commit 1c58d8cc05
10 changed files with 2025 additions and 0 deletions

7
CHANGELOG Normal file
View file

@ -0,0 +1,7 @@
02004/08/06:
- started commenting
02004/07:
- the beat starts here 8]
- several unusable standalone methods
(html2xml conversion, xml-dom, gui)

39
FilmBar.py Executable file
View file

@ -0,0 +1,39 @@
#!/usr/bin/env python
__version__="dev0.3"
__author__="AGE"
__date__="02004-08-06"
class FilmBar:
    """
    Application object: loads PyGTK, builds the FilmGUI front end and runs
    the gtk main loop.
    """

    def __init__(self):
        """
        Import the GUI toolkit, create the front end and block in the
        gtk main loop.
        Prints an error message and exits with status 1 if the toolkit
        can not be loaded.
        """
        # sys is imported here on purpose: a name imported in the class body
        # is a class attribute and is not visible as a bare name in methods.
        import sys
        try:
            import pygtk
            pygtk.require("2.0")
            import gtk
            # imported for its side effect: it makes gtk.glade available,
            # which FilmGUI uses to load the glade file
            from gtk import glade
        except Exception:
            # top-level boundary: whatever went wrong while loading the
            # toolkit, there is nothing to run without it
            print("dumme fehlermeldung")
            sys.exit(1)
        import filmGUI
        # keep a local reference while the main loop is running
        fg = filmGUI.FilmGUI(gtk)
        gtk.mainloop()
if __name__ == "__main__":
    # Script entry point: start the application.
    ### defaults
    # where is Joerch's list?
    # (only used by the commented-out test calls below; FilmBar() takes no arguments)
    htmlfilmlist = "file:///home/age/divx.html"
    # which xml file should receive the films?
    xmlfilmlist = "myfilms.xml"
    fb = FilmBar()
    ## testing ##
    ### 1. ###
    ### i've done this once - think it's enough ;)
    # fb.convertJoerchs2XML(htmlfilmlist, xmlfilmlist)
    ### 2. ###
    #fb.importXML(xmlfilmlist)

15
TODO Normal file
View file

@ -0,0 +1,15 @@
TODO for FilmBar (02004/08)
[!] 'exit' functionality
[ ] export to html
[.] improve usability
[ ] improve/expand html reading (see: myFilms.saveXMLfromJoerchs)
[*] external .config file
[*] import joerchs html
---
meaning:
[ ] not done
[.] working on
[*] done
[!] fix me

1186
fba.glade Normal file

File diff suppressed because it is too large Load diff

8
fba.gladep Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" standalone="no"?> <!--*- mode: xml -*-->
<!DOCTYPE glade-project SYSTEM "http://glade.gnome.org/glade-project-2.0.dtd">
<glade-project>
<name></name>
<program_name></program_name>
<gnome_support>FALSE</gnome_support>
</glade-project>

133
filmGUI.py Normal file
View file

@ -0,0 +1,133 @@
#!/usr/bin/env python
__version__="dev0.3"
__author__="AGE"
__date__="02004-08-06"
import sys
import time
import myFilms
class FilmGUI:
    """
    The frontend for the FilmPseudoDB

    Loads the window from a glade file, connects the button handlers and
    keeps a text log that is shown in the widget "textview1".
    """

    def __init__(self, gtk):
        """
        Build the window, connect the handlers and prefill the file entries.
        @param module gtk : the imported gtk module; gtk.glade has to be loaded
        """
        self.log = time.asctime() + " the beat starts here..\n"
        # read config file (it appends to self.log, so the log has to exist)
        self.readConfig()
        # gtk is given, it has to be known in the whole class
        self.gtk = gtk
        # define which gladefile to use for frontend
        # remove this line if you want to use the one from the configfile
        self.gladefile = "fba.glade"
        self.filmgui = self.gtk.glade.XML(self.gladefile)
        # set button handlers
        actions = {
            "on_bu_insertfilm_clicked": self.clicked_insertfilm,
            "on_bu_apply_files_clicked": self.clicked_apply_files,
            "on_bu_read_xml_clicked": self.clicked_read_xml,
            "on_bu_read_joerchs_clicked": self.clicked_read_joerchs,
            #"on_bu_cancel_clicked": (self.gtk.mainquit),
            #"on_bu_cancel_clicked": self.exit,
            "on_window1_destroy_event": self.exit,
        }
        # connect the actions to the events
        self.filmgui.signal_autoconnect(actions)
        # show the configured file names in the entry fields
        prefill = (
            ("entry6", self.htmlfilmlist),
            ("entry7", self.xmlfilmlistin),
            ("entry8", self.xmlfilmlistout),
            ("entry10", self.htmlfilmlist),
            ("entry11", self.xmlfilmlistin),
        )
        for widgetname, text in prefill:
            self.filmgui.get_widget(widgetname).set_text(text)
        self.textview = self.filmgui.get_widget("textview1")
        self.textbuffer = self.textview.get_buffer()
        self.log += self._timestamp()
        self.log += "\n\t ___program completely started___\n"
        self.textbuffer.set_text(self.log)
        self.textview2 = self.filmgui.get_widget("textview2")
        self.textbuffer2 = self.textview2.get_buffer()
        self.mf = myFilms.MyFilms()

    def _timestamp(self):
        """Return a newline followed by the current time, e.g. 15:47:12."""
        return "\n" + time.strftime('%H:%M:%S')

    def readConfig(self):
        """
        Get the Configuration from standard config file (see: Preferences)
        """
        import preferences
        myPrefs = preferences.Preferences()
        self.gladefile = myPrefs["gladefile"]
        # Preferences returns None for a missing option; the entry widgets
        # are filled with these values, so fall back to an empty string
        self.htmlfilmlist = myPrefs["htmlfilmlist"] or ""
        self.xmlfilmlistin = myPrefs["xmlfilmlistin"] or ""
        self.xmlfilmlistout = myPrefs["xmlfilmlistout"] or ""
        self.log += self._timestamp()
        self.log += " read config file\n"

    def clicked_insertfilm(self, widget):
        """
        Add the film typed into entry1..entry5 and save the list as xml.
        @param Widget widget : the widget that emitted the signal (unused)
        """
        # order: title, lang, codec, cd, comment
        datalist = [self.filmgui.get_widget(name).get_text()
                    for name in ("entry1", "entry2", "entry3", "entry4", "entry5")]
        title = datalist[0]
        self.mf.addFilm(datalist)
        # make sure the list is written to the file the log line names,
        # also when the file settings were never applied
        self.mf.setXMLFile(self.xmlfilmlistout)
        self.mf.saveToXMLFile()
        self.log += self._timestamp()
        self.log += " insert film"
        self.log += "\n\t\"" + title + "\" to: " + self.xmlfilmlistout
        # only the title is cleared, the other fields keep their values
        self.filmgui.get_widget("entry1").set_text("")
        self.textbuffer.set_text(self.log)

    def clicked_apply_files(self, widget):
        """
        Take over the file names from entry6..entry8 and read the xml file.
        @param Widget widget : the widget that emitted the signal (unused)
        """
        self.htmlfilmlist = self.filmgui.get_widget("entry6").get_text()
        self.xmlfilmlistin = self.filmgui.get_widget("entry7").get_text()
        self.xmlfilmlistout = self.filmgui.get_widget("entry8").get_text()
        #self.mf = myFilms.MyFilms(self.xmlfilmlistin)
        # NOTE(review): the films are read from the *export* file here,
        # xmlfilmlistin is only logged - confirm that this is intended
        self.mf.setXMLFile(self.xmlfilmlistout)
        self.mf.fillfromXML()
        self.log += self._timestamp()
        self.log += " set files"
        self.log += "\n\tjoerch import:\t" + self.htmlfilmlist
        self.log += "\n\txml import:\t" + self.xmlfilmlistin
        self.log += "\n\txml export:\t" + self.xmlfilmlistout
        self.log += "\n"
        self.textbuffer.set_text(self.log)

    def clicked_read_xml(self, widget):
        """
        Show the raw content of the xml export file in "textview2".
        @param Widget widget : the widget that emitted the signal (unused)
        """
        xmlfile = open(self.xmlfilmlistout)
        try:
            content = xmlfile.read()
        finally:
            xmlfile.close()
        xmlstuff = self.xmlfilmlistout + " \n===============================\n"
        xmlstuff += content
        self.textbuffer2.set_text(xmlstuff)

    def clicked_read_joerchs(self, widget):
        """
        Convert Joerchs html list to xml and log the result.
        @param Widget widget : the widget that emitted the signal (unused)
        """
        # check whether debug mode is switched on
        cb2 = self.filmgui.get_widget("checkbutton2")
        self.mf.setXMLFile(self.xmlfilmlistout)
        debugout = self.mf.saveXMLfromJoerchs(self.htmlfilmlist, int(cb2.get_active()))
        self.log += self._timestamp()
        self.log += " joerchs"
        self.log += "\n\tread html: " + self.htmlfilmlist
        self.log += "\n\tsaved xml: " + self.xmlfilmlistout
        self.log += "\n\t" + debugout
        self.textbuffer.set_text(self.log)

    def exit(self, widget, *args):
        """
        Leave the gtk main loop.
        @param Widget widget : the widget that emitted the signal (unused)
        @param args : further signal arguments (e.g. the event), ignored
        """
        print("geh kacken..")
        # the call parentheses matter: the bare attribute does nothing
        self.gtk.mainquit()
if __name__ == "__main__":
    # Nothing is executed here: the only statement is a string literal that
    # holds an old manual test (it still calls MyFilms with an argument).
    '''
xmlfilmlist = "myFilms.xml"
import myFilms
mf = myFilms.MyFilms(xmlfilmlist)
mf.addFilm()
mf.setXMLFile("fee")
mf.saveToXMLFile()
'''

279
filmListXML.py Executable file
View file

@ -0,0 +1,279 @@
#!/usr/bin/env python
__version__="dev0.3"
__author__="AGE"
__date__="02004-08-06"
#is nicht schoen aber laut python developers genehm
#(import ausserhalb der klasse)
import xml.dom.minidom
# minidom ist eine kleine, einfache dom implementierung
# dom ist quasi eine baumdarstellung fuer xml inhalte
# reingehende strings escapen..
#from xml.sax.saxutils import escape
class FilmListXML:
    """
    This class handles every xml operation for films.
    It does just dumb reading and writing but no real test on the given
    data.
    @author AGE
    """
    # the whole xml document is kept in xmlblock
    xmlblock = ""
    listname = "foomakilla"
    # element names of a film, in the order they are stored in a MyFilms row;
    # the comment (row index 4) is written as an xml comment, not as element
    _FIELDS = ["title", "lang", "codec", "cd"]

    def __init__(self):
        self.cleanUp()

    def cleanUp(self):
        """
        Forget the data of the current film.
        """
        # data holds the values of one film (as dictionary)
        self.data = {}

    def saveXMLList(self, filename):
        """
        Write whole 'xmlblock' into a file.
        @param String filename : name for file in which to save xml data
        """
        xmlfile = open(filename, "w")
        try:
            xmlfile.write(self.xmlblock)
        finally:
            xmlfile.close()

    def begXMLList(self):
        """
        Start 'xmlblock' from blank with a small xml-header.
        """
        self.xmlblock = '<?xml version="1.0"?>\n'
        self.xmlblock += '<all>\n\t<name>' + self.listname + '</name>\n'

    def finXMLList(self):
        """
        Add xml-footer to 'xmlblock'
        """
        self.xmlblock += '</all>'

    def setSingleXMLData(self, newfilm):
        """
        Make 'newfilm' the current film.
        @param Dict newfilm : values of one film, needs at least the key 'id'
        """
        self.cleanUp()
        self.data = newfilm

    def setAllXMLData(self, filmlist):
        """
        Append every film of 'filmlist' to 'xmlblock'.
        @param List filmlist : dictionaries as expected by setSingleXMLData
        """
        for film in filmlist:
            self.setSingleXMLData(film)
            self.addToXMLList()

    def readXMLFile(self, filename):
        """
        Read a xml file and overwrite 'xmlblock'!
        @param String filename : name of the file from which to read the xml data
        """
        xmlfile = open(filename, "r")
        try:
            self.xmlblock = xmlfile.read()
        finally:
            xmlfile.close()

    def addToXMLList(self):
        """
        Creates XML tags from given "data" (self.data), but does _not_ save them!
        result looks like:
        <film id="foo">
        <!-- blabla -->
        <title>bar</title>
        </film>
        Values are xml-escaped; the known tags are written first, any other
        key follows in alphabetical order.
        @return None
        @raises KeyError : if self.data has no 'id'
        """
        from xml.sax.saxutils import escape
        # note the %(id)s, that works nicely with dicts
        block = '\t<film id="%(id)s">\n' % self.data
        # the comment is handled first
        if "comment" in self.data:
            # remove blanks from beginning and end
            text = str(self.data.get("comment")).strip()
            # "--" is not allowed inside an xml comment
            while "--" in text:
                text = text.replace("--", "- -")
            if len(text) == 0:
                # DOM does not like completely empty tags, so write a blank
                block += '\t\t<!-- -->\n'
            else:
                block += '\t\t<!-- %s -->\n' % text
        # all other tags; 'id' and 'comment' are already written
        known = [name for name in self._FIELDS if name in self.data]
        extra = [name for name in self.data
                 if name not in self._FIELDS and name not in ("id", "comment")]
        extra.sort()
        for entry in known + extra:
            text = str(self.data.get(entry)).strip()
            if len(text) == 0:
                block += '\t\t<%s> </%s>\n' % (entry, entry)
            else:
                block += '\t\t<%s>%s</%s>\n' % (entry, escape(text), entry)
        block += '\t</film>\n'
        self.xmlblock += block

    def readInXML(self, mf, filename, printer=0):
        """
        Parse a given xml file and check for a special structure.
        The data is written to mf.filmlist (key: film id, value: list
        [title, lang, codec, cd, comment]).
        This is not a xml parser at all for any other xml structure.
        Reading stops at the first film whose id (> 0) already exists in
        mf.filmlist.
        @param MyFilms mf : Object to fill with read films
        @param String filename : Name of the xmlfile to parse; if empty the
                                 current 'xmlblock' is parsed
        @param Int printer : Switch for printing useful development messages; 0-off
        @raises ValueError : if a film has no numeric id attribute
        """
        # make sure the file content is in xmlblock
        if filename:
            if printer > 0:
                print("reading from: " + filename)
            self.readXMLFile(filename)
        # build the document object from the xml text
        dom = xml.dom.minidom.parseString(self.xmlblock)
        try:
            # rather unimportant check
            self.checkNameTag(dom)
            content = "film"
            attributname = "id"
            # work through every "film" tag
            for film in dom.getElementsByTagName(content):
                # read <film id="???">
                if printer > 0:
                    print("\n%s %s: %s" % (content, attributname, film.getAttribute(attributname)))
                attribut = int(film.getAttribute(attributname))
                # id=0 is only a placeholder and may be overwritten
                if attribut > 0 and attribut in mf.filmlist:
                    print("%s: %s exists" % (attributname, attribut))
                    return
                mf.filmlist[attribut] = self._filmRow(film)
                if printer > 0:
                    print(mf.filmlist[attribut])
        finally:
            # release the dom tree also when reading stops early
            dom.unlink()

    def _filmRow(self, film):
        """Return [title, lang, codec, cd, comment] of one film element."""
        row = ["", "", "", "", ""]
        for node in film.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                if node.tagName in self._FIELDS:
                    row[self._FIELDS.index(node.tagName)] = self._nodeText(node)
                else:
                    print("found unknown node: %s" % node.tagName)
            elif node.nodeType == node.COMMENT_NODE:
                # do not forget the comment
                row[4] = str(node.data)
        return row

    def _nodeText(self, node):
        """Return the text of an element; "" if the element is empty."""
        parts = [str(child.data) for child in node.childNodes
                 if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)]
        return "".join(parts)

    def checkNameTag(self, dom):
        """
        Check if a leading name-tag exists in given document object and
        print a hint if it does not.
        @param DOM dom : Content of xml file
        """
        xxx = "name"
        if len(dom.getElementsByTagName(xxx)) == 0:
            print("%s: not found\n\tinsert <%s></%s> tag!" % (xxx, xxx, xxx))

    def myFilmsToXMLFile(self, mf, xmlfilename):
        """
        Convert films of a MyFilms object to xml and save in xml file.
        The films are written in ascending order of their id.
        @param MyFilms mf : Object whose 'filmlist' is written
        @param String xmlfilename : Filename in which xml data is saved
        """
        self.cleanUp()
        self.begXMLList()
        filmids = list(mf.filmlist.keys())
        filmids.sort()
        for filmid in filmids:
            row = mf.filmlist[filmid]
            self.setSingleXMLData({
                "id": filmid,
                "title": row[0],
                "lang": row[1],
                "codec": row[2],
                "cd": row[3],
                "comment": row[4],
            })
            self.addToXMLList()
        self.finXMLList()
        self.saveXMLList(xmlfilename)

    def convertJoerchs2XML(self, mf, htmlfilmlist, debug=0, printer=0):
        """
        Does what it sounds like - what a surprise ;)
        Read Joerchs htmlfile and convert it to xml structure in 'xmlblock'
        (nothing is saved to a file here).
        @param MyFilms mf : actual Filmdataobject (not used)
        @param String htmlfilmlist : Filename for html file, from which the films are read in
        @param Int debug : Switch for debugmodus; 0-off, 1-return filmlist
        @param Int printer : Switch for printing useful development messages; 0-off
        @return List/String : "", if debug>0: all films found in joerchs-html
                              plus a closing hint text
        """
        import readJoerchs
        # standard tags of the xml file, in the order the columns appear in
        # Joerchs html file; rows with another number of cells are not
        # returned by the reader
        tags = [["title", "lang", "codec", "cd", "comment"]]
        names = tags[0]
        # read the html file; the first returned entry is the tag row itself
        html = readJoerchs.ReadJoerchs(htmlfilmlist, tags)
        entries = html.getFilmEntries(printer)
        self.cleanUp()
        self.begXMLList()
        # the position in the list determines the id of a film
        for filmid, entry in enumerate(entries):
            if not entry[0]:
                # no title: skip instead of writing the values of the
                # previous film again under a new id
                continue
            film = {"id": filmid}
            for name, value in zip(names, entry):
                film[str(name)] = value
            self.setSingleXMLData(film)
            self.addToXMLList()
        self.finXMLList()  # writes the xml footer
        if debug > 0:
            return entries + ['\nUngewoehnliche Eintraege bitte per Hand aus dem HTML Datei loeschen! Probleme bei mir waren z.B.: unvollstaendige HTML Tags (manche Browser ignorieren das), DOS Steuerzeichen und fiese Kackscheisse, die durch Fehler im cryptofs entstanden sind.\n']
        return ""

143
myFilms.py Executable file
View file

@ -0,0 +1,143 @@
#!/usr/bin/env python
__version__="dev0.3"
__author__="AGE"
__date__="02004-08-06"
import filmListXML
class MyFilms:
    """
    Container for filmdata.
    This is 'des Pudels Kern' - the core object, around which the whole
    code is designed :>
    It also handles to/from xml conversion
    """
    xmlfile = ""
    # placeholder only; every instance gets its own dictionary in __init__
    filmlist = ""

    def __init__(self, xmlfile=None):
        """
        Create a film list that only contains the placeholder entry 0.
        @param String xmlfile : optional name of the xml file to work on
                                (it is only remembered, not read)
        """
        ### all data ends up here in an easily editable form
        self.filmlist = self._emptyFilmlist()
        if xmlfile is not None:
            self.setXMLFile(xmlfile)

    def _emptyFilmlist(self):
        """Return a new film dictionary holding only the placeholder row."""
        # key 0 serves as placeholder and documents the order of the fields
        return {0: ["title", "language", "codec", "cd", "comments"]}

    def setXMLFile(self, xmlfile):
        """
        Set file from which to read or write to.
        @param String xmlfile : Name for file that is used for xml reading/writing
        """
        self.xmlfile = xmlfile

    def fillfromXML(self):
        """
        Fill dictionary 'filmlist' using an external method.
        """
        filmtool = filmListXML.FilmListXML()
        # self is handed over, so self.filmlist gets filled
        filmtool.readInXML(self, self.xmlfile, printer=0)

    def saveXMLfromJoerchs(self, htmlfile, debug=0):
        """
        Read Joerchs -> convert to xml -> save xml -> read xml into 'filmlist'
        After all, 'filmlist' is hopefully filled with the films from
        Joerchs html file.
        'hopefully' because if an error occurs, you are really out of luck.
        In most cases you have to edit the html file yourself.
        Fortunately you get the content read so far (debug>0). If anything
        seems corrupt, delete/change the corresponding part in the
        html file and try again. This sucks but it's not worth more
        effort ;)
        TODO: write more general code, that reads other html files as well
        @param String htmlfile : html file (url) to read the films from
        @param Int debug : 0-off, >0 return the films that were read
        @return String : "" or, if debug>0, one line per film that was read
        """
        self.filmlist = self._emptyFilmlist()
        filmtool = filmListXML.FilmListXML()
        debugout = filmtool.convertJoerchs2XML(self, htmlfile, debug)
        # this output helps against corrupt html files
        if debug > 0:
            lines = ["\thtmlfilmlist:"]
            for entry in debugout:
                lines.append(str(entry))
            debugout = "\n".join(lines)
        filmtool.saveXMLList(self.xmlfile)
        # check it out: 'self' is handed over again and should come back filled
        filmtool.readInXML(self, self.xmlfile, printer=0)
        return str(debugout)

    def saveToXMLFile(self):
        """
        Give self to external method which saves 'filmlist' in a xml
        file.
        """
        filmsafe = filmListXML.FilmListXML()
        filmsafe.myFilmsToXMLFile(self, self.xmlfile)

    def getFilm(self, id):
        """
        Return one film of 'filmlist'.
        @param Int id : Number for the requested film
        @return List/String : List of one film's data / errormessage
        """
        if id in self.filmlist:
            return self.filmlist[id]
        return "no film found with id: %s" % id

    def addFilm(self, data):
        """
        Add a new film to 'filmlist'
        @param List data : Data of one film
        @return Int : the id given to the new film
        """
        # find the next free id
        newid = max(self.filmlist.keys()) + 1
        self.filmlist[newid] = data
        return newid

    def changeFilm(self, id):
        """
        Overwrite the film responding to the given id with values typed in
        on the console.
        (Is now obsolete: was used for module tests..)
        @param Int id : Number for the requested Film
        """
        if id not in self.filmlist:
            print("no film found with id: %s" % id)
            return
        # raw_input was renamed to input in Python 3
        try:
            ask = raw_input
        except NameError:
            ask = input
        title = ask("name eingeben: ")
        lang = ask("sprache eingeben: ")
        codec = ask("codec eingeben: ")
        cd = ask("cds eingeben: ")
        comment = ask("comment eingeben: ")
        self.filmlist[id] = [title, lang, codec, cd, comment]
if __name__ == "__main__":
    ###local testing
    xmlfile = "myFilms.xml"
    # the file name is set with setXMLFile, so this works whether or not
    # the constructor accepts a file name
    mf = MyFilms()
    mf.setXMLFile(xmlfile)
    #foo = mf.getFilm(666)
    #print "%s" % foo
    #mf.changeFilm(2)
    # one film: title, lang, codec, cd, comment
    film = ["foo", "asd", "123", "bar", "bla"]
    mf.addFilm(film)
    mf.setXMLFile("fee.new")
    mf.saveToXMLFile()

138
preferences.py Executable file
View file

@ -0,0 +1,138 @@
#!/usr/bin/env python
__author__ = "AGE"
__date__ = "02004/08/06"
"""
This Class is derived mainly from 'gimini' project.
__version__ = "$Revision: 1.1 $"
__author__ = "C.Dutoit <dutoitc@hotmail.com>"
__date__ = "2003-2-14"
Thanx to Dutoit!
"""
from ConfigParser import *
import sys, os
### Set the Preferences filename
# On linux the file is kept hidden in the user's home directory, elsewhere
# in the current directory. expanduser is used instead of the HOME variable
# so that an unset HOME does not crash the import.
if sys.platform.startswith("linux"):
    PREFS_FILENAME = os.path.join(os.path.expanduser("~"), ".FilmBatzen.dat")
else:
    PREFS_FILENAME = "FilmBatzen.dat"
class Preferences:
    """
    This class handles preferences and stores them into 'PREFS_FILENAME'
    To use it :
    - instantiate a Preferences object :
    myPP=Preferences()
    - to get a preference :
    mypref=myPP["ma_preference"]
    - to set a preference :
    myPP["ma_preference"]=xxx
    The preferences are loaded when an object is created and are saved
    whenever a value is added or changed.
    All values live in the section [Main] of the file.
    """

    def __init__(self):
        """
        Constructor
        @author C.Dutoit <dutoitc@hotmail.com>
        """
        self._config = None
        self.__loadConfig()

    #>--------------------------------------------------------------------------
    def __getitem__(self, name):
        """
        Return the preferences for the given item
        @param String name : Name of the item for which we return a value
        @return String : value of the pref, or None if inexistant
        @since 1.1.2.7
        @author C.Dutoit <dutoitc@hotmail.com>
        """
        if not self._config.has_section("Main"):
            print("No section: \"[Main]\"")
            return None
        try:
            return self._config.get("Main", name)
        except NoOptionError:
            print("No such option: \"" + name + "\"")
            return None

    #>--------------------------------------------------------------------------
    def __setitem__(self, name, value):
        """
        Set the preference for the given item and save the config file
        @param String name : Name of the item WITHOUT SPACES
        @param String Value : Value for the given name (stored as string)
        @raises TypeError : if the name contains spaces
        @since 1.1.2.7
        @author C.Dutoit <dutoitc@hotmail.com>
        """
        # check first, so a bad name does not touch the configuration
        if " " in name:
            raise TypeError("Name cannot contain a space")
        # Add 'Main' section ?
        if not self._config.has_section("Main"):
            self._config.add_section("Main")
        # Save
        self._config.set("Main", name, str(value))
        self.__saveConfig()

    #>--------------------------------------------------------------------------
    def __saveConfig(self):
        """
        Save datas to config file
        @since 1.1.2.5
        @author C.Dutoit <dutoitc@hotmail.com>
        """
        f = open(PREFS_FILENAME, "w")
        try:
            self._config.write(f)
        finally:
            f.close()

    #>--------------------------------------------------------------------------
    def __loadConfig(self):
        """
        Load datas from config file
        If the file does not exist an empty one is created; if that fails
        too, the preferences start empty.
        @since 1.1.2.5
        @author C.Dutoit <dutoitc@hotmail.com>
        """
        # create the parser first: the object has to be usable even if the
        # file can neither be read nor created
        self._config = ConfigParser()
        # Make sure that the configuration file exist
        try:
            f = open(PREFS_FILENAME, "r")
            f.close()
        except (IOError, OSError):
            try:
                f = open(PREFS_FILENAME, "w")
                f.close()
            except (IOError, OSError):
                print("Can't make %s for saving preferences !" % PREFS_FILENAME)
                return
        # Read datas
        self._config.read(PREFS_FILENAME)
    #>--------------------------------------------------------------------------
if __name__ == "__main__":
    # Small manual test: read one preference, then set it
    # (setting a value also writes the config file).
    myPP=Preferences()
    mypref=myPP["ma_preference"]
    myPP["ma_preference"]="xxx"

77
readJoerchs.py Executable file
View file

@ -0,0 +1,77 @@
#!/usr/bin/env python
import string
import httplib, urllib
class ReadJoerchs:
    """
    Reads films out of a html file.

    A film is a table row '<tr>...</tr>' whose cells are written as
    '<td class="zelle">...</td>'.
    """

    def __init__(self, url, tags):
        """
        @param String url : url of the html file (e.g. file:///...)
        @param List tags : list holding one list with the expected tag
                           names; their number is the number of cells a
                           row must have
        """
        self.url = url
        # copy, so the rows found later are not appended to the caller's list
        self.films = list(tags)

    def getFilmEntries(self, printfilms=0):
        """
        Returns a 2D list with film data.
        The first entry is the list of tag names given to the constructor,
        every following entry holds the cell texts of one table row. Rows
        that do not have exactly as many cells as there are tag names are
        left out.
        @param Int printfilms : not used
        @return List : list of lists of strings
        """
        wholefile = self._readUrl()
        expected = len(self.films[0])
        pos = 0
        # walk through the file row by row until no further row starts
        while True:
            start = wholefile.find('<tr>', pos)
            if start == -1:
                break
            start += len('<tr>')
            end = wholefile.find('</tr>', start)
            if end == -1:
                # unclosed last row: take the rest of the file
                end = len(wholefile)
            cells = self._readCells(wholefile[start:end], expected)
            if len(cells) == expected:
                self.films.append(cells)
            pos = end
        return self.films

    def _readUrl(self):
        """Return the content behind self.url as text."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        conn = urlopen(self.url)
        try:
            data = conn.read()
        finally:
            conn.close()
        if not isinstance(data, str):
            # NOTE(review): the encoding of the page is not known here;
            # latin-1 is used because it can decode any byte - confirm
            data = data.decode("latin-1")
        return data

    def _readCells(self, line, expected):
        """Return the cleaned cell texts of one row (at most expected+1)."""
        cellstart = '<td class="zelle">'
        cells = []
        pos = 0
        # one cell more than expected is read, so that rows which are too
        # long can be told apart from complete ones
        while len(cells) <= expected:
            start = line.find(cellstart, pos)
            if start == -1:
                # no further cell in this row
                break
            start += len(cellstart)
            end = line.find('</td>', start)
            if end == -1:
                # broken cell without end tag
                break
            text = self.replacestuff(line[start:end])
            # an empty cell is stored as a single blank
            cells.append(text or " ")
            pos = end
        return cells

    def replacestuff(self, data):
        """
        Replace html entities and german umlauts, e.g. &quot; becomes ".
        @param String data : text of one table cell
        @return String : the text with all replacements applied
        """
        #TODO: find a better way than this table
        replacements = (
            ('&nbsp;', " "),
            ("&quot;", '"'),
            ("&uuml;", 'ue'),
            ("&ouml;", 'oe'),
            ("&auml;", 'ae'),
            ("ö", "oe"),
            ("ä", "ae"),
            ("ü", "ue"),
        )
        for old, new in replacements:
            data = data.replace(old, new)
        return data
if __name__ == "__main__":
    # Nothing is executed here: the only statement is a string literal that
    # holds an old manual test.
    ''' local testing
url = "file:///home/age/divx.html"
print "reading %s" % url
films = [["filmtitle","language","codec","cds","comment"]]
foo = ReadJoerchs(url, films)
x = foo.getFilmEntries()
print x
'''