280 lines
8.9 KiB
Python
280 lines
8.9 KiB
Python
|
#!/usr/bin/env python
|
||
|
# Module metadata.
__version__ = "dev0.3"
__author__ = "AGE"
__date__ = "02004-08-06"
|
||
|
|
||
|
# not pretty, but acceptable according to the python developers
# (import outside of the class)
|
||
|
import xml.dom.minidom
from xml.sax.saxutils import escape
|
||
|
# minidom is a small, simple DOM implementation
# DOM is essentially a tree representation of XML content
|
||
|
|
||
|
# escape incoming strings..
|
||
|
#from xml.sax.saxutils import escape
|
||
|
|
||
|
class FilmListXML:
    """
    Read and write the XML representation of a film list.

    The class only does plain reading and writing; it performs no real
    validation of the data it is given.

    Layout of the generated document:
        <?xml version="1.0"?>
        <all>
            <name>listname</name>
            <film id="1">
                <!-- comment -->
                <title>...</title>
                ...
            </film>
        </all>

    @author AGE
    """
    # The whole XML document is kept in xmlblock as one string.
    xmlblock = ""
    # Text written into the <name> element at the top of the document.
    listname = "foomakilla"
    # Fields of one film record in the order [title, lang, codec, cd, comment].
    # The first four are stored as elements, the last one as an XML comment.
    _FIELDS = ("title", "lang", "codec", "cd", "comment")

    def __init__(self):
        self.cleanUp()

    def cleanUp(self):
        """
        Reset 'data', the dictionary holding the values of a single film.
        """
        self.data = {}

    def saveXMLList(self, filename):
        """
        Write whole 'xmlblock' into a file.

        @param String filename : name for file in which to save xml data
        """
        # 'with' closes the file even if write() fails.
        with open(filename, "w") as xmlfile:
            xmlfile.write(self.xmlblock)

    def begXMLList(self):
        """
        Start 'xmlblock' from blank with a small xml-header.

        The list name is escaped so that '&', '<' and '>' keep the
        document well-formed.
        """
        self.xmlblock = (
            '<?xml version="1.0"?>\n'
            '<all>\n\t<name>' + escape(str(self.listname)) + '</name>\n'
        )

    def finXMLList(self):
        """
        Add xml-footer to 'xmlblock'
        """
        self.xmlblock += '</all>'

    def setSingleXMLData(self, newfilm):
        """
        Make 'newfilm' the current film.

        @param Dict newfilm : values of one film; the dictionary itself is
                              stored, not a copy
        """
        self.cleanUp()
        self.data = newfilm

    def setAllXMLData(self, filmlist):
        """
        Append every film of 'filmlist' to 'xmlblock'.

        @param List filmlist : film dictionaries as accepted by
                               setSingleXMLData()
        """
        for film in filmlist:
            self.setSingleXMLData(film)
            self.addToXMLList()

    def readXMLFile(self, filename):
        """
        Read a xml file and overwrite 'xmlblock'!

        @param String filename : name of the file from which to read the xml data
        """
        with open(filename, "r") as xmlfile:
            self.xmlblock = xmlfile.read()

    def addToXMLList(self):
        """
        Creates XML tags from given "data" (self.data), but does _not_ save them!
        result looks like:
            <film id="foo">
                <!-- blabla -->
                <title>bar</title>
            </film>

        Element text and the id attribute are XML-escaped. A "--" inside
        the comment is rewritten to "- -" because XML forbids it there.

        @return None
        @raise KeyError : if self.data has no "id" entry
        """
        film = self.data
        film_id = escape(str(film["id"]), {'"': "&quot;"})
        parts = ['\t<film id="%s">\n' % film_id]

        # The comment is handled first and written as an XML comment.
        if "comment" in film:
            text = str(film.get("comment")).strip()
            while "--" in text:
                text = text.replace("--", "- -")
            if text:
                parts.append('\t\t<!-- %s -->\n' % text)
            else:
                parts.append('\t\t<!-- -->\n')

        # All remaining entries become plain elements.
        for tag in film:
            if tag in ("id", "comment"):
                continue
            text = escape(str(film.get(tag)).strip())
            # A single blank is written for empty values so that readers
            # relying on a first child node keep working.
            parts.append('\t\t<%s>%s</%s>\n' % (tag, text or ' ', tag))

        parts.append('\t</film>\n')
        self.xmlblock += "".join(parts)

    def readInXML(self, mf, filename, printer=0):
        """
        Parse a given xml file and check for a special structure.
        The data is written to mf.filmlist, keyed by the integer film id;
        each value is a list [title, lang, codec, cd, comment].
        This is not a general xml parser for any other xml structure, so
        be careful with what you feed it.

        Reading stops at the first film whose id is greater than 0 and
        already present in mf.filmlist; films read before that stay stored.

        @param MyFilms mf : Object to fill with read films
        @param String filename : Name of the xml file to parse; if empty, the
                                 current content of 'xmlblock' is parsed
        @param Int printer : Switch for printing useful development messages; 0-off
        @raise ValueError : if a film has no integer id attribute
        """
        # print(...) with one argument behaves the same as the print
        # statement, so the messages work on Python 2 and 3.
        if filename:
            if printer > 0:
                print("reading from: " + filename)
            self.readXMLFile(filename)

        # Build the document object from the xml text.
        dom = xml.dom.minidom.parseString(self.xmlblock)
        try:
            self.checkNameTag(dom)
            content = "film"
            attributname = "id"
            for film in dom.getElementsByTagName(content):
                if printer > 0:
                    print("\n%s %s: %s" % (content, attributname,
                                           film.getAttribute(attributname)))
                film_id = int(film.getAttribute(attributname))
                # id=0 is never treated as a duplicate; it only serves as
                # placeholder and tag info.
                if film_id > 0 and film_id in mf.filmlist:
                    print("%s: %s exists" % (attributname, film_id))
                    return
                mf.filmlist[film_id] = self._readFilm(film)
                if printer > 0:
                    print(mf.filmlist[film_id])
        finally:
            # Release the DOM on every exit path, including the early return.
            dom.unlink()

    def _readFilm(self, film):
        """
        Convert one <film> element into [title, lang, codec, cd, comment].
        """
        element_tags = self._FIELDS[:-1]
        comment_index = len(self._FIELDS) - 1
        record = [""] * len(self._FIELDS)
        for node in film.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                if node.tagName in element_tags:
                    record[element_tags.index(node.tagName)] = self._nodeText(node)
                else:
                    print("found unknown node: %s" % node.tagName)
            elif node.nodeType == node.COMMENT_NODE:
                record[comment_index] = str(node.data)
        return record

    def _nodeText(self, node):
        """
        Return the joined text children of 'node', "" for an empty element.
        """
        pieces = [child.data for child in node.childNodes
                  if child.nodeType in (child.TEXT_NODE,
                                        child.CDATA_SECTION_NODE)]
        return str("".join(pieces))

    def checkNameTag(self, dom):
        """
        Check if a leading name-tag exists in given document object.
        Prints a hint if it is missing; nothing is returned or raised.

        @param DOM dom : Content of xml file
        """
        tag = "name"
        try:
            found = len(dom.getElementsByTagName(tag)) > 0
        except AttributeError:
            # Not a DOM object at all: report it like a missing tag.
            found = False
        if not found:
            print("%s: not found\n\tinsert <%s></%s> tag!" % (tag, tag, tag))

    def myFilmsToXMLFile(self, mf, xmlfilename):
        """
        Convert films of a MyFilms object to xml and save in xml file.

        @param MyFilms mf : Object whose 'filmlist' maps a film id to
                            [title, lang, codec, cd, comment]
        @param String xmlfilename : Filename in which xml data is saved
        """
        self.cleanUp()
        self.begXMLList()
        for film_id in mf.filmlist:
            record = mf.filmlist[film_id]
            film = {"id": film_id}
            for index, tag in enumerate(self._FIELDS):
                film[tag] = record[index]
            self.setSingleXMLData(film)
            self.addToXMLList()
        self.finXMLList()
        self.saveXMLList(xmlfilename)

    def convertJoerchs2XML(self, mf, htmlfilmlist, debug=0, printer=0):
        """
        Read Joerchs htmlfile and convert it to xml structure in 'xmlblock'.
        Nothing is written to disk.

        @param MyFilms mf : actual Filmdataobject (not used here)
        @param String htmlfilmlist : Filename for html file, from which the films are read in
        @param Int debug : Switch for debugmodus; 0-off, 1-return filmlist
        @param Int printer : Switch for printing useful development messages; 0-off
        @return "" normally; if debug>0 the list of all films found in
                joerchs-html with a hint message appended
        """
        import readJoerchs
        # Tag names of the xml file, in the same order as the columns of
        # Joerchs html file. The counts should match; per the original
        # note, giving more tags than the html has yields an empty list.
        tags = [list(self._FIELDS)]
        html = readJoerchs.ReadJoerchs(htmlfilmlist, tags)
        entries = html.getFilmEntries(printer)

        self.cleanUp()
        self.begXMLList()
        for index, entry in enumerate(entries):
            # A fresh dictionary per film, so an entry with an empty first
            # field cannot inherit the values of the previous film.
            film = {}
            if entry[0]:
                for position, tag in enumerate(tags[0]):
                    film[str(tag)] = entry[position]
            # The position in the html list becomes the film id.
            film["id"] = index
            self.setSingleXMLData(film)
            self.addToXMLList()
        self.finXMLList()

        if debug > 0:
            entries += ['\nUngewoehnliche Eintraege bitte per Hand aus dem HTML Datei loeschen! Probleme bei mir waren z.B.: unvollstaendige HTML Tags (manche Browser ignorieren das), DOS Steuerzeichen und fiese Kackscheisse, die durch Fehler im cryptofs entstanden sind.\n']
            return entries
        return ""
|
||
|
|