codekasten/FilmBar/filmListXML.py

#!/usr/bin/env python
# Module metadata: release tag, author handle and date of this revision.
__version__="dev0.3"
__author__="AGE"
# NOTE(review): the five-digit year is kept exactly as found; presumably
# intentional -- confirm before "correcting" it.
__date__="02004-08-06"

# Not pretty, but accepted practice according to the Python developers
# (import at module level, outside the class).
import xml.dom.minidom
# minidom is a small, simple DOM implementation;
# DOM is essentially a tree representation of XML content.

# Escape incoming strings..
#from xml.sax.saxutils import escape

class FilmListXML:
    """
    This class handles every xml operation for films.
    It does just dumb reading and writing but no real test on the given
    data.

    The xml text is kept as one plain string in 'xmlblock'; the film that
    is currently being converted is kept as a dictionary in 'data'.

    The code is written so that it runs unchanged on Python 2 and Python 3
    (single-argument print(...), 'in' instead of has_key, no list methods
    on dict.keys()).

    @author AGE
    """
    # xmlblock holds the text of the whole xml file
    xmlblock = ""
    # text written into the leading <name> tag by begXMLList()
    listname = "foomakilla"

    # position of every known film tag inside a film list; the comment is
    # not a tag but a xml comment node and always goes to position 4
    _FIELD_INDEX = {"title": 0, "lang": 1, "codec": 2, "cd": 3}
    _COMMENT_INDEX = 4

    def __init__(self):
        self.cleanUp()

    def cleanUp(self):
        """
        Forget the film that is currently held in 'data'.
        """
        # data takes the values of exactly one film (as dictionary)
        self.data = {}

    def _escape(self, value, attribute=0):
        """
        Convert a value to a string that is safe inside xml text.

        @param Object value : value to write, it is converted with str()
        @param Int attribute : if true, double quotes are escaped too, so
                               the result can be put between "..."
        @return String : text with &, < and > (and optionally ") escaped
        """
        # local import: the module header only imports xml.dom.minidom
        from xml.sax.saxutils import escape
        if attribute:
            return escape(str(value), {'"': "&quot;"})
        return escape(str(value))

    def saveXMLList(self, filename):
        """
        Write whole 'xmlblock' into a file.

        @param String filename : name for file in which to save xml data
        """
        xmlfile = open(filename, "w")
        try:
            xmlfile.write(self.xmlblock)
        finally:
            # close the file even if writing failed
            xmlfile.close()

    def begXMLList(self):
        """
        Start 'xmlblock' from blank with a small xml-header.
        """
        self.xmlblock = '<?xml version="1.0"?>\n'
        # the list name is escaped, otherwise a '&' or '<' in it would
        # make the whole document unparseable
        self.xmlblock += '<all>\n\t<name>' + self._escape(self.listname) + '</name>\n'

    def finXMLList(self):
        """
        Add xml-footer to 'xmlblock'
        """
        self.xmlblock += '</all>'

    def setSingleXMLData(self, newfilm):
        """
        Make the given dictionary the current film.
        The dictionary is not copied, 'data' refers to the same object.

        @param Dict newfilm : film values; addToXMLList() needs the key "id"
        """
        self.cleanUp()
        self.data = newfilm

    def setAllXMLData(self, filmlist):
        """
        Append every film of the given sequence to 'xmlblock'.

        @param List filmlist : sequence of film dictionaries
        """
        for film in filmlist:
            self.setSingleXMLData(film)
            self.addToXMLList()

    def readXMLFile(self, filename):
        """
        Read a xml file and overwrite 'xmlblock'!

        @param String filename : name of the file from which to read the xml data
        """
        xmlfile = open(filename, "r")
        try:
            self.xmlblock = xmlfile.read()
        finally:
            # close the file even if reading failed
            xmlfile.close()

    def addToXMLList(self):
        """
        Creates XML tags from given "data" (self.data), but does _not_ save them!
        result looks like:
            <film id="foo">
                <!-- blabla -->
                <title>bar</title>
            </film>

        Every key except "id" and "comment" becomes a tag of the same name
        (the key itself is not checked for being a valid tag name).
        Values are converted with str(), stripped and xml-escaped.
        'xmlblock' is only changed if the whole film could be converted.

        @return None
        @raise KeyError : if "data" has no "id"
        """
        film = self.data
        parts = ['\t<film id="%s">\n' % self._escape(film["id"], 1)]
        # the comment is handled first
        if "comment" in film:
            # remove blanks from start and end
            comment = str(film.get("comment")).strip()
            # "--" is not allowed inside a xml comment and would break
            # readInXML; repeat until runs like "---" are split up too
            while "--" in comment:
                comment = comment.replace("--", "- -")
            if comment:
                parts.append('\t\t<!-- %s -->\n' % comment)
            else:
                # keep at least one blank inside the comment
                parts.append('\t\t<!-- -->\n')
        # run through all the other tags
        for entry in film:
            if entry in ("id", "comment"):
                continue
            value = self._escape(str(film.get(entry)).strip())
            if not value:
                # keep at least one blank inside the tag, so that a reader
                # always finds a text node
                value = ' '
            parts.append('\t\t<%s>%s</%s>\n' % (entry, value, entry))
        parts.append('\t</film>\n')
        self.xmlblock += "".join(parts)

    def _nodeText(self, node):
        """
        Collect the text that stands directly inside an element.

        @param Node node : element node
        @return String : joined text/CDATA children, "" if there are none
        """
        text = ""
        for child in node.childNodes:
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
                text += child.data
        return text

    def _filmNodeToList(self, film):
        """
        Convert one <film> element to a film list.

        @param Node film : <film> element
        @return List : [title, lang, codec, cd, comment], missing values are ""
        """
        fields = ["", "", "", "", ""]
        for node in film.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                # write all known film tags into the list
                if node.tagName in self._FIELD_INDEX:
                    fields[self._FIELD_INDEX[node.tagName]] = str(self._nodeText(node))
                else:
                    print("found unknown node: %s" % node.tagName)
            elif node.nodeType == node.COMMENT_NODE:
                # do not forget the comment
                fields[self._COMMENT_INDEX] = str(node.data)
        return fields

    def readInXML(self, mf, filename, printer=0):
        """
        Parse a given xml file and check for a special structure.
        The data is written to mf.filmlist, a dictionary that maps the
        film id (as int) to a list [title, lang, codec, cd, comment].
        This is not a xml parser at all for any other xml structure.
        It is easy to fool, so better be careful!

        If a film id greater than 0 already exists in mf.filmlist, a
        message is printed and reading stops at this film; films read
        before stay in mf.filmlist.

        @param MyFilms mf : Object to fill with read films
        @param String filename : Name of the xmlfile to parse; if empty,
                                 the current 'xmlblock' is parsed
        @param Int printer : Switch for printing useful development messages; 0-off
        @raise ValueError : if a film id is not a number
        """
        # make sure that the file is read into the variable xmlblock
        if filename:
            if printer > 0:
                print("reading from: " + filename)
            self.readXMLFile(filename)
        # first the document object is built from the xml text
        dom = xml.dom.minidom.parseString(self.xmlblock)
        try:
            # rather unimportant check
            self.checkNameTag(dom)
            content = "film"
            attributname = "id"
            # now every "film" tag of the NodeList is worked off
            for film in dom.getElementsByTagName(content):
                # read <film id="???">
                rawid = film.getAttribute(attributname)
                if printer > 0:
                    print("\n%s %s: %s" % (content, attributname, rawid))
                attribut = int(rawid)
                # id=0 is never treated as duplicate, it only serves as
                # placeholder and tag info
                if attribut > 0 and attribut in mf.filmlist:
                    print("%s: %s exists" % (attributname, attribut))
                    return
                mf.filmlist[attribut] = self._filmNodeToList(film)
                if printer > 0:
                    print(mf.filmlist[attribut])
        finally:
            # unlink() is probably unnecessary with newer python, but it
            # has to happen on the early return above as well
            dom.unlink()

    def checkNameTag(self, dom):
        """
        Check if a name-tag exists in given document object.
        Only prints a hint if it is missing, nothing else happens.

        @param DOM dom : Content of xml file
        """
        tagname = "name"
        if not dom.getElementsByTagName(tagname):
            print("%s: not found\n\tinsert <%s></%s> tag!" % (tagname, tagname, tagname))

    def myFilmsToXMLFile(self, mf, xmlfilename):
        """
        Convert films of a MyFilms object to xml and save in xml file.

        @param MyFilms mf : Object whose 'filmlist' maps a film id to a
                            list [title, lang, codec, cd, comment]
        @param String xmlfilename : Filename in which xml data is saved
        """
        self.cleanUp()
        self.begXMLList()
        for filmid in list(mf.filmlist.keys()):
            #TODO: improve this conversion, also in readInXML
            entry = mf.filmlist[filmid]
            self.data = {
                'id': filmid,
                'title': entry[0],
                'lang': entry[1],
                'codec': entry[2],
                'cd': entry[3],
                'comment': entry[4],
            }
            self.addToXMLList()
        self.finXMLList()
        self.saveXMLList(xmlfilename)

    def convertJoerchs2XML(self, mf, htmlfilmlist, debug=0, printer=0):
        """
        Does what it sounds like - what a surprise ;)
        Read Joerchs htmlfile and convert it to xml structure.
        The result is only kept in 'xmlblock', it is not saved to a file.

        The position of a film in the html file is used as its id.
        An entry without a first field gets a <film> tag with the id only.

        @param MyFilms mf : actual Filmdataobject (not used here)
        @param String htmlfilmlist : Filename for html file, from which the films are read in
        @param Int debug : Switch for debugmodus; 0-off, 1-return filmlist
        @param Int printer : Switch for printing useful development messages; 0-off
        @return "" if debug is off; if debug>0: all films found in
                joerchs-html, followed by one hint text
        """
        import readJoerchs
        # standard tags of the xml file
        # these are the xml tag names; the order is the same as in Joerchs
        # html file; the number should match. NOTE(review): the original
        # comment says that an empty list comes back if more tags are
        # given than the html has -- this is decided inside readJoerchs.
        tags = [["title", "lang", "codec", "cd", "comment"]]
        # read html file and keep the important fields in 'entries'
        html = readJoerchs.ReadJoerchs(htmlfilmlist, tags)
        entries = html.getFilmEntries(printer)

        self.cleanUp()
        self.begXMLList()
        # work off the whole list
        for index, entry in enumerate(entries):
            # a new dictionary for every film, otherwise a film without
            # first field would be written with the values of the film
            # before it
            film = {}
            if entry and entry[0]:
                for tag, value in zip(tags[0], entry):
                    film[str(tag)] = value
            # do not forget the id
            film["id"] = index
            self.setSingleXMLData(film)
            # and put it together to the xml string
            self.addToXMLList()

        self.finXMLList()  # writes the xml end
        if debug > 0:
            entries += ['\nUngewoehnliche Eintraege bitte per Hand aus dem HTML Datei loeschen! Probleme bei mir waren z.B.: unvollstaendige HTML Tags (manche Browser ignorieren das), DOS Steuerzeichen und fiese Kackscheisse, die durch Fehler im cryptofs entstanden sind.\n']
            return entries
        return ""
FilmBar - the first usable version of a filmlist conversion/management tool 2004-08-06 20:39:01 +00:00			`#!/usr/bin/env python`
			`__version__="dev0.3"`
			`__author__="AGE"`
			`__date__="02004-08-06"`

			`#is nicht schoen aber laut python developers genehm`
			`#(import ausserhalb der klasse)`
			`import xml.dom.minidom`
			`# minidom ist eine kleine, einfache dom implementierung`
			`# dom ist quasi eine baumdarstellung fuer xml inhalte`

			`# reingehende strings escapen..`
			`#from xml.sax.saxutils import escape`

			`class FilmListXML:`
			`"""`
			`This class handles every xml operation for films.`
			`It does just dumb reading and writing but no real test on the given`
			`data.`

			`@author AGE`
			`"""`
			`# in xmlblock wird die gesamte xml datei gehalten`
			`xmlblock = ""`
			`listname = "foomakilla"`

			`def __init__(self):`
			`self.cleanUp()`

			`def cleanUp(self):`
			`# data nimmt jeweils die daten eines films auf (als dictionary)`
			`self.data = {}`
			`#self.id = 0`

			`def saveXMLList(self, filename):`
			`"""`
			`Write whole 'xmlblock' into a file.`

			`@param String filename : name for file in which to save xml data`
			`"""`
			`xmlfile = open(filename,"w")`
			`xmlfile.write(self.xmlblock)`
			`xmlfile.close()`

			`def begXMLList(self):`
			`"""`
			`Start 'xmlblock' from blank with a small xml-header.`
			`"""`
			`self.xmlblock = '<?xml version="1.0"?>\n'`
			`self.xmlblock += '<all>\n\t<name>'+self.listname+'</name>\n'`

			`def finXMLList(self):`
			`"""`
			`Add xml-footer to 'xmlblock'`
			`"""`
			`self.xmlblock += '</all>'`

			`def setSingleXMLData(self, newfilm):`
			`self.cleanUp()`
			`self.data = newfilm`

			`def setAllXMLData(self, filmlist):`
			`for film in filmlist:`
			`self.setSingleXMLData(film)`
			`self.addToXMLList()`

			`def readXMLFile(self, filename):`
			`"""`
			`Read a xml file and overwrite 'xmlblock'!`

			`@param String filename : name of the file from which to read the xml data`
			`"""`
			`xmlfile = open(filename,"r")`
			`self.xmlblock = xmlfile.read()`
			`xmlfile.close()`

			`def addToXMLList(self):`
			`"""`
			`Creates XML tags from given "data" (self.data), but does _not_ save them!`
			`result looks like:`
			`<film id="foo">`
			`pass`
			`<!-- blabla -->`
			`<title>bar</title>`
			`</film>`

			`@return None`
			`"""`
			`# mensch beachte %(id)s, das geht gut mit dicts`
			`self.xmlblock += '\t<film id="%(id)s">\n' % self.data`
			`entrylist = self.data.keys()`
			`entrylist.remove("id") #we don't need it anymore`
			`# Kommentare werden zuerst behandelt`
			`if self.data.has_key('comment'):`
			`self.xmlblock += '\t\t<!--'`
			`# Leerzeichen von anfang&ende entfernen`
			`temp = str(self.data.get("comment")).strip()`
			`if len(temp) == 0:`
			`# DOM mag es nicht, wenn in den Tags gar nichts steht, deswegen ein 'blank'`
			`self.xmlblock += ' '`
			`else:`
			`self.xmlblock += ' %s ' % temp`
			`self.xmlblock += '-->\n'`
			`entrylist.remove("comment") #we don't need it anymore`
			`# alle anderen Tags durchrattern`
			`for entry in entrylist:`
			`#self.xmlblock += '\t\t<%s>%s</%s>\n' % (entry, self.data.get(entry), entry)`
			`temp = str(self.data.get(entry)).strip()`
			`if len(temp) == 0:`
			`self.xmlblock += '\t\t<%s> </%s>\n' % (entry, entry)`
			`else:`
			`self.xmlblock += '\t\t<%s>%s</%s>\n' % (entry, temp, entry)`
			`self.xmlblock += '\t</film>\n'`
			`# aufaeumen und Stuehle hochstellen`
			`#self.cleanUp()`


			`def readInXML(self, mf, filename, printer=0):`
			`"""`
			`Parse a given xml file and check for a special structure.`
			`The data ist written to myFilms`
			`This is not a xml parser at all for any other xml structur.`
			`You can fool this bitch like nothing else.. better be careful!`

			`@param MyFilms mf : Object to fill with read films`
			`@param String filename : Name for the xmlfile to parse to`
			`@param Int printer : Switch for printing usefuls development messages; 0-off`
			`"""`
			`# dafuer sorgen, dass Datei in die Variable xmlblock eingelesen wird`
			`self.data`
			`if filename:`
			`if printer > 0:`
			`print "reading from: "+filename`
			`self.readXMLFile(filename)`
			`# jetzt kommt richtiger xml kram..`
			`# als erstes bauen wir das Document Object aus dem xml Geraffel`
			`dom = xml.dom.minidom.parseString(self.xmlblock)`
			`# relativ unwichtige Methode`
			`self.checkNameTag(dom)`
			`# myFilms ist ein dictionary, das die film ids als keys benutzt und dann`
			`# einem array zuordnet in dem die filmdaten stehen`
			`# z.B. waere filmlist['666'][0] der filmtitel des 666. films`
			`content = "film"`
			`attributname = "id"`
			`films = dom.getElementsByTagName(content)`
			`# jetzt werden die einzelnen "film" tags in der NodeList abgearbeitet`
			`for film in films:`
			`# <film id="???"> lesen`
			`if printer > 0:`
			`print "\n%s %s: %s" % (content, attributname, film.getAttribute(attributname))`
			`attribut = int(film.getAttribute(attributname))`
			`# id=0 wird ignoriert, dient nur als platzhalter und taginfo`
			`if attribut > 0 and mf.filmlist.has_key(attribut):`
			`print "%s: %s exists" % (attributname, attribut)`
			`return`

			`# [id, title, lang, codec, cd, comment]`
			`temparray = ["","","","",""]`
			`for node in film.childNodes:`
			`# alle bekannten film-tags in temparray schreiben`
			`if node.nodeType == node.ELEMENT_NODE:`
			`nodetags = node.childNodes[0]`
			`if node.tagName == "title":`
			`temparray[0] = str(nodetags.data)`
			`#print "\t%s: %s" % (node.tagName, nodetags.data)`
			`elif node.tagName == "lang":`
			`temparray[1] = str(nodetags.data)`
			`#print "\t%s: %s" % (node.tagName, nodetags.data)`
			`elif node.tagName == "codec":`
			`temparray[2] = str(nodetags.data)`
			`#print "\t%s: %s" % (node.tagName, nodetags.data)`
			`elif node.tagName == "cd":`
			`temparray[3] = str(nodetags.data)`
			`#print "\t%s: %s" % (node.tagName, nodetags.data)`
			`else:`
			`print "found unknown node: %s" % node.tagName`
			`# das kommentar nicht vergessen`
			`if node.nodeType == node.COMMENT_NODE:`
			`temparray[4] = str(node.data)`
			`# die array nummer entspricht hier nur zufaellig der id`
			`# (attribut=id)`
			`mf.filmlist[attribut] = temparray`
			`if printer > 0:`
			`print mf.filmlist[attribut]`

			`# unlink() ist bei neuerem python wohl unnoetig`
			`dom.unlink()`

			`def checkNameTag(self, dom):`
			`"""`
			`Check if a leading name-tag exists in given document object.`

			`@param DOM dom : Content of xml file`
			`"""`
			`xxx = "name"`
			`try:`
			`if dom.getElementsByTagName(xxx)[0] != None:`
			`xxxtag = dom.getElementsByTagName(xxx)[0]`
			`# es sollte nur einen bigtitle geben, den lesen wir mit getText`
			`for x in xxxtag.childNodes:`
			`if x.nodeType == x.TEXT_NODE:`
			`#print "%s: %s" % (xxx, x.data)`
			`pass`
			`except:`
			`print "%s: not found\n\tinsert <%s></%s> tag!" % (xxx, xxx, xxx)`

			`def myFilmsToXMLFile(self, mf, xmlfilename):`
			`"""`
			`Convert films of a MyFilms object to xml and save in xml file.`

			`@param MyFilms mf :`
			`@param String xmlfilename : Filename in which xml data is saved`
			`"""`
			`self.cleanUp()`
			`self.begXMLList()`
			`films = mf.filmlist.keys()`
			`for id in films:`
			`#TODO: diese konvertierung verbessern, auch oben`
			`self.data['id'] = id`
			`self.data['title'] = mf.filmlist[id][0]`
			`self.data['lang'] = mf.filmlist[id][1]`
			`self.data['codec'] = mf.filmlist[id][2]`
			`self.data['cd'] = mf.filmlist[id][3]`
			`self.data['comment'] = mf.filmlist[id][4]`
			`self.addToXMLList()`
			`self.finXMLList()`
			`self.saveXMLList(xmlfilename)`

			`def convertJoerchs2XML(self, mf, htmlfilmlist, debug=0, printer=0):`
			`"""`
			`Does what it sounds like - what a surprise ;)`
			`Read Joerchs htmlfile and convert it to xml structure.`

			`@param MyFilms mf : actual Filmdataobject`
			`@param String htmlfilmlist : Filename for html file, from which the films are read in`
			`@param Int debug : Switch for debugmodus; 0-off, 1-return filmlist`
			`@param Int printer : Switch for printing usefuls development messages; 0-off`
			`@return List : empty, if debug>0: all films found in joerchs-html`
			`"""`
			`import readJoerchs`
			`###import filmListXML`
			`# standard Tags der xml Datei`
			`# hier stehen die xml tag Bezeichnungen; die Reihenfolge ist die, wie`
			`# sie auch in Joerchs html Datei ist; die Anzahl sollte`
			`# uebereinstimmen, wenn mehr tags angegeben sind, als es in der html`
			`# gibt, dann kommt eine leere liste zurueck`
			`tags = [["title", "lang", "codec", "cd", "comment"]]`
			`# htmldatei einlesen und die wichtigen tags in 'list' speichern`
			`list = []`
			`html = readJoerchs.ReadJoerchs(htmlfilmlist, tags)`
			`list = html.getFilmEntries(printer)`

			`self.cleanUp()`
			`self.begXMLList()`
			`# ganze liste abarbeiten`
			`for i in range(len(list)):`
			`# jedes tag abarbeiten`
			`for j in range(len(tags[0])):`
			`# wenn es das i-te element in der liste gibt`
			`if list[i][0]:`
			`# alle listenfelder als liste an .data uebergeben`
			`# die reihenfolge der eingelesenen filme (= i)`
			`# bestimmt dabei die in .data benutzte id`
			`self.data[str(tags[0][j])] = list[i][j]`
			`# id nicht vergessen`
			`self.data["id"] = i`
			`# erst jetzt werden die .data werte dem objekt gegeben`
			`self.setSingleXMLData(self.data)`
			`# und zum xml string zusammengebastelt`
			`self.addToXMLList()`

			`self.finXMLList() #schreibt das xml ende`
			`#self.saveXMLList(xmlfilmlist) #speichert den block in datei`
			`#self.readInXML(xmlfilmlist)`
			`if debug>0:`
			`list += ['\nUngewoehnliche Eintraege bitte per Hand aus dem HTML Datei loeschen! Probleme bei mir waren z.B.: unvollstaendige HTML Tags (manche Browser ignorieren das), DOS Steuerzeichen und fiese Kackscheisse, die durch Fehler im cryptofs entstanden sind.\n']`
			`return list`
			`return ""`