codekasten/filmListXML.py

#!/usr/bin/env python
# Module metadata.
__version__ = "dev0.3"
__author__ = "AGE"
__date__ = "02004-08-06"

# Not pretty, but accepted practice according to the Python developers
# (imports live outside of the class).
import xml.dom.minidom
# minidom is a small, simple DOM implementation;
# a DOM is essentially a tree representation of XML content

# escape incoming strings before they are embedded in the XML
from xml.sax.saxutils import escape

class FilmListXML:
    """
    Handle every XML operation for film lists.

    The XML document is built as a plain string in 'xmlblock'.  The class
    writes that string to / reads it from a file and converts between the
    document and the 'filmlist' dictionary of a MyFilms object.  It does
    plain reading and writing only; the given data is not validated.

    @author AGE
    """
    # xmlblock holds the complete XML document as one string
    xmlblock = ""
    # text written into the <name> tag of the document header
    listname = "foomakilla"

    # Position of every known value inside the per-film lists stored in
    # MyFilms.filmlist: [title, lang, codec, cd, comment]
    _FIELDS = ("title", "lang", "codec", "cd", "comment")

    def __init__(self):
        self.cleanUp()

    def cleanUp(self):
        """
        Reset 'data', the dictionary that holds the values of one film.
        """
        self.data = {}

    def saveXMLList(self, filename):
        """
        Write whole 'xmlblock' into a file.

        @param String filename : name for file in which to save xml data
        """
        xmlfile = open(filename, "w")
        try:
            xmlfile.write(self.xmlblock)
        finally:
            # close the handle even if writing fails
            xmlfile.close()

    def begXMLList(self):
        """
        Start 'xmlblock' from blank with a small xml-header.

        'listname' is escaped so that '&', '<' and '>' cannot break the
        document.
        """
        self.xmlblock = '<?xml version="1.0"?>\n'
        self.xmlblock += '<all>\n\t<name>' + escape(self.listname) + '</name>\n'

    def finXMLList(self):
        """
        Add xml-footer to 'xmlblock'
        """
        self.xmlblock += '</all>'

    def setSingleXMLData(self, newfilm):
        """
        Make the given dictionary the current film ('data').

        @param Dict newfilm : values of one film, must contain the key "id"
                              before addToXMLList() is called
        """
        self.cleanUp()
        self.data = newfilm

    def setAllXMLData(self, filmlist):
        """
        Append every film of the given sequence to 'xmlblock'.

        @param List filmlist : sequence of film dictionaries
        """
        for film in filmlist:
            self.setSingleXMLData(film)
            self.addToXMLList()

    def readXMLFile(self, filename):
        """
        Read a xml file and overwrite 'xmlblock'!

        @param String filename : name of the file from which to read the xml data
        """
        xmlfile = open(filename, "r")
        try:
            self.xmlblock = xmlfile.read()
        finally:
            # close the handle even if reading fails
            xmlfile.close()

    def addToXMLList(self):
        """
        Creates XML tags from given "data" (self.data), but does _not_ save them!
        result looks like:
            <film id="foo">
                <!-- blabla -->
                <title>bar</title>
            </film>

        Values are converted with str(), stripped and XML-escaped.  An empty
        value is written as a single blank.  The comment (key "comment") is
        written first as an XML comment; every other key except "id" becomes
        a tag of the same name.

        Raises KeyError if 'data' has no "id" entry.

        @return None
        """
        values = self.data
        # '%s' formatting mirrors the original '%(id)s' conversion
        film_id = escape("%s" % values["id"], {'"': "&quot;"})
        parts = ['\t<film id="%s">\n' % film_id]

        # the comment is handled first
        if "comment" in values:
            text = str(values.get("comment")).strip()
            # "--" is not allowed inside an XML comment; entities are not
            # expanded there, so the dashes are separated instead of escaped
            while "--" in text:
                text = text.replace("--", "- -")
            if text:
                parts.append('\t\t<!-- %s -->\n' % text)
            else:
                # keep a blank so the comment is never completely empty
                parts.append('\t\t<!-- -->\n')

        # all remaining entries become tags
        for entry in values:
            if entry in ("id", "comment"):
                continue
            text = str(values.get(entry)).strip()
            if text:
                parts.append('\t\t<%s>%s</%s>\n' % (entry, escape(text), entry))
            else:
                # a blank placeholder instead of an empty tag
                parts.append('\t\t<%s> </%s>\n' % (entry, entry))

        parts.append('\t</film>\n')
        self.xmlblock += "".join(parts)

    def readInXML(self, mf, filename, printer=0):
        """
        Parse a given xml file and check for a special structure.
        The data is written to mf.filmlist, a dictionary that maps the film
        id (int) to the list [title, lang, codec, cd, comment].
        This is not a general xml parser for any other xml structure, and
        the content is hardly validated.. better be careful!

        Reading stops at the first film whose id is greater than 0 and
        already present in mf.filmlist; films read before that stay stored.
        A film id that is not an integer raises ValueError.

        @param MyFilms mf : Object to fill with read films
        @param String filename : Name of the xmlfile to parse; if empty, the
                                 current content of 'xmlblock' is parsed
        @param Int printer : Switch for printing useful development messages; 0-off
        """
        # make sure the file content ends up in 'xmlblock'
        if filename:
            if printer > 0:
                print("reading from: " + filename)
            self.readXMLFile(filename)
        # build the document object from the xml text
        dom = xml.dom.minidom.parseString(self.xmlblock)
        try:
            # rather unimportant check
            self.checkNameTag(dom)
            content = "film"
            attributname = "id"
            # work through every "film" tag of the document
            for film in dom.getElementsByTagName(content):
                # read <film id="???">
                if printer > 0:
                    print("\n%s %s: %s" % (content, attributname,
                                           film.getAttribute(attributname)))
                attribut = int(film.getAttribute(attributname))
                # id=0 is never treated as a duplicate, it only serves as
                # placeholder and tag info
                if attribut > 0 and attribut in mf.filmlist:
                    print("%s: %s exists" % (attributname, attribut))
                    return
                mf.filmlist[attribut] = self._readFilmNode(film)
                if printer > 0:
                    print(mf.filmlist[attribut])
        finally:
            # release the DOM on every exit path, including the early return
            dom.unlink()

    def _readFilmNode(self, film):
        """
        Collect the known values of one <film> element.

        @param Element film : <film> node of the document
        @return List : [title, lang, codec, cd, comment]
        """
        values = [""] * len(self._FIELDS)
        # every field except the comment is stored as a tag
        tagnames = list(self._FIELDS[:-1])
        for node in film.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                if node.tagName in tagnames:
                    values[tagnames.index(node.tagName)] = self._nodeText(node)
                else:
                    print("found unknown node: %s" % node.tagName)
            # do not forget the comment
            if node.nodeType == node.COMMENT_NODE:
                values[-1] = str(node.data)
        return values

    def _nodeText(self, node):
        """
        Return the text directly contained in an element, "" if it is empty.

        @param Element node : element whose text is wanted
        @return String : concatenated text and CDATA children
        """
        textkinds = (node.TEXT_NODE, node.CDATA_SECTION_NODE)
        return "".join([str(child.data) for child in node.childNodes
                        if child.nodeType in textkinds])

    def checkNameTag(self, dom):
        """
        Check if a name-tag exists in given document object and print a
        hint if it is missing.

        @param DOM dom : Content of xml file
        """
        xxx = "name"
        if not dom.getElementsByTagName(xxx):
            print("%s: not found\n\tinsert <%s></%s> tag!" % (xxx, xxx, xxx))

    def myFilmsToXMLFile(self, mf, xmlfilename):
        """
        Convert films of a MyFilms object to xml and save in xml file.

        @param MyFilms mf : Object whose 'filmlist' maps a film id to the
                            list [title, lang, codec, cd, comment]
        @param String xmlfilename : Filename in which xml data is saved
        """
        self.cleanUp()
        self.begXMLList()
        for film_id in mf.filmlist.keys():
            values = mf.filmlist[film_id]
            film = {"id": film_id}
            for index, tag in enumerate(self._FIELDS):
                film[tag] = values[index]
            self.setSingleXMLData(film)
            self.addToXMLList()
        self.finXMLList()
        self.saveXMLList(xmlfilename)

    def convertJoerchs2XML(self, mf, htmlfilmlist, debug=0, printer=0):
        """
        Does what it sounds like - what a surprise ;)
        Read Joerchs htmlfile and convert it to xml structure.
        The result is only kept in 'xmlblock', it is not written to a file.

        @param MyFilms mf : actual Filmdataobject (not used here)
        @param String htmlfilmlist : Filename for html file, from which the films are read in
        @param Int debug : Switch for debugmodus; 0-off, 1-return filmlist
        @param Int printer : Switch for printing useful development messages; 0-off
        @return "" (empty string); if debug>0: list of all films found in
                joerchs-html with a hint text appended
        """
        import readJoerchs
        # Standard tags of the xml file.  The order is the one used in
        # Joerchs html file and the number should match; if more tags are
        # given than the html contains, an empty list comes back.
        tags = [["title", "lang", "codec", "cd", "comment"]]
        # read the html file and fetch the relevant entries
        html = readJoerchs.ReadJoerchs(htmlfilmlist, tags)
        entries = html.getFilmEntries(printer)

        self.cleanUp()
        self.begXMLList()
        # the position of an entry in the list determines its id
        for index, entry in enumerate(entries):
            # a fresh dictionary per film, so that an entry without a first
            # field cannot inherit the values of the previous film
            film = {}
            if entry[0]:
                for position, tag in enumerate(tags[0]):
                    film[str(tag)] = entry[position]
            # do not forget the id
            film["id"] = index
            self.setSingleXMLData(film)
            # and assemble the xml string
            self.addToXMLList()

        self.finXMLList()  # writes the xml footer
        if debug > 0:
            entries += ['\nUngewoehnliche Eintraege bitte per Hand aus dem HTML Datei loeschen! Probleme bei mir waren z.B.: unvollstaendige HTML Tags (manche Browser ignorieren das), DOS Steuerzeichen und fiese Kackscheisse, die durch Fehler im cryptofs entstanden sind.\n']
            return entries
        return ""