codekasten/xml2typo3/xmlreader.py
2008-04-16 14:19:32 +00:00

146 lines
4.3 KiB
Python
Executable file

#!/usr/bin/env python
"""
This script fetches single nodes from an event-management XML file.
"""
from optparse import OptionParser
from xml.dom.minidom import parse
import mysql_connect
def main():
    """
    Command line entry point.

    Reads the XML file given with -f/--file via readXML(), walks over all
    ROW entries below FMPDSORESULT, hands each one to
    mysql_connect.DataBase.prepare_event() and prints a few statistics.
    Without -f/--file the usage help is printed and the function returns.

    All print() calls receive one pre-formatted string, so the output is
    the same for the Python 2 print statement and the Python 3 function.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-f", "--file", dest="filename",
                      help="read xml data from FILENAME")
    parser.add_option("-c", "--config", dest="configfile",
                      help="read DB config from FILENAME")
    # NOTE(review): options.configfile is parsed but never used below.
    (options, args) = parser.parse_args()
    if not options.filename:
        # print_help() writes the help text itself and returns None, so
        # printing its result would add a stray "None" line.
        parser.print_help()
        return
    print("reading XML file: %s " % options.filename)

    dic = readXML(options.filename)
    result = dic["FMPDSORESULT"]
    print("")
    print("XML Quelle:  %s" % (result["DATABASE"],))

    rows = result.get("ROW", [])
    if not isinstance(rows, list):
        # nodeToDic() only builds a list when a tag occurs more than once;
        # a file with a single ROW yields the bare row dictionary instead.
        rows = [rows]

    entry = 0
    days = 0
    data = []
    test = mysql_connect.DataBase()
    for item in rows:
        entry += 1
        if len(item["DATUM"]) > 1:
            days += 1
        # NOTE(review): the original indentation of this file was lost;
        # it is assumed that every row is prepared, not only rows that
        # carry a DATUM value -- confirm against the data.
        data.append(test.prepare_event(item))

    print("Tage durchlaufen: %i" % days)
    entries = len(rows)
    print("Eintraege insgesamt: %i" % entries)
    if entries != entry:
        print("Es gibt %i Eintraege in der xml Datei aber nur %i wurden durchlaufen" % (entries, entry))

    ## finally put the mess into mysql db
    test.connect()
    try:
        print("Datenbank fuellen..")
        # The calls that would actually write to the database are disabled:
        #test.prepare_new_month("Mai")
        #test.insert_event(data)
    finally:
        # release the connection even if something above fails
        test.close()
class NotTextNodeError(Exception):
    """
    Raised by getTextFromNode() when a node has a child that is not text.

    Derives from Exception so that it can be raised and caught on every
    Python version; a plain class is rejected by `raise` in Python 3.
    """
    pass
def getTextFromNode(node):
    """
    Return the concatenated text of all children of node.

    Plain text nodes and CDATA sections both count as text. If node has
    any other kind of child (e.g. an element), NotTextNodeError is
    raised. A node without children yields the empty string.
    """
    parts = []
    for child in node.childNodes:
        # CDATA sections carry character data just like text nodes;
        # rejecting them would make callers treat the content as a
        # (child-less) sub-structure and drop it.
        if child.nodeType not in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            raise NotTextNodeError
        parts.append(child.nodeValue)
    return "".join(parts)
def nodeToDic(node):
    """
    nodeToDic() scans through the element children of node and makes a
    dictionary from the content.
    three cases are differentiated:
    - if a child contains no other nodes than text, it is a text-node
    and {nodeName:text} is merged into the dictionary (the text is
    stripped and encoded as utf-8).
    - if there is more than one direct child with the same name
    then these children will be appended to a list and this
    list is merged to the dictionary in the form: {nodeName:list}.
    - else, nodeToDic() will call itself recursively on
    the child (merging {nodeName:nodeToDic()} to the dictionary).
    """
    # Count the direct element children per tag name once up front.
    # getElementsByTagName() would also count deeper descendants with the
    # same name and rescans the whole subtree for every single child.
    siblings = {}
    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            siblings[child.nodeName] = siblings.get(child.nodeName, 0) + 1

    dic = {}
    for child in node.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        name = child.nodeName
        try:
            # text node
            value = getTextFromNode(child).strip().encode('utf-8')
        except NotTextNodeError:
            # node with sub-structure: convert it recursively
            value = nodeToDic(child)
        if siblings[name] > 1:
            # repeated tag: collect all values in one list, in document order
            dic.setdefault(name, []).append(value)
        else:
            dic[name] = value
    return dic
def readXML(filename):
    """
    Parse the XML document `filename` with minidom and return its
    content as the nested dictionary built by nodeToDic().
    """
    return nodeToDic(parse(filename))
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()