codekasten/xml2typo3.2/xmlreader.py

213 lines
6.8 KiB
Python
Raw Normal View History

2011-11-03 22:14:10 +00:00
#!/usr/bin/env python
"""
This script fetches single nodes from an event management XML file.
"""
from optparse import OptionParser
from xml.dom.minidom import parse
import os, mysql_connect, configobj
def main():
    """
    Command line entry point.

    Parses the command line, loads the DB configuration (``--config`` is
    mandatory) and then performs exactly one of these actions:

    - ``--add-month``: create a new month template via create_new_month()
    - ``--delete-events ID``: delete all events stored under a month ID
    - ``--insert-xml FILE``: read the events from the XML file and insert
      them into the database

    If no action can be performed the option help is printed instead.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("--insert-xml", dest="filename",
                      help="read events from xml FILENAME")
    parser.add_option("-c", "--config", dest="configfile",
                      help="read DB config from FILENAME")
    parser.add_option("--add-month", dest="newmonth", default=False,
                      action="store_true",
                      help="create new template for a month")
    parser.add_option("--delete-events", dest="delevents",
                      help="delete alle events under a month ID")
    # type="int" lets optparse reject a non-numeric log level with a
    # usage error instead of a ValueError traceback later on.
    parser.add_option("-v", "--verbose", dest="verbose", type="int",
                      default=0, help="change Loglevel (0-3)")
    (options, args) = parser.parse_args()

    if not options.configfile:
        # print_help() writes the help text itself and returns None, so
        # it must not be wrapped in a print (that emitted a stray "None").
        parser.print_help()
        return
    print("getting config from: %s " % options.configfile)
    if (not os.access(options.configfile, os.R_OK)
            or not os.path.isfile(options.configfile)):
        print("Error: could not read configuration file (%s)\n"
              % options.configfile)
        return
    prefs = get_preferences(os.path.realpath(options.configfile))

    if options.filename:
        print("reading XML file: %s " % options.filename)
        if (not os.access(options.filename, os.R_OK)
                or not os.path.isfile(options.filename)):
            print("Error: could not read XML file (%s)\n" % options.filename)
            return

    if options.newmonth:
        create_new_month(prefs["static"]["pid"], prefs["month"]["number"])
        return

    if options.delevents:
        db = mysql_connect.DataBase()
        db.connect()
        db.delete_month_entries(options.delevents)
        db.close()
        return

    if not options.filename:
        # Nothing left to do without an XML file; parsing ``None`` would
        # only fail inside the XML reader.
        parser.print_help()
        return

    loglevel = options.verbose
    if loglevel >= 1:
        print(prefs)
    dic = readXML(options.filename)
    xmltags = prefs["static"]["xmltags"]
    if loglevel >= 2:
        print(xmltags)
    if loglevel >= 1:
        # Two spaces: the original print statement emitted its own
        # separator after the label's trailing blank.
        print("XML Quelle:  %s" % (dic["FMPDSORESULT"]["DATABASE"],))

    rows = dic["FMPDSORESULT"]["ROW"]
    if isinstance(rows, dict):
        # nodeToDic() only builds a list when a tag occurs more than
        # once; a file with a single ROW yields the bare row dictionary.
        rows = [rows]

    month = prefs["month"]["number"]
    pid = prefs["static"]["pid"]
    rubrikid = pid
    entry = 0
    days = 0
    data = []
    db = mysql_connect.DataBase()
    db.connect()
    # NOTE(review): the indentation of this loop was lost in the source;
    # the nesting below (two independent checks, one event appended per
    # row) is the reconstruction that matches the row counter check
    # further down - confirm against the original file.
    for item in rows:
        entry += 1
        if len(item["DATUM"]) > 1:
            # presumably only the first event of a day carries a date
            days += 1
        if len(item["RUBRIK_XML"]) > 1:
            # a row naming a rubric switches the rubric used for this
            # and all following events
            rubrikid = db.create_rubrik(item["RUBRIK_XML"], pid, month)
        data.append(db.prepare_event(item, month, days, pid, rubrikid))

    print("Tage durchlaufen: %i" % days)
    entries = len(rows)
    print("Eintraege insgesamt: %i" % entries)
    if entries != entry:
        print("Es gibt %i Eintraege in der xml Datei aber nur %i wurden durchlaufen" % (entries, entry))
    ## finally put the mess into mysql db
    print("Datenbank fuellen..")
    db.insert_event(data)
    db.close()
class NotTextNodeError(Exception):
    """
    Raised by getTextFromNode() when a node has a child that is not a
    text node.

    Derives from Exception so that it can be raised and caught on every
    Python version (a plain class cannot be raised on Python 3).
    """
    pass
def getTextFromNode(node):
    """
    Return the concatenated values of all text children of ``node``.

    An element without children yields the empty string. As soon as a
    child that is not a text node is encountered, NotTextNodeError is
    raised.
    """
    pieces = []
    for child in node.childNodes:
        if child.nodeType != child.TEXT_NODE:
            raise NotTextNodeError
        pieces.append(child.nodeValue)
    return "".join(pieces)
def nodeToDic(node):
    """
    Build a dictionary from the element children of ``node``.

    Every element child contributes one key (its tag name). The value is

    - the stripped, UTF-8 encoded text if the child contains only text
      (an empty child yields an empty string),
    - otherwise the result of calling nodeToDic() recursively on it.

    If more than one *direct* child carries the same tag name, the values
    of these children are collected, in document order, in a list stored
    under that name. Non-element children (text, comments, ...) of
    ``node`` itself are ignored.
    """
    # Count the direct element children per tag name once up front.
    # getElementsByTagName() would also count deeper descendants (turning
    # a single child into a list whenever its tag is reused further down)
    # and would walk the whole subtree again for every child.
    occurrences = {}
    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            occurrences[child.nodeName] = occurrences.get(child.nodeName, 0) + 1

    dic = {}
    for child in node.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        name = child.nodeName
        try:
            # text node
            value = getTextFromNode(child).strip().encode('utf-8')
        except NotTextNodeError:
            # the child has element children of its own
            value = nodeToDic(child)
        if occurrences[name] > 1:
            dic.setdefault(name, []).append(value)
        else:
            dic[name] = value
    return dic
def readXML(filename):
    """
    Parse the XML file ``filename`` with minidom and return the document
    converted by nodeToDic().
    """
    return nodeToDic(parse(filename))
# NOTE(review): these two exceptions were raised by get_preferences() but
# are neither defined nor imported anywhere in the visible module, so every
# error path ended in a NameError. If they are meant to come from another
# module, replace these definitions with the matching import.
class CBConfigError(Exception):
    """The configuration file could not be parsed."""
    pass


class CBConfigUnavailableError(CBConfigError):
    """The configuration file could not be opened or contains no settings."""
    pass


def get_preferences(config_file):
    """
    Load ``config_file`` with configobj and return the ConfigObj instance.

    Raises CBConfigUnavailableError if the file cannot be opened or if the
    loaded configuration is empty, and CBConfigError if configobj fails to
    parse the file.
    """
    try:
        prefs = configobj.ConfigObj(config_file)
    except IOError as err_msg:
        raise CBConfigUnavailableError("unable to open the config file (%s): %s" % (config_file, err_msg))
    except configobj.ConfigObjError as err_msg:
        raise CBConfigError("failed to load config file (%s): %s" % (config_file, err_msg))
    if not prefs:
        raise CBConfigUnavailableError("failed to load the config file: %s" % config_file)
    return prefs
def create_new_month(parentid, month):
    """
    Announce the action, open a database connection, let it prepare a
    new month from ``parentid`` and ``month`` and close the connection.
    """
    print("Creating new monthly template..")
    database = mysql_connect.DataBase()
    database.connect()
    database.prepare_new_month(parentid, month)
    database.close()
# Run main() only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()