#!/usr/bin/env python
"""
Read an event-management XML file and prepare its rows for the database.

The XML document is turned into nested dictionaries and lists by readXML();
main() then walks the ROW entries found below the FMPDSORESULT element and
hands each one to mysql_connect.DataBase.prepare_event().
"""
from optparse import OptionParser
from xml.dom.minidom import parse


def main():
    """
    Command line entry point.

    Options:
      -f / --file    path of the XML file to read (required; without it the
                     help text is printed and nothing else happens).
      -c / --config  accepted for a DB config file, but the value is not
                     read anywhere in this function.
    """
    # Imported here because main() is the only user of the project's DB
    # module; the XML helpers below stay importable without it.
    import mysql_connect

    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-f", "--file", dest="filename",
                      help="read xml data from FILENAME")
    parser.add_option("-c", "--config", dest="configfile",
                      help="read DB config from FILENAME")
    options, _args = parser.parse_args()

    if not options.filename:
        # print_help() writes the help text itself and returns None, so it
        # must not be wrapped in another print.
        parser.print_help()
        return
    print("reading XML file: %s " % options.filename)

    result = readXML(options.filename)["FMPDSORESULT"]
    print("")
    print("XML Quelle:  %s" % (result["DATABASE"],))

    # nodeToDic() only builds a list when a tag occurs more than once, so a
    # file with exactly one ROW yields a bare value and a file without any
    # ROW has no such key at all.
    rows = _as_list(result.get("ROW", []))

    entry = 0   # rows actually visited by the loop
    days = 0    # rows whose DATUM value is longer than one character
    data = []
    db = mysql_connect.DataBase()
    for item in rows:
        entry += 1
        if len(item["DATUM"]) > 1:
            days += 1
        # NOTE(review): the flattened original does not show whether this
        # call sat inside the DATUM check above; it is kept at loop level so
        # that every row is prepared -- confirm.
        data.append(db.prepare_event(item))

    print("Tage durchlaufen: %i" % days)
    entries = len(rows)
    print("Eintraege insgesamt: %i" % entries)
    if entries != entry:
        # Sanity check: every row in the file should have been visited.
        print("Es gibt %i Eintraege in der xml Datei aber nur %i wurden durchlaufen"
              % (entries, entry))

    # Finally talk to the MySQL database. The original has the actual insert
    # (insert_event(data)) commented out, so the connection is only opened
    # and closed again.
    db.connect()
    print("Datenbank fuellen..")
    db.close()


def _as_list(value):
    """Return value unchanged if it is a list, else wrap it in a list."""
    if isinstance(value, list):
        return value
    return [value]


class NotTextNodeError(Exception):
    """Raised by getTextFromNode() when a node has a non-text child."""


def getTextFromNode(node):
    """
    Scan through all children of node and gather their text.

    Returns the concatenated nodeValue of all children ("" if there are
    none). If node has a child that is not a TEXT_NODE, NotTextNodeError
    is raised.
    """
    parts = []
    for child in node.childNodes:
        if child.nodeType != child.TEXT_NODE:
            raise NotTextNodeError
        parts.append(child.nodeValue)
    return "".join(parts)


def nodeToDic(node):
    """
    Build a dictionary from the element children of node.

    Three cases are differentiated for every child element:
    - if the child contains nothing but text, {nodeName: text} is merged
      into the dictionary; the text is stripped and UTF-8 encoded.
    - if the child contains other elements, nodeToDic() calls itself
      recursively and {nodeName: nodeToDic(child)} is merged.
    - if more than one direct child carries the same name, the values of
      these children are collected, in document order, in a list which is
      merged as {nodeName: list}.

    Children that are not elements (whitespace text, comments, ...) are
    ignored.
    """
    elements = [child for child in node.childNodes
                if child.nodeType == child.ELEMENT_NODE]

    # Count direct children per tag name. getElementsByTagName() would also
    # count deeper descendants, turning a single child into a list whenever
    # a nested element happens to share its name.
    counts = {}
    for child in elements:
        counts[child.nodeName] = counts.get(child.nodeName, 0) + 1

    dic = {}
    for child in elements:
        name = child.nodeName
        try:
            value = getTextFromNode(child).strip().encode('utf-8')
        except NotTextNodeError:
            # The child has element children of its own.
            value = nodeToDic(child)
        if counts[name] > 1:
            dic.setdefault(name, []).append(value)
        else:
            dic[name] = value
    return dic


def readXML(filename):
    """
    Parse an XML document and return it as nested dictionaries.

    filename is handed to xml.dom.minidom.parse(), so it may be a path or
    an open file object. The returned dictionary has the document's root
    element name as its only key.
    """
    dom = parse(filename)
    return nodeToDic(dom)


if __name__ == "__main__":
    main()