codekasten/xml2typo3/xmlreader.py

#!/usr/bin/env python
"""
This script fetches single nodes from an event management XML file.
"""
from optparse import OptionParser
from xml.dom.minidom import parse
import mysql_connect

def main():
    """
    command line entry point.

    reads the xml file given with -f/--file, hands every ROW entry to
    mysql_connect.DataBase.prepare_event() and prints a short summary.
    without -f only the help text is printed.
    the -c/--config option is parsed but not evaluated in this function.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-f", "--file", dest="filename",
                      help="read xml data from FILENAME")
    parser.add_option("-c", "--config", dest="configfile",
                      help="read DB config from FILENAME")
    (options, args) = parser.parse_args()
    if not options.filename:
        # print_help() writes the help text itself and returns None;
        # printing its result would add a stray "None" line.
        parser.print_help()
        return
    print("reading XML file: %s " % options.filename)
    dic = readXML(options.filename)
    result = dic["FMPDSORESULT"]

    print("")
    print("XML Quelle:  %s" % (result["DATABASE"],))
    db = mysql_connect.DataBase()
    rows = result["ROW"]
    days = 0
    data = []
    for item in rows:
        # count the entries whose DATUM value is longer than one item
        if len(item["DATUM"]) > 1:
            days += 1
        data.append(db.prepare_event(item))

    print("Tage durchlaufen: %i" % days)
    print("Eintraege insgesamt: %i" % len(rows))

    ## finally put the mess into mysql db
    db.connect()
    try:
        print("Datenbank fuellen..")
        # NOTE: the insert itself (db.insert_event(data)) is still disabled
    finally:
        # always release the connection, even if the step above fails
        db.close()


class NotTextNodeError(Exception):
    """
    raised by getTextFromNode() when a node has a child that is
    not a text node.

    derives from Exception so that it can be raised and caught on
    every python version (plain classes cannot be raised on python 3).
    """
    pass


def getTextFromNode(node):
    """
    returns the concatenated text of all children of node.
    as soon as a child is found that is not a text node,
    NotTextNodeError is raised. a node without children
    yields the empty string.
    """
    pieces = []
    for child in node.childNodes:
        # guard clause: anything but plain text aborts the scan
        if child.nodeType != child.TEXT_NODE:
            raise NotTextNodeError
        pieces.append(child.nodeValue)
    return "".join(pieces)


def nodeToDic(node):
    """
    nodeToDic() scans through the element children of node and makes a
    dictionary from the content.
    three cases are differentiated:
    - if the child contains no other nodes, it is a text-node
    and {nodeName:text} is merged into the dictionary. the text is
    stripped and utf-8 encoded.
    - if there is more than one direct child with the same name
    then these children will be appended to a list and this
    list is merged to the dictionary in the form: {nodeName:list}.
    - else, nodeToDic() will call itself recursively on
    the nodes children (merging {nodeName:nodeToDic()} to
    the dictionary).
    """
    elements = [c for c in node.childNodes
                if c.nodeType == c.ELEMENT_NODE]

    # count the direct children per name once. getElementsByTagName()
    # would also find equally named descendants deeper in the tree and
    # wrongly report a single child as "multiple".
    counts = {}
    for child in elements:
        counts[child.nodeName] = counts.get(child.nodeName, 0) + 1

    dic = {}
    for child in elements:
        name = child.nodeName
        try:
            text = getTextFromNode(child)
        except NotTextNodeError:
            # child has element children of its own: descend
            value = nodeToDic(child)
        else:
            # text node
            value = text.strip().encode('utf-8')

        if counts[name] > 1:
            # repeated name: collect all values in one list
            dic.setdefault(name, []).append(value)
        else:
            dic[name] = value
    return dic


def readXML(filename):
    """
    parses the xml file filename with minidom and returns the
    dictionary that nodeToDic() builds from the document.
    """
    return nodeToDic(parse(filename))


# run main() only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()