#!/usr/bin/python # -*- coding: utf-8 -*- # Setup VIM: ex: et ts=2 sw=2 : # # Main Script doing the conversion. # See README for details. # # Author: Elan Ruusamäe import sys, os, os.path, re, codecs import getopt from MoinMoin import user, wikiutil from MoinMoin.request import RequestCLI from MoinMoin.logfile import editlog from MoinMoin.Page import Page from shutil import copyfile, copystat from os import listdir, mkdir from os.path import isdir, basename from doku import DokuWiki from moinformat import moin2doku USEC = 1000000 def check_dirs(moin_pages_dir, output_dir): if moin_pages_dir and not isdir(moin_pages_dir): print >> sys.stderr, "MoinMoin pages directory doesn't exist!" sys.exit(1) if not isdir(output_dir): print >> sys.stderr, "Output directory doesn't exist!" sys.exit(1) pagedir = os.path.join(output_dir, 'pages') if not isdir(pagedir): mkdir(pagedir) mediadir = os.path.join(output_dir, 'media') if not isdir(mediadir): mkdir(mediadir) metadir = os.path.join(output_dir, 'meta') if not isdir(metadir): mkdir(metadir) def get_path_names(moin_pages_dir, basenames = False): items = listdir(moin_pages_dir) pathnames = [] for item in items: absitem = os.path.join(moin_pages_dir, item) if isdir(absitem): if basenames: pathnames.append(item) else: pathnames.append(absitem) return pathnames def readfile(filename): with open(filename, 'r') as f: text = f.read() return unicode(text.decode('utf-8')) def writefile(filename, content, overwrite=False): dir = os.path.split(filename)[0] if not isdir(dir): os.makedirs(dir); if os.path.exists(filename) and overwrite == False: raise OSError, 'File already exists: %s' % filename f = codecs.open(filename, 'w', 'utf-8') f.write(content) f.close() # pagedir = MoinMoin page dir # ns = DokuWiki namespace where attachments to copy def copy_attachments(pagedir, ns): srcdir = os.path.join(pagedir, 'attachments') if not isdir(srcdir): return attachment_dir = os.path.join(output_dir, 'media', dw.mediaFN(ns)) if not isdir(attachment_dir): os.makedirs(attachment_dir); attachments = listdir(srcdir) for attachment in attachments: src = os.path.join(srcdir, attachment) dst = os.path.join(output_dir, 'media', dw.mediaFN(dw.cleanID("%s/%s" % (ns, attachment)))) copyfile(src, dst) copystat(src, dst) def print_help(): program = sys.argv[0] print "Usage: %s -m -d " % program print "Convert MoinMoin pages to DokuWiki." print "Options:" print "-m DIR - MoinMoin pages dir" print "-d DIR - Dokuwiki pages dir" print "-a - Convert Attic pages (history)" print "-f - overwrite output files" print "-F FILE - convert single file" print "" print "%s -m moinmoin/data/pages /var/lib/dokuwiki/pages" % program print "%s -F moinmoin/data/pages/frontpage -d out" % program sys.exit(0) # return unicode encoded wikiname # input is a dir from moinmoin pages/ dir def wikiname(filename): return wikiutil.unquoteWikiname(basename(filename)) def convert_editlog(pagedir, output = None, overwrite = False): pagedir = os.path.abspath(pagedir) print "pagedir: %s" % pagedir pagename = wikiname(pagedir) if not output: output = pagename pagelog = os.path.join(pagedir, 'edit-log') edit_log = editlog.EditLog(request, filename = pagelog) changes = {} for log in edit_log: # not supported. perhaps add anyway? if log.action in ('ATTNEW', 'ATTDEL', 'ATTDRW'): continue # 1201095949 192.168.2.23 E start glen@delfi.ee author = log.hostname if log.userid: userdata = user.User(request, log.userid) if userdata.name: author = userdata.name try: action = { 'SAVE' : 'E', 'SAVENEW' : 'C', 'SAVE/REVERT' : 'R', }[log.action] except KeyError: action = log.action mtime = str(log.ed_time_usecs / USEC) changes[mtime] = "\t".join([mtime, log.addr, action, dw.cleanID(log.pagename), author, log.comment]) # see if we have missing entries, try to recover page = Page(request, pagename) if len(page.getRevList()) != len(changes): print "RECOVERING edit-log, missing %d entries" % (len(page.getRevList()) - len(changes)) for rev in page.getRevList(): page = Page(request, pagename, rev = rev) mtime = page.mtime_usecs() / USEC if not mtime: pagefile, realrev, exists = page.get_rev(rev = rev); if os.path.exists(pagefile): mtime = int(os.path.getmtime(pagefile)) print "Recovered %s: %s" % (rev, mtime) mtime = str(mtime) if not changes.has_key(mtime): changes[mtime] = "\t".join([mtime, '127.0.0.1', '?', dw.cleanID(pagename), 'root', 'recovered entry']) print "ADDING %s" % mtime changes = sorted(changes.values()) out_file = os.path.join(output_dir, 'meta', dw.metaFN(output, '.changes')) writefile(out_file, "\n".join(changes), overwrite = overwrite) def convertfile(pagedir, output = None, overwrite = False): pagedir = os.path.abspath(pagedir) pagename = wikiname(pagedir) if not output: output = pagename page = Page(request, pagename) if page.isUnderlayPage(): print "SKIP UNDERLAY" return current_rev = page.current_rev() if convert_attic: revs = page.getRevList() else: revs = [current_rev] for rev in revs: page = Page(request, pagename, rev = rev) pagefile, realrev, exists = page.get_rev(rev = rev); content = moin2doku(pagename, page.get_raw_body()) mtime = page.mtime_usecs() / USEC if not mtime: if os.path.exists(pagefile): mtime = int(os.path.getmtime(pagefile)) print "recovered %s: %s" % (rev, mtime) if not mtime: print "NO REVISION: for %s" % pagefile continue if rev == current_rev: out_file = os.path.join(output_dir, 'pages', dw.wikiFN(output)) else: out_file = os.path.join(output_dir, 'attic', dw.wikiFN(output, str(mtime))) writefile(out_file, content, overwrite = overwrite) copystat(pagefile, out_file) ns = dw.getNS(dw.cleanID(output)) copy_attachments(pagedir, ns) # convert edit-log, it's always present even if current page is not convert_editlog(pagedir, output = output, overwrite = overwrite) return 1 # # "main" starts here # try: opts, args = getopt.getopt(sys.argv[1:], 'hfam:d:F:', [ "help" ]) except getopt.GetoptError, e: print >> sys.stderr, 'Incorrect parameters! Use --help switch to learn more.: %s' % e sys.exit(1) overwrite = False input_file = None moin_pages_dir = None output_dir = None convert_attic = False for o, a in opts: if o == "--help" or o == "-h": print_help() if o == "-f": overwrite = True if o == "-m": moin_pages_dir = a if o == "-a": convert_attic = True if o == "-d": output_dir = a if o == "-F": input_file = a if not moin_pages_dir and not input_file: print_help() print >> sys.stderr, 'No input file or page dir to process' sys.exit(1) check_dirs(moin_pages_dir, output_dir) print "Input dir is: '%s'" % moin_pages_dir print "Output dir is: '%s'" % output_dir dw = DokuWiki() request = RequestCLI() if input_file != None: res = convertfile(input_file, overwrite = overwrite) else: converted = 0 # special: process frontpage so that MoinMoin frontpage gets saved as DokuWiki frontpage page = wikiutil.getFrontPage(request) res = convertfile(page.getPagePath(), output = dw.getId(), overwrite = overwrite) if res != None: converted += 1 pathnames = get_path_names(moin_pages_dir) for pathname in pathnames: if pathname.count('MoinEditorBackup') > 0: print "SKIP %s: skip backups" % pathname continue res = convertfile(pathname, overwrite = overwrite) if res != None: converted += 1 print "Processed %d files, converted %d" % (len(pathnames), converted)