moin2doku/moin2doku.py

282 lines
7.9 KiB
Python
Raw Normal View History

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Setup VIM: ex: et ts=2 sw=2 :
#
2011-02-10 10:00:19 +01:00
# Main Script doing the conversion.
# See README for details.
#
2011-02-10 10:00:19 +01:00
# Author: Elan Ruusamäe <glen@pld-linux.org>
import sys, os, os.path, re, codecs
2011-02-06 21:16:01 +01:00
import getopt
from MoinMoin import user, wikiutil
2012-09-18 02:04:03 +02:00
from MoinMoin.request import RequestCLI
from MoinMoin.logfile import editlog
from MoinMoin.Page import Page
2011-02-06 23:29:51 +01:00
from shutil import copyfile, copystat
from os import listdir, mkdir
2011-02-06 17:30:38 +01:00
from os.path import isdir, basename
from doku import DokuWiki
from moinformat import moin2doku
2011-02-06 17:30:38 +01:00
USEC = 1000000
2011-02-06 17:30:38 +01:00
def check_dirs(moin_pages_dir, output_dir):
    """Validate the input/output directories and prepare the output layout.

    moin_pages_dir -- MoinMoin pages directory; only checked when it is set
                      (it may be None/empty, e.g. when a single page is
                      converted).
    output_dir     -- existing directory that receives the converted data.

    Writes a message to stderr and exits with status 1 when a directory is
    missing.  Otherwise makes sure the 'pages', 'media' and 'meta'
    subdirectories exist below output_dir.
    """
    if moin_pages_dir and not isdir(moin_pages_dir):
        sys.stderr.write("MoinMoin pages directory doesn't exist!\n")
        sys.exit(1)

    # output_dir is None when no output directory was given at all;
    # isdir(None) raises TypeError, so report it like a missing directory.
    if not output_dir or not isdir(output_dir):
        sys.stderr.write("Output directory doesn't exist!\n")
        sys.exit(1)

    for subdir in ('pages', 'media', 'meta'):
        path = os.path.join(output_dir, subdir)
        if not isdir(path):
            mkdir(path)
def get_path_names(moin_pages_dir, basenames = False):
    """List the subdirectories of moin_pages_dir.

    moin_pages_dir -- directory to scan (not recursive)
    basenames      -- when true return the bare entry names, otherwise the
                      entries joined onto moin_pages_dir

    Plain files are ignored.  The order is whatever listdir() yields.
    """
    found = []
    for entry in listdir(moin_pages_dir):
        joined = os.path.join(moin_pages_dir, entry)
        if not isdir(joined):
            continue
        found.append(entry if basenames else joined)
    return found
2011-02-06 17:30:38 +01:00
def readfile(filename):
    """Return the content of the UTF-8 encoded file `filename` as unicode text.

    Uses codecs.open(), the same way writefile() writes its output, so the
    data is decoded while reading instead of being read as bytes and then
    converted twice.  Raises UnicodeDecodeError if the file is not valid
    UTF-8.
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        return f.read()
def writefile(filename, content, overwrite=False):
    """Write unicode `content` to `filename`, encoded as UTF-8.

    filename  -- target path; missing parent directories are created
    content   -- text to write
    overwrite -- when false (the default) an existing file is never replaced

    Raises OSError if the file already exists and overwrite is not set.
    """
    parent = os.path.dirname(filename)
    # A bare file name has an empty directory part; os.makedirs('') would
    # fail, so only create the parent when there is one.
    if parent and not isdir(parent):
        os.makedirs(parent)

    if os.path.exists(filename) and not overwrite:
        raise OSError('File already exists: %s' % filename)

    # 'with' closes the file even when the write itself fails
    with codecs.open(filename, 'w', 'utf-8') as f:
        f.write(content)
def copy_attachments(pagedir, ns):
    """Copy the attachments of one MoinMoin page into the output media tree.

    pagedir -- MoinMoin page directory; its 'attachments' subdirectory is
               the source
    ns      -- DokuWiki namespace the attachments are copied into

    Returns without doing anything when the page has no 'attachments'
    directory.  Reads the module-level `output_dir` and `dw` globals.
    """
    source_root = os.path.join(pagedir, 'attachments')
    if not isdir(source_root):
        return

    media_root = os.path.join(output_dir, 'media')
    target_dir = os.path.join(media_root, dw.mediaFN(ns))
    if not isdir(target_dir):
        os.makedirs(target_dir)

    for name in listdir(source_root):
        media_id = dw.cleanID("%s/%s" % (ns, name))
        source = os.path.join(source_root, name)
        target = os.path.join(media_root, dw.mediaFN(media_id))
        # copy the data first, then carry over permissions and timestamps
        copyfile(source, target)
        copystat(source, target)
2011-02-06 17:30:38 +01:00
def print_help():
    """Print the command line usage to stdout and exit with status 0.

    The program name shown is taken from sys.argv[0].  This function never
    returns: it always ends with sys.exit(0).
    """
    program = sys.argv[0]
    lines = [
        "Usage: %s -m <moinmoin pages directory> -d <output directory>" % program,
        "Convert MoinMoin pages to DokuWiki.",
        "Options:",
        "-m DIR - MoinMoin pages dir",
        "-d DIR - Dokuwiki pages dir",
        "-a - Convert Attic pages (history)",
        "-f - overwrite output files",
        "-F FILE - convert single file",
        "",
        # the output directory has to be passed with -d; positional
        # arguments are not used by the option parsing
        "%s -m moinmoin/data/pages -d /var/lib/dokuwiki/pages" % program,
        "%s -F moinmoin/data/pages/frontpage -d out" % program,
    ]
    print("\n".join(lines))
    sys.exit(0)
2011-02-06 17:30:38 +01:00
def wikiname(filename):
    """Return the wiki page name for a MoinMoin page directory.

    filename -- path of a directory from the MoinMoin pages/ dir; only its
                last path component is used, un-quoted with
                wikiutil.unquoteWikiname() (unicode, according to the
                original note on this function).
    """
    quoted = basename(filename)
    return wikiutil.unquoteWikiname(quoted)
def convert_editlog(pagedir, output = None, overwrite = False):
    """Convert the MoinMoin edit-log of one page into a '.changes' meta file.

    pagedir   -- MoinMoin page directory containing the 'edit-log' file
    output    -- page id used for the output file name; defaults to the
                 page name derived from pagedir
    overwrite -- passed on to writefile()

    Every log entry becomes one tab separated line: time (seconds), address,
    action code, cleaned page id, author, comment.  Lines are keyed by their
    time stamp, so two entries within the same second collapse into one.
    Revisions that exist on disk but have no log entry get a synthetic
    'recovered entry' line.  Reads the module-level `request`, `dw` and
    `output_dir` globals.
    """
    pagedir = os.path.abspath(pagedir)
    print("pagedir: %s" % pagedir)

    pagename = wikiname(pagedir)
    if not output:
        output = pagename

    # MoinMoin action -> code written to the changes file; any other action
    # is written through unchanged.
    action_codes = {
        'SAVE' : 'E',
        'SAVENEW' : 'C',
        'SAVE/REVERT' : 'R',
    }
    # attachment actions: not supported. perhaps add anyway?
    skipped_actions = ('ATTNEW', 'ATTDEL', 'ATTDRW')

    entries = {}
    log_path = os.path.join(pagedir, 'edit-log')
    for entry in editlog.EditLog(request, filename = log_path):
        if entry.action in skipped_actions:
            continue

        # prefer the name of the editing user, fall back to the host name
        author = entry.hostname
        if entry.userid:
            account = user.User(request, entry.userid)
            if account.name:
                author = account.name

        code = action_codes.get(entry.action, entry.action)
        stamp = str(entry.ed_time_usecs / USEC)
        # resulting line, e.g.: 1201095949 192.168.2.23 E start glen@delfi.ee
        fields = [stamp, entry.addr, code, dw.cleanID(entry.pagename), author, entry.comment]
        entries[stamp] = "\t".join(fields)

    # see if we have missing entries, try to recover
    revisions = Page(request, pagename).getRevList()
    if len(revisions) != len(entries):
        print("RECOVERING edit-log, missing %d entries" % (len(revisions) - len(entries)))
        for rev in revisions:
            rev_page = Page(request, pagename, rev = rev)
            stamp = rev_page.mtime_usecs() / USEC

            if not stamp:
                # no time recorded for this revision: use the file's mtime
                pagefile, realrev, exists = rev_page.get_rev(rev = rev)
                if os.path.exists(pagefile):
                    stamp = int(os.path.getmtime(pagefile))
                    print("Recovered %s: %s" % (rev, stamp))

            stamp = str(stamp)
            if stamp not in entries:
                fields = [stamp, '127.0.0.1', '?', dw.cleanID(pagename), 'root', 'recovered entry']
                entries[stamp] = "\t".join(fields)
                print("ADDING %s" % stamp)

    lines = sorted(entries.values())
    out_file = os.path.join(output_dir, 'meta', dw.metaFN(output, '.changes'))
    writefile(out_file, "\n".join(lines), overwrite = overwrite)
def convertfile(pagedir, output = None, overwrite = False):
    """Convert one MoinMoin page directory to DokuWiki files.

    pagedir   -- MoinMoin page directory
    output    -- page id to store the page under; defaults to the page name
                 derived from pagedir
    overwrite -- passed on to writefile()

    The current revision is written below 'pages'; when the module-level
    `convert_attic` flag is set, all other revisions are written below
    'attic', named with their modification time.  Attachments and the
    edit-log are converted afterwards.

    Returns 1 when the page was processed and None when it was skipped as an
    underlay page.  Reads the module-level `request`, `dw`, `output_dir` and
    `convert_attic` globals.
    """
    pagedir = os.path.abspath(pagedir)
    pagename = wikiname(pagedir)
    if not output:
        output = pagename

    head = Page(request, pagename)
    if head.isUnderlayPage():
        print("SKIP UNDERLAY")
        return

    current_rev = head.current_rev()
    revs = head.getRevList() if convert_attic else [current_rev]

    for rev in revs:
        rev_page = Page(request, pagename, rev = rev)
        pagefile, realrev, exists = rev_page.get_rev(rev = rev)
        content = moin2doku(pagename, rev_page.get_raw_body())

        mtime = rev_page.mtime_usecs() / USEC
        if not mtime and os.path.exists(pagefile):
            # no time recorded for this revision: use the file's mtime
            mtime = int(os.path.getmtime(pagefile))
            print("recovered %s: %s" % (rev, mtime))

        if not mtime:
            print("NO REVISION: for %s" % pagefile)
            continue

        if rev != current_rev:
            # old revisions are stored in the attic, named by their time
            out_file = os.path.join(output_dir, 'attic', dw.wikiFN(output, str(mtime)))
        else:
            out_file = os.path.join(output_dir, 'pages', dw.wikiFN(output))

        writefile(out_file, content, overwrite = overwrite)
        copystat(pagefile, out_file)

    ns = dw.getNS(dw.cleanID(output))
    copy_attachments(pagedir, ns)

    # convert edit-log, it's always present even if current page is not
    convert_editlog(pagedir, output = output, overwrite = overwrite)

    return 1
#
# "main" starts here
#
2011-02-06 21:16:01 +01:00
# Parse the command line; see print_help() for the meaning of the switches.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'hfam:d:F:', [ "help" ])
except getopt.GetoptError as e:
    sys.stderr.write('Incorrect parameters! Use --help switch to learn more.: %s\n' % e)
    sys.exit(1)

# Module-level settings; output_dir and convert_attic are also read by the
# conversion functions above.
overwrite = False
input_file = None
moin_pages_dir = None
output_dir = None
convert_attic = False

for o, a in opts:
    if o in ("--help", "-h"):
        print_help()
    elif o == "-f":
        overwrite = True
    elif o == "-m":
        moin_pages_dir = a
    elif o == "-a":
        convert_attic = True
    elif o == "-d":
        output_dir = a
    elif o == "-F":
        input_file = a

if not moin_pages_dir and not input_file:
    try:
        print_help()
    except SystemExit:
        # print_help() always ends with sys.exit(0); swallow that so the
        # error below is still reported and the exit status signals failure.
        pass
    sys.stderr.write('No input file or page dir to process\n')
    sys.exit(1)

if not output_dir:
    # without this check a missing -d surfaces later as a TypeError traceback
    sys.stderr.write('No output directory given, use the -d switch\n')
    sys.exit(1)

check_dirs(moin_pages_dir, output_dir)

print("Input dir is: '%s'" % moin_pages_dir)
print("Output dir is: '%s'" % output_dir)

# Shared helpers used by the conversion functions (module-level globals).
dw = DokuWiki()
request = RequestCLI()

if input_file is not None:
    # single page mode (-F)
    res = convertfile(input_file, overwrite = overwrite)
else:
    converted = 0

    # special: process frontpage so that MoinMoin frontpage gets saved as DokuWiki frontpage
    page = wikiutil.getFrontPage(request)
    res = convertfile(page.getPagePath(), output = dw.getId(), overwrite = overwrite)
    if res is not None:
        converted += 1

    pathnames = get_path_names(moin_pages_dir)
    for pathname in pathnames:
        if pathname.count('MoinEditorBackup') > 0:
            print("SKIP %s: skip backups" % pathname)
            continue

        res = convertfile(pathname, overwrite = overwrite)
        if res is not None:
            converted += 1

    # note: the front page above is counted in 'converted' but not in
    # len(pathnames)
    print("Processed %d files, converted %d" % (len(pathnames), converted))