# moin2doku/moin2doku.py -- MoinMoin to DokuWiki converter (347 lines, 9.7 KiB, Python)
#!/usr/bin/python
# -*- coding: utf-8 -*-
2012-09-22 18:13:38 +02:00
# Setup VIM: ex: noet ts=2 sw=2 :
#
2011-02-10 10:00:19 +01:00
# Main Script doing the conversion.
# See README for details.
#
2011-02-10 10:00:19 +01:00
# Author: Elan Ruusamäe <glen@pld-linux.org>
2012-09-22 18:23:47 +02:00
# Version: 1.0
2011-02-10 10:00:19 +01:00
import sys, os, os.path, re, codecs
2011-02-06 21:16:01 +01:00
import getopt
from MoinMoin import user, wikiutil
2016-01-05 17:13:48 +01:00
from MoinMoin.web.contexts import ScriptContext as RequestCLI
2012-09-18 02:04:03 +02:00
from MoinMoin.logfile import editlog
from MoinMoin.Page import Page
2011-02-06 23:29:51 +01:00
from shutil import copyfile, copystat
from os import listdir, mkdir
2011-02-06 17:30:38 +01:00
from os.path import isdir, basename
from doku import DokuWiki
from moinformat import moin2doku
import random
2011-02-06 17:30:38 +01:00
# HACK: force the process-wide default string encoding so that implicit
# str<->unicode coercions inside MoinMoin do not raise UnicodeDecodeError.
# site.py deletes sys.setdefaultencoding() at startup; reload(sys) restores
# it (Python 2 only).
# NOTE(review): 'cp1252' looks site-specific -- confirm it matches the
# encoding of the legacy wiki data being converted.
# sys.setdefaultencoding() does not exist, here!
reload(sys) # Reload does the trick!
sys.setdefaultencoding('cp1252')
# MoinMoin edit-log timestamps are in microseconds; divide by USEC to get
# Unix seconds.
USEC = 1000000
2012-09-22 18:10:06 +02:00
def init_dirs(output_dir):
    """Ensure the DokuWiki output tree exists.

    Exits the process with status 1 when output_dir itself is missing;
    otherwise creates the pages/, media/ and meta/ subdirectories if absent.
    """
    if not isdir(output_dir):
        sys.stderr.write("Output directory doesn't exist!\n")
        sys.exit(1)

    # one subdirectory per DokuWiki data class (was three copy-pasted stanzas)
    for subdir in ('pages', 'media', 'meta'):
        path = os.path.join(output_dir, subdir)
        if not isdir(path):
            mkdir(path)
2012-09-18 02:04:03 +02:00
def readfile(filename):
    """Read filename and return its contents decoded from UTF-8 as unicode.

    Uses codecs.open so decoding happens on read (same unicode result as the
    manual read+decode it replaces) and closes the handle even on error --
    the original leaked the file object.
    """
    f = codecs.open(filename, 'r', 'utf-8')
    try:
        return f.read()
    finally:
        f.close()
def writefile(filename, content, overwrite=False):
    """Write content to filename as UTF-8, one trailing newline per line.

    content may be a single line or a list/tuple of lines (without
    newlines).  Parent directories are created as needed.

    Raises OSError if the file exists and overwrite is False.  The original
    used the deprecated Python 2 tuple form `raise (OSError, msg)`; raising
    a constructed exception works on Python 2 and 3 alike.
    """
    dir = os.path.dirname(os.path.abspath(filename))
    if not isdir(dir):
        os.makedirs(dir)

    if os.path.exists(filename) and not overwrite:
        raise OSError('File already exists: %s' % filename)

    # ensure it's a list
    if not isinstance(content, (list, tuple)):
        content = [content]

    f = codecs.open(filename, 'w', 'utf-8')
    try:
        f.writelines([line + u'\n' for line in content])
    finally:
        # close even if a write fails; the original could leak the handle
        f.close()
def encode_relaxed(text):
    """Encode unicode text to ASCII bytes, silently dropping any character
    that has no ASCII representation (used to keep status printing from
    blowing up on non-ASCII page names).

    BUG FIX: on Python 2.7, unicode.encode() accepts NO keyword arguments,
    so the original `errors="ignore"` raised TypeError; the errors argument
    must be passed positionally.
    """
    return text.encode("ascii", "ignore")
2012-09-22 22:10:50 +02:00
# Copy all attachments of a MoinMoin page into the DokuWiki media tree.
# page     -- MoinMoin Page object
# ns       -- DokuWiki namespace the attachments are copied into
# randomID -- numeric prefix added to each media file name to avoid
#             collisions between same-named attachments in one namespace
# Relies on module globals: output_dir, dw.
def copy_attachments(page, ns, randomID):
    src_dir = page.getPagePath('attachments', check_create=0)
    if not isdir(src_dir):
        # page has no attachments at all
        return

    media_root = os.path.join(output_dir, 'media', dw.mediaFN(ns))
    if not isdir(media_root):
        os.makedirs(media_root)

    for name in listdir(src_dir):
        try:
            source = os.path.join(src_dir, name)
            clean = dw.cleanID(u"%s/%s" % (ns, str(randomID) + name))
            target = os.path.join(output_dir, 'media', dw.mediaFN(clean))
            copyfile(source, target)
            # keep the original timestamps/permissions
            copystat(source, target)
        except UnicodeDecodeError:
            print('ERROR: unable to convert attachment "%s"' % encode_relaxed(name))
2011-02-06 17:30:38 +01:00
def print_help():
    """Print usage information to stdout and exit with status 0."""
    program = sys.argv[0]
    usage = [
        "Usage: %s OPTIONS" % program,
        "Convert MoinMoin pages to DokuWiki.",
        "Options:",
        "-d DIR - output directory",
        "-a - Convert Attic pages (history)",
        "-f - overwrite output files",
        "-F FILE - convert single file",
        "-r FILE - write config for redirect plugin",
        "",
        "%s -a -d /var/lib/dokuwiki" % program,
        "%s -F moinmoin/data/pages/frontpage -d out" % program,
    ]
    for line in usage:
        print(line)
    sys.exit(0)
2011-02-06 17:30:38 +01:00
# Derive the unicode wiki page name from a MoinMoin pages/ directory.
# MoinMoin quotes page names into filesystem-safe directory names;
# unquoting the basename recovers the original page name.
def wikiname(filename):
    page_dir = basename(filename)
    return wikiutil.unquoteWikiname(page_dir)
2012-09-22 22:04:08 +02:00
def convert_editlog(page, output = None, overwrite = False):
    # Convert a MoinMoin edit-log into a DokuWiki .changes meta file.
    #
    # page:      MoinMoin Page object whose edit history is converted
    # output:    DokuWiki page name to write under (defaults to the
    #            MoinMoin page name)
    # overwrite: passed through to writefile()
    #
    # Uses module globals: request, dw, output_dir, USEC.
    pagedir = page.getPagePath()
    pagename = wikiname(pagedir)
    if not output:
        output = pagename
    edit_log = editlog.EditLog(request, page.getPagePath('edit-log'))

    # keyed by mtime (seconds, as a string) so the recovery pass below can
    # detect revisions that are missing from the edit-log
    changes = {}
    for log in edit_log:
        # attachment events have no .changes line here
        # not supported. perhaps add anyway?
        if log.action in ('ATTNEW', 'ATTDEL', 'ATTDRW'):
            continue
        # 1201095949 192.168.2.23 E start glen@delfi.ee
        author = log.hostname
        if log.userid:
            # prefer the registered user's display name over the hostname
            userdata = user.User(request, log.userid)
            if userdata.name:
                author = userdata.name
        # map MoinMoin actions to DokuWiki change types (E=edit, C=create,
        # R=revert); unknown actions are passed through unchanged
        try:
            action = {
                'SAVE' : 'E',
                'SAVENEW' : 'C',
                'SAVE/REVERT' : 'R',
            }[log.action]
        except KeyError:
            action = log.action
        # edit-log stores microseconds; .changes wants Unix seconds
        mtime = str(log.ed_time_usecs / USEC)
        changes[mtime] = u"\t".join([mtime, log.addr, action, dw.cleanID(log.pagename), author, log.comment])

    # see if we have missing entries, try to recover
    page = Page(request, pagename)
    if len(page.getRevList()) != len(changes):
        print "RECOVERING edit-log, missing %d entries" % (len(page.getRevList()) - len(changes))
        for rev in page.getRevList():
            page = Page(request, pagename, rev = rev)
            mtime = page.mtime_usecs() / USEC
            if not mtime:
                # no recorded mtime: fall back to the revision file's
                # filesystem modification time
                pagefile, realrev, exists = page.get_rev(rev = rev);
                if os.path.exists(pagefile):
                    mtime = int(os.path.getmtime(pagefile))
                    print "Recovered %s: %s" % (rev, mtime)
            mtime = str(mtime)
            if not changes.has_key(mtime):
                # synthesize a placeholder entry for the missing revision
                changes[mtime] = u"\t".join([mtime, '127.0.0.1', '?', dw.cleanID(pagename), 'root', 'recovered entry'])
                print "ADDING %s" % mtime

    # entries are written sorted lexically by their leading timestamp field
    changes = sorted(changes.values())
    out_file = os.path.join(output_dir, 'meta', dw.metaFN(output, '.changes'))
    writefile(out_file, changes, overwrite = overwrite)
2012-09-18 02:04:03 +02:00
2012-09-22 21:55:37 +02:00
def convertfile(page, output = None, overwrite = False):
    # Convert one MoinMoin page (and, with -a, its whole revision history)
    # into DokuWiki page/attic files, copy its attachments, convert its
    # edit-log, and optionally record a redirect entry.
    #
    # page:      MoinMoin Page object to convert
    # output:    DokuWiki page name to write as (defaults to the MoinMoin name)
    # overwrite: passed through to writefile()
    #
    # Returns False when the page was skipped (underlay page), True otherwise.
    # Uses module globals: request, dw, output_dir, convert_attic,
    # redirect_conf, redirect_map, USEC.
    pagedir = page.getPagePath()
    pagename = wikiname(pagedir)
    if not output:
        output = pagename
    print "Converting %s" % encode_relaxed(pagename)

    # underlay pages are MoinMoin's bundled system/help pages -- skip them
    if page.isUnderlayPage():
        print "underlay: %s" % page.request.cfg.data_underlay_dir
        print "underlay: %s" % request.cfg.data_underlay_dir
        print "SKIP UNDERLAY: %s" % encode_relaxed(pagename)
        return False

    current_exists = page.exists()
    current_rev = page.current_rev()
    if convert_attic:
        revs = page.getRevList()
    else:
        # only the current revision when history is not requested
        revs = [current_rev]

    # Generate random ID Number for collision avoidance when attachments in Namespace have the same name
    randomID = random.randint(101,999)

    for rev in revs:
        page = Page(request, pagename, rev = rev)
        pagefile, realrev, exists = page.get_rev(rev = rev);
        mtime = page.mtime_usecs() / USEC
        if not mtime:
            # sanity check: Page's idea of existence must match the filesystem
            if os.path.exists(pagefile) != exists:
                raise Exception, "IT SHOULD NOT HAPPEN"
            if os.path.exists(pagefile):
                # recover the timestamp from the revision file itself
                mtime = int(os.path.getmtime(pagefile))
                print "recovered %s: %s" % (rev, mtime)
            if not mtime:
                print "NO REVISION: for %s" % encode_relaxed(pagefile)
                continue
        if rev == current_rev:
            out_file = os.path.join(output_dir, 'pages', dw.wikiFN(output))
            if not convert_attic and not exists:
                # if not converting attic, allow current version may not exist anymore
                continue
        else:
            # historical revisions go to the attic, named by timestamp
            out_file = os.path.join(output_dir, 'attic', dw.wikiFN(output, str(mtime)))

        # translate the MoinMoin markup to DokuWiki markup
        content = moin2doku(pagename, page.get_raw_body(),randomID)
        if len(content) == 0:
            # raise Exception, "No content"
            print "NO CONTENT: exists: %s,%s" % (exists, os.path.exists(pagefile))
        writefile(out_file, content, overwrite = overwrite)
        # preserve the original file's timestamps on the converted page
        copystat(pagefile, out_file)

    ID = dw.cleanID(output)
    copy_attachments(page, dw.getNS(ID),randomID)

    # convert edit-log, it's always present even if current page is not
    convert_editlog(page, output = output, overwrite = overwrite)

    # add to redirect.conf if filenames differ
    # and page must exist (no redirect for deleted pages)
    if redirect_conf and current_exists:
        # redirect dokuwiki plugin is quite picky
        # - it doesn't understand if entries are not lowercase
        # - it doesn't understand if paths are separated by forward slash
        old_page = pagename.lower().replace('/', ':').replace(' ', '_')
        if old_page != ID:
            redirect_map[old_page] = ID

    print "Converted %s as %s" % (encode_relaxed(pagename), dw.wikiFN(output))
    return True
#
# "main" starts here
#

# Set up encoding-safe stdout: use the terminal's encoding when attached to
# a tty, otherwise the locale's preferred encoding (output is being piped).
if sys.stdout.isatty():
    default_encoding = sys.stdout.encoding
else:
    import locale
    default_encoding = locale.getpreferredencoding()
sys.stdout = codecs.getwriter(default_encoding)(sys.stdout)

try:
    opts, args = getopt.getopt(sys.argv[1:], 'hfad:p:r:i:I:', [ "help" ])
except getopt.GetoptError as e:
    # `except X as e` replaces the Python-2-only `except X, e` form
    sys.stderr.write('Incorrect parameters! Use --help switch to learn more.: %s\n' % e)
    sys.exit(1)
2011-02-06 21:16:01 +01:00
# conversion settings, filled in from the command line below
overwrite = False        # -f: overwrite existing output files
convert_page = None      # -p: convert a single page directory
output_dir = None        # -d: DokuWiki data directory (required)
convert_attic = False    # -a: also convert page history into attic/
redirect_conf = False    # -r: path of redirect-plugin config to write
redirect_map = {}        # old page name -> new DokuWiki ID
page_filter = []         # -i/-I: page names to exclude

# options are mutually exclusive per iteration, so use an elif chain
# (the original tested every option on every pass)
for o, a in opts:
    if o == "--help" or o == "-h":
        print_help()
    elif o == "-f":
        overwrite = True
    elif o == "-a":
        convert_attic = True
    elif o == "-r":
        redirect_conf = a
    elif o == "-i":
        page_filter.append(a)
    elif o == "-I":
        page_filter.extend(readfile(a).split("\n"))
    elif o == "-d":
        output_dir = a
    elif o == "-p":
        convert_page = a

if not output_dir:
    # NOTE: print_help() itself calls sys.exit(0), so the exit(1) below is
    # only reached if print_help() ever stops exiting -- kept as a guard.
    print_help()
    sys.exit(1)

print("Output dir is: '%s'" % output_dir)
2012-09-22 18:10:06 +02:00
init_dirs(output_dir)

dw = DokuWiki()
request = RequestCLI()

# map: DokuWiki output name -> MoinMoin page name
pages = {}
if convert_page != None:
    # single-page mode (-p): convert just that page directory
    pagename = wikiname(convert_page)
    pages[pagename] = pagename
else:
    # renamed from `filter` to avoid shadowing the builtin
    page_name_filter = None
    if page_filter:
        # exclude pages listed via -i/-I
        def name_filter(name):
            return name not in page_filter
        page_name_filter = name_filter

    # get list of all pages in wiki;
    # hide underlay dir temporarily so bundled system pages are not listed
    underlay_dir = request.rootpage.cfg.data_underlay_dir
    print(underlay_dir)
    request.rootpage.cfg.data_underlay_dir = None
    pages = request.rootpage.getPageList(user = '', exists = not convert_attic, filter = page_name_filter)
    pages = dict(zip(pages, pages))
    # restore
    request.rootpage.cfg.data_underlay_dir = underlay_dir

# insert frontpage,
# so that MoinMoin frontpage gets saved as DokuWiki frontpage based on their configs
frontpage = wikiutil.getFrontPage(request)
# `in` replaces the deprecated dict.has_key()
if frontpage.page_name in pages:
    del pages[frontpage.page_name]
pages[dw.getId()] = frontpage.page_name

print("--------------------------------------------------")
for output, pagename in pages.items():
    print(" - %s" % encode_relaxed(pagename))
print("--------------------------------------------------")
2012-09-22 21:58:01 +02:00
converted = 0
for output, pagename in pages.items():
    page = Page(request, pagename)
    res = convertfile(page, output = output, overwrite = overwrite)
    # BUG FIX: convertfile() returns True on success and False when a page
    # is skipped (underlay); the original tested `res != None`, which
    # counted skipped pages as converted.
    if res:
        converted += 1
print("Processed %d files, converted %d" % (len(pages), converted))

if redirect_conf:
    # one "old_name<TAB>new_id" line per renamed page
    print("Writing %s: %d items" % (redirect_conf, len(redirect_map)))
    content = [u"\t".join(pair) for pair in redirect_map.items()]
    writefile(redirect_conf, content, overwrite = overwrite)