use use installed MoinMoin and DokuWiki to get accurate results

This commit is contained in:
Elan Ruusamäe 2011-02-10 10:49:37 +02:00
parent 52428be665
commit d339053bf1

View file

@ -8,12 +8,13 @@
# Call with the name of the directory containing the MoinMoin pages and that # Call with the name of the directory containing the MoinMoin pages and that
# of the directory to receive the DokuWiki pages on the command line: # of the directory to receive the DokuWiki pages on the command line:
# #
# python moin2doku.py ./moin/data/pages/ ./doku/ # You need to run this on host where MoinMoin is configured and DokuWiki is
# configured, it will use current configuration from both wikis.
# #
# then move the doku pages to e.g. /var/www/MyWikiName/data/pages/, # python moin2doku.py ./moin/data/pages/
# move the media files to e.g. /var/www/MyWikiName/data/media/, #
# set ownership: chown -R www-data:www-data /var/www/MyWikiName/data/pages/* # set ownership: chown -R www-data:www-data /var/lib/dokuwiki/pages/*
# chown -R www-data:www-data /var/www/MyWikiName/data/media/* # chown -R www-data:www-data /var/lib/dokuwiki/media/*
# #
# This script doesn't do all the work, and some of the work it does is # This script doesn't do all the work, and some of the work it does is
# wrong. For instance attachment links end up with the trailing "|}}" # wrong. For instance attachment links end up with the trailing "|}}"
@ -32,6 +33,7 @@ import getopt
from shutil import copyfile, copystat from shutil import copyfile, copystat
from os import listdir from os import listdir
from os.path import isdir, basename from os.path import isdir, basename
from doku import DokuWiki
def check_dirs(moin_pages_dir, output_dir): def check_dirs(moin_pages_dir, output_dir):
if not isdir(moin_pages_dir): if not isdir(moin_pages_dir):
@ -68,10 +70,10 @@ def writefile(filename, content, overwrite=False):
f.writelines([it.rstrip() + '\n' for it in content if it]) f.writelines([it.rstrip() + '\n' for it in content if it])
f.close() f.close()
def get_current_revision(page_dir): def get_current_revision(pagedir):
rev_dir = os.path.join(page_dir, 'revisions') rev_dir = os.path.join(pagedir, 'revisions')
# try "current" file first # try "current" file first
f = os.path.join(page_dir, 'current') f = os.path.join(pagedir, 'current')
if os.path.exists(f): if os.path.exists(f):
rev = readfile(f)[0].rstrip() rev = readfile(f)[0].rstrip()
try: try:
@ -85,7 +87,7 @@ def get_current_revision(page_dir):
revisions.sort() revisions.sort()
rev = revisions[-1] rev = revisions[-1]
print "%s rev: %s" % (page_dir, rev) print "%s rev: %s" % (pagedir, rev)
f = os.path.join(rev_dir, rev) f = os.path.join(rev_dir, rev)
if not os.path.exists(f): if not os.path.exists(f):
# deleted pages have '00000002' in current, and no existing file # deleted pages have '00000002' in current, and no existing file
@ -93,28 +95,34 @@ def get_current_revision(page_dir):
return f return f
def copy_attachments(page_dir, attachment_dir): # pagedir = MoinMoin page dir
dir = os.path.join(page_dir, 'attachments') # ns = DokuWiki namespace where attachments to copy
def copy_attachments(pagedir, ns):
dir = os.path.join(pagedir, 'attachments')
if not isdir(dir): if not isdir(dir):
return return
attachment_dir = dw.mediaFn(ns)
if not isdir(attachment_dir): if not isdir(attachment_dir):
os.makedirs(attachment_dir); os.makedirs(attachment_dir);
attachments = listdir(dir) attachments = listdir(dir)
for attachment in attachments: for attachment in attachments:
src = os.path.join(dir, attachment) src = os.path.join(dir, attachment)
dst = os.path.join(attachment_dir, attachment.lower()) dst = dw.mediaFn(dw.cleanID("%s/%s" % (ns, attachment)))
copyfile(src, dst) copyfile(src, dst)
copystat(src, dst) copystat(src, dst)
def convert_markup(content, filename): # convert page markup
# pagename: name of current page (MoinMoin name)
# content: page content (MoinMoin markup)
def convert_markup(pagename, content):
""" """
convert page markup convert page markup
""" """
namespace = ':' namespace = ':'
for i in range(0, len(filename) - 1): # for i in range(0, len(filename) - 1):
namespace += filename[i] + ':' # namespace += filename[i] + ':'
# http://www.pld-linux.org/SyntaxReference # http://www.pld-linux.org/SyntaxReference
regexp = ( regexp = (
@ -155,7 +163,7 @@ def convert_markup(content, filename):
# web link with title # web link with title
('\[((?:http|https|file)[^\s]+)\s+(.+?)\]', '[[\\1|\\2]]'), ('\[((?:http|https|file)[^\s]+)\s+(.+?)\]', '[[\\1|\\2]]'),
('\["/(.*)"\]', '[['+filename[-1]+':\\1]]'), # ('\["/(.*)"\]', '[['+filename[-1]+':\\1]]'),
# code blocks # code blocks
# open and language # open and language
@ -203,69 +211,39 @@ def print_help():
print "-f FILE - convert signle file" print "-f FILE - convert signle file"
sys.exit(0) sys.exit(0)
def unquote(filename): # return unicode encoded wikiname
filename = filename.lower() # input is a dir from moinmoin pages/ dir
filename = filename.replace('(2d)', '-') # hyphen def wikiname(filename):
filename = filename.replace('(20)', '_') # space->underscore from MoinMoin import wikiutil
filename = filename.replace('(2e)', '_') # decimal point->underscore return wikiutil.unquoteWikiname(basename(filename))
filename = filename.replace('(29)', '_') # )->underscore
filename = filename.replace('(28)', '_') # (->underscore
filename = filename.replace('.', '_') # decimal point->underscore
filename = filename.replace('(2c20)', '_') # comma + space->underscore
filename = filename.replace('(2028)', '_') # space + (->underscore
filename = filename.replace('(2920)', '_') # ) + space->underscore
filename = filename.replace('(2220)', 'inch_') # " + space->inch + underscore
filename = filename.replace('(3a20)', '_') # : + space->underscore
filename = filename.replace('(202827)', '_') # space+(+'->underscore
filename = filename.replace('(2720)', '_') # '+ space->underscore
filename = filename.replace('(c3bc)', 'ue') # umlaut
filename = filename.replace('(c384)', 'Ae') # umlaut
filename = filename.replace('(c3a4)', 'ae') # umlaut
filename = filename.replace('(c3b6)', 'oe') # umlaut
return filename
def convertfile(pathname, overwrite = False): def convertfile(pagedir, overwrite = False):
print "-> %s" % pathname pagedir = os.path.abspath(pagedir)
curr_rev = get_current_revision(pathname) print "-> %s" % pagedir
curr_rev = get_current_revision(pagedir)
if curr_rev == None: if curr_rev == None:
print "SKIP %s: no current revision" % pathname print "SKIP %s: no current revision" % pagedir
return return
if not os.path.exists(curr_rev): if not os.path.exists(curr_rev):
print "SKIP %s: filename missing" % curr_rev print "SKIP %s: filename missing" % curr_rev
return return
page_name = basename(pathname) pagename = wikiname(pagedir)
if page_name.count('MoinEditorBackup') > 0: print "pagename: [%s]" % pagename
print "SKIP %s: skip backups" % pathname
if pagename.count('MoinEditorBackup') > 0:
print "SKIP %s: skip backups" % pagedir
return return
content = readfile(curr_rev) content = readfile(curr_rev)
content = convert_markup(pagename, content)
page_name = unquote(page_name) out_file = dw.wikiFn(pagename)
print "dokuname: %s" % page_name print "dokuname: [%s]" % out_file
# split by namespace separator
ns = page_name.split('(2f)')
count = len(ns)
id = ns[-1]
dir = output_dir
attachment_dir = os.path.join(output_dir, 'media')
# root namespace files go to "unsorted"
if count == 1:
ns.insert(0, 'unsorted')
for p in ns[:-1]:
dir = os.path.join(dir, p);
attachment_dir = os.path.join(attachment_dir, p);
content = convert_markup(content, ns)
out_file = os.path.join(dir, id + '.txt')
writefile(out_file, content, overwrite = overwrite) writefile(out_file, content, overwrite = overwrite)
copy_attachments(pathname, attachment_dir) ns = dw.getNS(dw.cleanID(pagename))
copy_attachments(pagedir, ns)
return 1 return 1
@ -299,6 +277,8 @@ check_dirs(moin_pages_dir, output_dir)
print 'Input dir is: %s.' % moin_pages_dir print 'Input dir is: %s.' % moin_pages_dir
print 'Output dir is: %s.' % output_dir print 'Output dir is: %s.' % output_dir
dw = DokuWiki()
if inputfile != None: if inputfile != None:
res = convertfile(inputfile, overwrite = overwrite) res = convertfile(inputfile, overwrite = overwrite)
else: else: