Elan Ruusamäe 2011-02-06 19:05:22 +02:00
parent 5f64d725ee
commit c0629ccb89

View file

@ -1,6 +1,29 @@
#!/usr/bin/python #!/usr/bin/python
import sys, os, os.path #
import re # moin2doku.py
#
# A script for converting MoinMoin version 1.3+ wiki data to DokuWiki format.
# Call with the name of the directory containing the MoinMoin pages and that
# of the directory to receive the DokuWiki pages on the command line:
#
# python moin2doku.py ./moin/data/pages/ ./doku/
#
# then move the doku pages to e.g. /var/www/MyWikiName/data/pages/,
# move the media files to e.g. /var/www/MyWikiName/data/media/,
# set ownership: chown -R www-data:www-data /var/www/MyWikiName/data/pages/*
# chown -R www-data:www-data /var/www/MyWikiName/data/media/*
#
# This script doesn't do all the work, and some of the work it does is
# wrong. For instance attachment links end up with the trailing "|}}"
# on the line following the link. This works, but doesn't look good.
# The script interprets a "/" in a pagename as a namespace delimiter and
# creates and fills namespace subdirectories accordingly.
#
# version 0.1 02.2010 Slim Gaillard, based on the "extended python"
# convert.py script here:
# http://www.dokuwiki.org/tips:moinmoin2doku
#
import sys, os, os.path, re, pdb
from os import listdir from os import listdir
from os.path import isdir, basename from os.path import isdir, basename
@ -13,14 +36,16 @@ def check_dirs(moin_pages_dir, output_dir):
print >> sys.stderr, "Output directory doesn't exist!" print >> sys.stderr, "Output directory doesn't exist!"
sys.exit(1) sys.exit(1)
def get_path_names(moin_pages_dir):
    """Return the paths of all sub-directories of *moin_pages_dir*.

    Every entry of *moin_pages_dir* is joined with the directory name and
    kept only if the result is itself a directory; plain files are ignored.

    The entries are sorted before filtering because ``listdir`` returns
    them in arbitrary order; sorting makes the order in which the caller
    processes the returned paths reproducible from run to run.
    """
    candidates = (os.path.join(moin_pages_dir, entry)
                  for entry in sorted(listdir(moin_pages_dir)))
    return [path for path in candidates if isdir(path)]
def get_current_revision(page_dir): def get_current_revision(page_dir):
rev_dir = os.path.join(page_dir, 'revisions') rev_dir = os.path.join(page_dir, 'revisions')
@ -34,8 +59,10 @@ def copy_attachments(page_dir, attachment_dir):
dir = os.path.join(page_dir,'attachments') dir = os.path.join(page_dir,'attachments')
if isdir(dir): if isdir(dir):
attachments = listdir(dir) attachments = listdir(dir)
#pdb.set_trace()
for attachment in attachments: for attachment in attachments:
os.system ('cp "' + dir +'/' + attachment + '" "' + attachment_dir +'"') cmd_string = 'cp "' + dir +'/' + attachment + '" "' + attachment_dir + attachment.lower() + '"'
os.system ( cmd_string )
def convert_page(page, file): def convert_page(page, file):
namespace = ':' namespace = ':'
@ -48,9 +75,11 @@ def convert_page(page, file):
('\[\[BR\]\]', '\n'), # newline ('\[\[BR\]\]', '\n'), # newline
('#pragma section-numbers off', ''), # remove ('#pragma section-numbers off', ''), # remove
('^##.*?\\n', ''), # remove ('^##.*?\\n', ''), # remove
('\[:(.*):', '[[\\1]] '), # internal link ('\["', '[['), # internal link open
('\[\[(.*)/(.*)\]\]', '[[\\1:\\2]]'), ('"\]', ']]'), # internal link close
('(\[\[.*\]\]).*\]', '\\1'), #('\[:(.*):', '[[\\1]] '), # original internal link expressions
#('\[\[(.*)/(.*)\]\]', '[[\\1:\\2]]'),
#('(\[\[.*\]\]).*\]', '\\1'),
('\[(http.*) .*\]', '[[\\1]]'), # web link ('\[(http.*) .*\]', '[[\\1]]'), # web link
('\["/(.*)"\]', '[['+file[-1]+':\\1]]'), ('\["/(.*)"\]', '[['+file[-1]+':\\1]]'),
('\{{3}', '<>code>'), # code open ('\{{3}', '<>code>'), # code open
@ -58,7 +87,7 @@ def convert_page(page, file):
('^\s\s\s\s\*', ' *'), ('^\s\s\s\s\*', ' *'),
('^\s\s\s\*', ' *'), ('^\s\s\s\*', ' *'),
('^\s\s\*', ' *'), ('^\s\s\*', ' *'),
('^\s\*', ' *'), # lists must have not only but 2 whitespaces before * ('^\s\*', ' *'), # lists must have 2 whitespaces before the asterisk
('^\s\s\s\s1\.', ' -'), ('^\s\s\s\s1\.', ' -'),
('^\s\s1\.', ' -'), ('^\s\s1\.', ' -'),
('^\s1\.', ' -'), ('^\s1\.', ' -'),
@ -93,7 +122,30 @@ def print_parameter_error():
print >> sys.stderr, 'Incorrect parameters! Use --help switch to learn more.' print >> sys.stderr, 'Incorrect parameters! Use --help switch to learn more.'
sys.exit(1) sys.exit(1)
def fix_name(filename):
    """Turn a MoinMoin page directory name into a lowercase page name.

    The name is lowercased first, then each parenthesised hex escape listed
    in the table below (e.g. ``(20)`` for a space) is replaced by its
    substitute.  Literal dots are replaced by underscores as well.

    The ``(2f)`` escape (the "/" used as namespace delimiter) is
    deliberately NOT handled here, so the caller can still split on it.

    All substitutes are lowercase: the name has already been lowercased,
    so mapping ``(c384)`` to ``'Ae'`` would re-introduce an uppercase
    letter into an otherwise lowercase file name.
    """
    # Applied strictly in this order, one pass per entry.
    substitutions = (
        ('(2d)', '-'),        # hyphen
        ('(20)', '_'),        # space -> underscore
        ('(2e)', '_'),        # decimal point -> underscore
        ('(29)', '_'),        # ) -> underscore
        ('(28)', '_'),        # ( -> underscore
        ('.', '_'),           # literal decimal point -> underscore
        ('(2c20)', '_'),      # comma + space -> underscore
        ('(2028)', '_'),      # space + ( -> underscore
        ('(2920)', '_'),      # ) + space -> underscore
        ('(2220)', 'inch_'),  # " + space -> "inch" + underscore
        ('(3a20)', '_'),      # : + space -> underscore
        ('(202827)', '_'),    # space + ( + ' -> underscore
        ('(2720)', '_'),      # ' + space -> underscore
        ('(c3bc)', 'ue'),     # u-umlaut
        ('(c384)', 'ae'),     # capital A-umlaut, kept lowercase on purpose
        ('(c3a4)', 'ae'),     # a-umlaut
        ('(c3b6)', 'oe'),     # o-umlaut
    )
    filename = filename.lower()
    for escaped, plain in substitutions:
        filename = filename.replace(escaped, plain)
    return filename
#
# "main" starts here
#
if len(sys.argv) > 1: if len(sys.argv) > 1:
if sys.argv[1] in ('-h', '--help'): if sys.argv[1] in ('-h', '--help'):
print_help() print_help()
@ -109,44 +161,55 @@ if __name__ == '__main__':
print 'Input dir is: %s.' % moin_pages_dir print 'Input dir is: %s.' % moin_pages_dir
print 'Output dir is: %s.' % output_dir print 'Output dir is: %s.' % output_dir
print
pages = get_page_names(moin_pages_dir) pathnames = get_path_names(moin_pages_dir)
for page in pages:
curr_rev = get_current_revision(page) for pathname in pathnames:
if os.path.exists(curr_rev): #pdb.set_trace() # start debugging here
page_name = basename(page).lower()
curr_rev = get_current_revision( pathname )
if not os.path.exists( curr_rev ) : continue
page_name = basename(pathname)
if page_name.count('MoinEditorBackup') > 0 : continue # don't convert backups
curr_rev_desc = file(curr_rev, 'r') curr_rev_desc = file(curr_rev, 'r')
curr_rev_content = curr_rev_desc.readlines() curr_rev_content = curr_rev_desc.readlines()
curr_rev_desc.close() curr_rev_desc.close()
if 'moineditorbackup' not in page_name: #dont convert backups page_name = fix_name( page_name )
page_name = page_name.replace('(2d)', '-')
page_name = page_name.replace('(c3bc)', 'ue')
page_name = page_name.replace('(c384)', 'Ae')
page_name = page_name.replace('(c3a4)', 'ae')
page_name = page_name.replace('(c3b6)', 'oe')
split = page_name.split('(2f)') # namespaces split = page_name.split('(2f)') # namespaces
count = len(split) count = len(split)
dateiname = split[-1] dateiname = split[-1]
dir = output_dir dir = output_dir
attachment_dir = output_dir + '../media/' # changed from attachment_dir = output_dir + '../media/':
attachment_dir = output_dir + 'media/'
if not isdir (attachment_dir):
os.mkdir(attachment_dir)
if count == 1: if count == 1:
dir += 'unsorted' dir += 'unsorted'
if not isdir (dir): if not isdir (dir):
os.mkdir(dir) os.mkdir(dir)
attachment_dir += 'unsorted/' attachment_dir += 'unsorted/'
if not isdir (attachment_dir): if not isdir (attachment_dir):
os.mkdir(attachment_dir) os.mkdir(attachment_dir)
for i in range(0, count - 1): for i in range(0, count - 1):
dir += split[i] + '/' dir += split[i] + '/'
if not isdir (dir): if not isdir (dir):
os.mkdir(dir) os.mkdir(dir)
attachment_dir += split[i] + '/' attachment_dir += split[i] + '/'
if not isdir (attachment_dir): if not isdir (attachment_dir):
os.mkdir(attachment_dir) os.mkdir(attachment_dir)
if count == 1: if count == 1:
str = 'unsorted/' + page_name str = 'unsorted/' + page_name
split = str.split('/') split = str.split('/')
@ -157,6 +220,7 @@ if __name__ == '__main__':
out_file = os.path.join(dir, dateiname + '.txt') out_file = os.path.join(dir, dateiname + '.txt')
out_desc = file(out_file, 'w') out_desc = file(out_file, 'w')
out_desc.writelines([it.rstrip() + '\n' for it in curr_rev_content if it]) out_desc.writelines([it.rstrip() + '\n' for it in curr_rev_content if it])
out_desc.close() out_desc.close()
copy_attachments(page, attachment_dir)
# pdb.set_trace() # start debugging here
copy_attachments(pathname, attachment_dir)