* Added Windows hints and v1.2 infos
* added some windows batch files
* switched from 'cliopts' to 'cli'
* "named args"
* All arguments passed to php are UTF-8 encoded
* removed local path from repository. Use doku.local.php
* added/changed support for
  - small
  - big
  - anchordef
  - definition_list
  - comment
  - include and search macros
* added #pragma directives as comments to dokuwiki
* recreate "_dokuwiki.changes"
* Support for non-ascii characters in media files
This commit is contained in:
Jens Hofschröer 2019-03-26 12:47:14 +01:00 committed by GitHub
parent ec684eaefc
commit 9b3341f836
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 412 additions and 32 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
settings.local.cmd
*.log
*.pyc
out/

View file

@ -6,12 +6,12 @@ Converts also page history and edit-log.
http://www.dokuwiki.org/tips:moinmoin2doku
Tested with MoinMoin 1.9 and DokuWiki 2014-12-09 releases
Tested with MoinMoin 1.9.9 and DokuWiki 2018-04-22 releases under Windows 7
You need to run this on host where both MoinMoin and DokuWiki are configured,
it uses current configuration from both wikis.
Edit doku.php if your DokuWiki installation is other than /usr/share/dokuwiki
Edit `doku.php` if your DokuWiki installation is other than `/usr/share/dokuwiki`
To convert moinmoin all pages with history, invoke:
```
@ -33,6 +33,28 @@ and ensure ownership of files is correct:
```
additionally, depending on your configuration, you may need to gzip the attic pages.
Hints for Windows Users
-----------------------
The Batchfiles (`*.cmd`) should help to do the conversation under Windows. You should
create a copy of the `settings.cmd` and call it `settings.local.cmd` to set your
own local paths.
Call `moin2doku.cmd` to convert the full MoinMoin Wiki. All DokuWiki pages will be
written to an `out` folder in the current directory.
This will convert a single page:
```
D:\moin2doku\> moin2doku.cmd MyMoinPage
```
Set `%OUTDIR%` to an alternativ output folder. This should not be the dokuwiki `data`
folder if you want to do a full conversation.
The `reindex.cmd` will call the `bin/indexer.php`-Skript.
History
=======
@ -62,3 +84,10 @@ I put repo online so others have better starting point than I did.
version 1.1 (2015)
----------------
Modifed the script to work with newer Moin versions and API changes.
version 1.2 (2019-01)
----------------
Some modifications to work with current DokuWiki and added more formattings.
Search GitHub Forks for newer versions of this project.

View file

@ -13,10 +13,14 @@
if ('cli' != php_sapi_name()) die();
define('DOKU_INC', '/home/caddy/wikifarm/dokuwiki/dokuwiki/');
//add to following define of 'DOKU_INC' to your "doku.local.php" file and adjust the path:
//define('DOKU_INC', '/home/caddy/wikifarm/dokuwiki/dokuwiki/');
//define('DOKU_INC', "d:/website/wwwroot/dokuwiki/" );
require_once './doku.local.php';
require_once DOKU_INC.'inc/init.php';
require_once DOKU_INC.'inc/common.php';
require_once DOKU_INC.'inc/cliopts.php';
require_once DOKU_INC.'inc/cli.php';
# disable gzip regardless of config, then we don't have to compress when converting
$conf['compression'] = 0; //compress old revisions: (0: off) ('gz': gnuzip) ('bz2': bzip)
@ -29,29 +33,33 @@ function strip_dir($dir, $fn) {
return end(explode($dir.'/', $fn, 2));
}
switch ($argv[1]) {
$action = $argv[1];
$argPage = $argv[2];
//filext = $argv[3];
switch ($action) {
case 'cleanID':
echo cleanID($argv[2]);
echo cleanID($argPage);
break;
case 'wikiFN':
if ($argc > 3 && $argv[3]) {
echo strip_dir($conf['olddir'], wikiFN($argv[2], $argv[3]));
echo strip_dir($conf['olddir'], wikiFN($argPage, $argv[3]));
} else {
echo strip_dir($conf['datadir'], wikiFN($argv[2]));
echo strip_dir($conf['datadir'], wikiFN($argPage));
}
break;
case 'mediaFN':
echo strip_dir($conf['mediadir'], mediaFN($argv[2]));
echo strip_dir($conf['mediadir'], mediaFN($argPage));
break;
case 'metaFN':
echo strip_dir($conf['metadir'], metaFN($argv[2], $argv[3]));
echo strip_dir($conf['metadir'], metaFN($argPage, $argv[3]));
break;
case 'getNS':
echo getNS($argv[2]);
echo getNS($argPage);
break;
case 'getId':
echo getId();
break;
default:
die("Unknown knob: {$argv[1]}");
die("Unknown knob: {$action}");
}

17
doku.py
View file

@ -11,6 +11,9 @@
import sys
import subprocess
from MoinMoin import log
logging = log.getLogger(__name__)
class DokuWiki:
def __init__(self):
self.callcache = {}
@ -24,10 +27,18 @@ class DokuWiki:
def __call(self, method, *args):
args = list(args)
key = "%s:%s" % (method, ",".join(args))
uargs = []
for arg in args:
try:
arg.decode('utf-8')
#already UTF-8 ready
uargs.append(arg)
except UnicodeError:
uargs.append(arg.encode('utf-8'))
key = "%s:%s" % (method, ",".join(uargs))
if not self.callcache.has_key(key):
cmd = ['./doku.php', method ] + args
res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = True).communicate()
cmd = ['php', './doku.php', method ] + uargs
res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate()
self.callcache[key] = unicode(res[0].decode('utf-8'))
print "%s->%s" % (cmd, self.callcache[key])
return self.callcache[key]

36
moin2doku.cmd Normal file
View file

@ -0,0 +1,36 @@
@echo off
setlocal
call settings.cmd
if not "%1"=="" goto :singlePage %1
if "%OUTDIR%"=="" (
call :deldir out\attic || (pause & goto :eof)
call :deldir out\media || (pause & goto :eof)
call :deldir out\meta || (pause & goto :eof)
call :deldir out\pages || (pause & goto :eof)
if exist %~n0.pages.log del %~n0.pages.log
if not exist out md out
set OUTDIR=%CD%\out
)
call python moin2doku.py %DOKU_FULL_HISTORY% -d "%OUTDIR:\=/%" >"%~n0.log" 2>"%~n0.err.log"
goto :eof
:deldir
if exist %1 rd /s/q %1
if exist %1 exit /B 1
goto :eof
:singlePage
if "%OUTDIR%"=="" set OUTDIR=%CD%\out
call python moin2doku.py %DOKU_FULL_HISTORY% -p "%MOIN_DATA_HOME%\pages\%~1" -f -d "%OUTDIR:\=/%" >>"%~n0.log" 2>>"%~n0.err.log" || type "%~n0.err.log"
if %ERRORLEVEL% == 0 if exist "%DOKU_ANIMALS_HOME%\%ANIMAL%\conf\local.php" (
rem touching "%DOKU_ANIMALS_HOME%\%ANIMAL%\conf\local.php" to invalidate cache
pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%\conf"
copy /y/b local.php +,, >nul
popd
)
goto :eof

View file

@ -21,6 +21,10 @@ from doku import DokuWiki
from moinformat import moin2doku
import random
# sys.setdefaultencoding() does not exist, here!
reload(sys) # Reload does the trick!
sys.setdefaultencoding('cp1252')
USEC = 1000000
def init_dirs(output_dir):
@ -74,10 +78,13 @@ def copy_attachments(page, ns,randomID):
attachments = listdir(srcdir)
for attachment in attachments:
try:
src = os.path.join(srcdir, attachment)
dst = os.path.join(output_dir, 'media', dw.mediaFN(dw.cleanID("%s/%s" % (ns, str(randomID)+attachment))))
dst = os.path.join(output_dir, 'media', dw.mediaFN(dw.cleanID(u"%s/%s" % (ns, str(randomID)+attachment))))
copyfile(src, dst)
copystat(src, dst)
except UnicodeDecodeError:
print 'ERROR: unable to convert attachment "%s"' % attachment
def print_help():
program = sys.argv[0]
@ -161,6 +168,8 @@ def convertfile(page, output = None, overwrite = False):
if not output:
output = pagename
print "Converting %s" % pagename
if page.isUnderlayPage():
print "underlay: %s" % page.request.cfg.data_underlay_dir
print "underlay: %s" % request.cfg.data_underlay_dir
@ -313,6 +322,11 @@ else:
del pages[frontpage.page_name]
pages[dw.getId()] = frontpage.page_name
print "--------------------------------------------------"
for output, pagename in pages.items():
print " - %s" % pagename
print "--------------------------------------------------"
converted = 0
for output, pagename in pages.items():
page = Page(request, pagename)

69
reindex.cmd Normal file
View file

@ -0,0 +1,69 @@
@echo off
setlocal
call settings.cmd
pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%"
if not exist data\pages\ (
echo WARNUNG:
echo Es wurden kein "pages" Verzeichnis gefunden. Wurde die neue Version bereits
echo ins Zielverzeichnis kopiert?
pause
goto :eof
)
if not exist data\media\logo.png (
echo working in %CD%
rem call :cleanup data\attic || goto :ende
call :cleanup data\cache || goto :ende
call :cleanup data\index || goto :ende
call :cleanup data\locks || goto :ende
call :cleanup data\media_attic || goto :ende
call :cleanup data\media_meta || goto :ende
call :cleanup data\tmp || goto :ende
xcopy /S/I/Y/Q common\*.* data
)
REM if exist data\pages\startseiteneu.txt (
REM ren data\pages\startseiteneu.txt startseite.txt
REM ren data\meta\startseiteneu.changes startseite.changes
REM )
popd
pushd "%DOKU_HOME%"
php bin\indexer.php || goto :ende
popd
pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%"
if exist data\meta\_dokuwiki.changes del data\meta\_dokuwiki.changes
if exist data\meta\_dokuwiki.changes del data\meta\_dokuwiki.changes
(
for /F "delims=*" %%D in ('dir /b/S/A:D data\meta\*.*') do (
if exist "%%D\*.changes" type "%%D\*.changes" 2>nul
)
) > _dokuwiki_unsorted.changes
sort _dokuwiki_unsorted.changes /O data\meta\_dokuwiki.changes
del _dokuwiki_unsorted.changes
echo --- compressing old files
for /F "delims=*" %%T in ('dir data\attic\*.txt /s/b') do (
"c:\Program Files\7-Zip\7z.exe" a -bso0 "%%T.gz" "%%T" && del "%%T" || goto :ende
)
echo --- done
:ende
popd
pause
goto :eof
:cleanup
if not exist %1 goto :eof
echo cleaning up %1
rd /s/q %1
if exist %1 exit /b 1
md %1
echo >nul 2>"%~1\_dummy"

45
settings.cmd Normal file
View file

@ -0,0 +1,45 @@
@echo off
REM -- no setlocal in this script!
REM make a copy of this file and adjust the paths
set PHP_HOME=c:\Program Files\php
set PYTHON_HOME=c:\Python27
REM -- MoinMoin settings
set MOIN_HOME=c:\wwwroot\wiki\moin
set MOIN_CONFIG=%MOIN_HOME%\wiki\config
REM set MOIN_CONFIG=c:\wwwroot\moinfarmdata\config
set MOIN_DATA_HOME=%MOIN_HOME%\wiki\data
REM set MOIN_CONFIG=c:\wwwroot\moinfarmdata\<farmwiki>
REM -- DokuWiki settings
set DOKU_HOME=c:\wwwroot\wiki\dokuwiki
set DOKU_ANIMALS_HOME=%DOKU_HOME%
REM set DOKU_ANIMALS_HOME=c:\wwwroot\dokufarmdata
REM set animal=<yourFarmAnimalName>
REM comment this in to do a full converstion
REM set DOKU_FULL_HISTORY=-a
REM -- path to your php.ini used by your webserver
REM set PHP_INI_SCAN_DIR=c:\Program Files\ApacheHttpd\conf\
REM -- set this to your "production" dokuwiki if you want to update only.
REM set OUTDIR=%DOKU_ANIMALS_HOME%\%animal%\data
REM ----8<--------8<--------8<--------8<--------8<--------8<--------8<----
REM -- remove everything beyond the line from your copy
chcp 1252
if exist "%~dpn0.local.cmd" call "%~dpn0.local.cmd"
REM -- %animal% must be lowercase!
(
set animal=
set animal=%animal%
)
set PATH=%PATH%;%PYTHON_HOME%;%PHP_HOME%
set PYTHONPATH=.
set PYTHONPATH=%PYTHONPATH%;%MOIN_HOME%
set PYTHONPATH=%PYTHONPATH%;%MOIN_HOME%\MoinMoin\support
set PYTHONPATH=%PYTHONPATH%;%MOIN_CONFIG%
set PYTHONPATH=%PYTHONPATH%;

View file

@ -13,6 +13,9 @@ from MoinMoin.formatter import FormatterBase
from MoinMoin import config
from MoinMoin.Page import Page
from types import *
from MoinMoin import log
logging = log.getLogger(__name__)
# TODO: let base class MoinMoin/formatter/base.py handle not implemented methods
@ -73,6 +76,14 @@ class Formatter(FormatterBase):
if on:
if interwiki == 'Self':
return self.pagelink(on, pagename, **kw)
interwikis = {
'WikiPedia':'wp',
'FrWikiPedia':'wpfr',
'DeWikiPedia':'wpde',
'MetaWikiPedia':'wpmeta'
}
if interwiki in interwikis:
return '[[%s>%s|' % (interwikis.get(interwiki), pagename)
return '[[%s>%s|' % (interwiki, pagename)
else:
return ']]'
@ -134,6 +145,7 @@ class Formatter(FormatterBase):
self.list_type = '*'
else:
self.list_depth -= 1
if self.list_depth <= 0:
self.list_type = ' '
return ['', '\n'][on]
@ -151,7 +163,7 @@ class Formatter(FormatterBase):
def code(self, on, **kw):
""" `typewriter` or {{{typerwriter}}, for code blocks i hope codeblock works """
return ["''", "''"][not on]
return ["''%%", "%%''"][not on]
def sup(self, on, **kw):
return ['<sup>', '</sup>'][not on]
@ -162,12 +174,20 @@ class Formatter(FormatterBase):
def strike(self, on, **kw):
return ['<del>', '</del>'][not on]
def small(self, on, **kw):
#https://www.dokuwiki.org/plugin:wrap
return ['<wrap lo>', '</wrap>'][not on]
def big(self, on, **kw):
#https://www.dokuwiki.org/plugin:wrap
return ['<wrap hi>', '</wrap>'][not on]
def preformatted(self, on, **kw):
FormatterBase.preformatted(self, on)
result = ''
if self.in_p:
result = self.paragraph(0)
return result + ['<file>', '</file>\n'][not on]
return result + ['<code>', '</code>\n'][not on]
def paragraph(self, on, **kw):
FormatterBase.paragraph(self, on)
@ -192,7 +212,7 @@ class Formatter(FormatterBase):
self.in_table = 1
else:
self.in_table = 0
return ''
return ['', '\n'][not on]
def table_row(self, on, attrs={}, **kw):
return ['\n', '|'][not on]
@ -201,8 +221,8 @@ class Formatter(FormatterBase):
return ['|', ''][not on]
def anchordef(self, id):
# not supported
return ''
# https://www.dokuwiki.org/plugin:anchor
return '{{anchor:'+id+'}}'
def anchorlink(self, on, name='', **kw):
# kw.id not supported, we hope the anchor matches existing heading on page
@ -212,16 +232,18 @@ class Formatter(FormatterBase):
return ['__', '__'][not on]
def definition_list(self, on, **kw):
# https://www.dokuwiki.org/plugin:definitionlist
result = ''
if self.in_p:
result = self.paragraph(0)
return result + ['<gloss>', '</gloss>'][not on]
return result
def definition_term(self, on, compact=0, **kw):
return ['<label>', '</label>'][not on]
#MoinMoin does no wiki markup in DL-Terms
return [' ;%%', '%%\n'][not on]
def definition_desc(self, on, **kw):
return ['<item>', '</item>'][not on]
return [' :', '\n'][not on]
def image(self, src=None, **kw):
valid_attrs = ['src', 'width', 'height', 'alt', 'title']
@ -268,8 +290,11 @@ class Formatter(FormatterBase):
def comment(self, text):
# real comments (lines with two hash marks)
if text[0:2] == '##':
#return "/* %s */\n" % text[2:].strip()
return ''
# https://www.dokuwiki.org/plugin:comment
comment = text[2:].strip()
if len(comment)>1:
return "/* %s */\n" % text[2:].strip()
return '\n'
# Some kind of Processing Instruction
# http://moinmo.in/HelpOnProcessingInstructions
@ -279,6 +304,7 @@ class Formatter(FormatterBase):
if tokens[0] == 'acl':
# TODO: fill acl.auth.php
logging.info('SKIPPING ACL: %s', text)
return ''
if tokens[0] == 'deprecated':
@ -289,6 +315,10 @@ class Formatter(FormatterBase):
if tokens[0] == 'pragma':
# TODO: can do 'description' via 'meta' dokuwiki plugin
pargs = tokens[1].split(None, 1)
if pargs[0]=='section-numbers':
return '/* meta: %s */' % tokens
logging.info('SKIPPING PRAGMA: %s', tokens)
#return "/* pragma: %s */\n" % " ".join(tokens[1:])
return ''
@ -313,16 +343,150 @@ class Formatter(FormatterBase):
def inherit(args):
return apply(FormatterBase.macro, (self, macro_obj, name, args))
def randomQuote(args):
# https://www.dokuwiki.org/plugin:xfortune
return '{{xfortune>quote:'+args+'.txt}}'
def monthcal(args):
# https://www.dokuwiki.org/plugin:monthcal
selfname = self.page.page_name
return '{{monthcal:create_links=short,namespace='+selfname.replace('/',':')+'}}'
def navigation(args):
# https://www.dokuwiki.org/plugin:alphaindex
selfname = self.page.page_name
args = args.split(',')
if len(args)>0:
try:
result = {
'slides': '[<>]',
'children': '{{alphaindex>:%s#1|nons incol}}' % selfname.replace('/',':'),
'siblings': '{{alphaindex>.#1|nons incol}}',
'slideshow': '/* no support for slideshow navigation */'
}[args[0].strip()]
except KeyError:
result = '/* Unknown Navigation: %s #%s#*/' % args, args[0].strip()
else:
result = '/* Unsupported Navigation: %s */' % args
return result
def footnote(args):
return '((%s))' % args
def dateTimeMacro(args):
#https://www.dokuwiki.org/plugin:date
#args = args.split(',');
return '{{date>%%c|timestamp=strtotime("%s")|locale=de}}' % args
def dateMacro(args):
#https://www.dokuwiki.org/plugin:date
#args = args.split(',');
return '{{date>%%x|timestamp=strtotime("%s")|locale=de}}' % args
def includeMacro(args):
#https://www.dokuwiki.org/plugin:include
#logging.info('Include(%s)' % args)
args = map(unicode.strip, args.split(','));
#dokupage = ":".join(pagename.split("/"))
if len(args)==1:
return '{{page>%s&nodate}}' % ":".join(args[0].split("/"))
elif(u'titlesonly' in args):
#https://www.dokuwiki.org/plugin:changes
#https://www.dokuwiki.org/plugin:pagelist
selfname = self.page.page_name
selfNs = ":".join(selfname.split("/")).lower()
pairs = [arg.split('=') for arg in args]
# attrs = {}
# for key, value in pairs:
# attrs[key] = value
#logging.info('pairs:"%s"' % pairs)
incName = ''#pairs[0]
incCount = -1
incTitlesOnly = False
notNamedParam = 0
for pair in pairs:
if len(pair)==1:
if u'titlesonly'==pair:
notNamedParam = -1
incTitlesOnly = True
elif notNamedParam >=0:
if notNamedParam==0:
incName = pairs[notNamedParam]
notNamedParam += 1;
else:
notNamedParam = -1
if u'items'==pair[0]:
incCount = int(pair[1])
resultArgs = '-h1 -textPages=""'
#(keys,values) = map()
if incCount > 0:
resultArgs += ' -idAndTitle -simpleList -sortId -nbItemsMax=%d' % incCount
else:
resultArgs += ' -nbCol=2'
##<nspages fortran:mailarchiv -pregPagesOn="/2.*/" -h1 -nbCol=2 -textPages="">
## Lister der letzten 10 Mails:
##<nspages .:mailarchiv -nbItemsMax=10 -sortId -simpleList -idAndTitle -reverse -h1 -nbCol=1 -textPages="">
if incName[0]=='^':
nspagedelim = incName.rfind('/')
ns = ":".join(incName[1:nspagedelim].split('/')).lower()
incPageReg = incName[(nspagedelim+1):]
resultArgs += ' -pregPagesOn="/^%s/"' % incPageReg
else:
ns = selfNs
return '<nspages %s %s>' % (ns, resultArgs)
else:
logging.info('UNSUPPORTED INCLUDE "%s"' % args)
return '/* Unsupported Include: %s */' % args
def fullsearch(args):
#args=None >> {searchform ns=}
#args='' >> {{backlinks>.}}
#args!='' >> {{search><args>}}
#ignore special searches. see MoinMoin page "HilfeZumSuchen"
if args is None:
return '{searchform ns=}'
elif ':' in args or ' ' in args:
logging.info('UNSUPPORTED SEARCH %s(%s)' % (name, args))
return '/* Unsupported Search %s(%s). may be backlinks plugin will help */' % (name, args)
elif args=='':
return '{{backlinks>.}}'
elif name=='PageList':
return '{{backlinks>%s}}' % ":".join(args.split('/')).lower();
else:
logging.info('UNSUPPORTED SEARCH %s(%s)' % (name, args))
return '/* Unsupported Search %s(%s) */' % (name, args)
try:
lookup = {
'BR' : '\\\\',
'BR' : ' \\\\ ',
'br' : ' \\\\ ',
'MailTo' : email,
'GetText' : args,
'ShowSmileys' : inherit,
'ShowAttachedFiles' : showAttachedFiles,
'Include' : inherit
'Include' : includeMacro,
#no real fulltext search!
'FullSearch' : fullsearch,
'FullSearchCached' : fullsearch,
'PageList' : fullsearch,
'MonthCalendar' : monthcal,
'Navigation' : navigation,
'TableOfContents' : '',
'RandomQuote': randomQuote,
'Anchor': inherit,
'Action': inherit,
'Icon': inherit,
'FootNote': footnote,
'Date': dateMacro,
'DateTime': dateTimeMacro
}[name]
except KeyError:
logging.info('UNDEFINED MACRO "%s"' % name)
lookup = '/* UndefinedMacro: %s(%s) */' % (name, args)
if type(lookup) == FunctionType: