diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e73e3e --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +settings.local.cmd +*.log +*.pyc +out/ diff --git a/README.md b/README.md index 7d7674d..1ccbac4 100755 --- a/README.md +++ b/README.md @@ -6,12 +6,12 @@ Converts also page history and edit-log. http://www.dokuwiki.org/tips:moinmoin2doku -Tested with MoinMoin 1.9 and DokuWiki 2014-12-09 releases +Tested with MoinMoin 1.9.9 and DokuWiki 2018-04-22 releases under Windows 7 You need to run this on host where both MoinMoin and DokuWiki are configured, it uses current configuration from both wikis. -Edit doku.php if your DokuWiki installation is other than /usr/share/dokuwiki +Create a `doku.local.php` next to `doku.php` that defines `DOKU_INC` as the path of your DokuWiki installation, e.g. `/usr/share/dokuwiki/` To convert moinmoin all pages with history, invoke: ``` @@ -33,6 +33,28 @@ and ensure ownership of files is correct: ``` additionally, depending on your configuration, you may need to gzip the attic pages. + +Hints for Windows Users +----------------------- + +The batch files (`*.cmd`) help to do the conversion under Windows. You should +create a copy of the `settings.cmd` and call it `settings.local.cmd` to set your +own local paths. + +Call `moin2doku.cmd` to convert the full MoinMoin Wiki. All DokuWiki pages will be +written to an `out` folder in the current directory. + +This will convert a single page: +``` +D:\moin2doku\> moin2doku.cmd MyMoinPage +``` + +Set `%OUTDIR%` to an alternative output folder. This should not be the DokuWiki `data` +folder if you want to do a full conversion. + +The `reindex.cmd` will call the `bin/indexer.php` script. + + History ======= @@ -62,3 +84,10 @@ I put repo online so others have better starting point than I did. version 1.1 (2015) ---------------- Modifed the script to work with newer Moin versions and API changes. + + +version 1.2 (2019-01) +---------------- +Some modifications to work with current DokuWiki and added more formatting. 
+ +Search GitHub Forks for newer versions of this project. diff --git a/doku.php b/doku.php index 50714df..65c376a 100755 --- a/doku.php +++ b/doku.php @@ -13,10 +13,14 @@ if ('cli' != php_sapi_name()) die(); -define('DOKU_INC', '/home/caddy/wikifarm/dokuwiki/dokuwiki/'); +//add the following define of 'DOKU_INC' to your "doku.local.php" file and adjust the path: +//define('DOKU_INC', '/home/caddy/wikifarm/dokuwiki/dokuwiki/'); +//define('DOKU_INC', "d:/website/wwwroot/dokuwiki/" ); +require_once './doku.local.php'; + require_once DOKU_INC.'inc/init.php'; require_once DOKU_INC.'inc/common.php'; -require_once DOKU_INC.'inc/cliopts.php'; +require_once DOKU_INC.'inc/cli.php'; # disable gzip regardless of config, then we don't have to compress when converting $conf['compression'] = 0; //compress old revisions: (0: off) ('gz': gnuzip) ('bz2': bzip) @@ -29,29 +33,33 @@ function strip_dir($dir, $fn) { return end(explode($dir.'/', $fn, 2)); } -switch ($argv[1]) { +$action = isset($argv[1]) ? $argv[1] : ''; // a missing action falls through to the "Unknown knob" default +$argPage = isset($argv[2]) ? $argv[2] : null; // not every action takes a page argument (e.g. getId) +//filext = $argv[3]; + +switch ($action) { case 'cleanID': - echo cleanID($argv[2]); + echo cleanID($argPage); break; case 'wikiFN': if ($argc > 3 && $argv[3]) { - echo strip_dir($conf['olddir'], wikiFN($argv[2], $argv[3])); + echo strip_dir($conf['olddir'], wikiFN($argPage, $argv[3])); } else { - echo strip_dir($conf['datadir'], wikiFN($argv[2])); + echo strip_dir($conf['datadir'], wikiFN($argPage)); } break; case 'mediaFN': - echo strip_dir($conf['mediadir'], mediaFN($argv[2])); + echo strip_dir($conf['mediadir'], mediaFN($argPage)); break; case 'metaFN': - echo strip_dir($conf['metadir'], metaFN($argv[2], $argv[3])); + echo strip_dir($conf['metadir'], metaFN($argPage, $argv[3])); break; case 'getNS': - echo getNS($argv[2]); + echo getNS($argPage); break; case 'getId': echo getId(); break; default: - die("Unknown knob: {$argv[1]}"); + die("Unknown knob: {$action}"); } diff --git a/doku.py b/doku.py index ad94aa9..c750a19 100755 --- a/doku.py +++ b/doku.py @@ -11,6 +11,9 
@@ import sys import subprocess +from MoinMoin import log +logging = log.getLogger(__name__) + class DokuWiki: def __init__(self): self.callcache = {} @@ -24,10 +27,18 @@ class DokuWiki: def __call(self, method, *args): args = list(args) - key = "%s:%s" % (method, ",".join(args)) + uargs = [] + for arg in args: + try: + arg.decode('utf-8') + #already UTF-8 ready + uargs.append(arg) + except UnicodeError: + uargs.append(arg.encode('utf-8')) + key = "%s:%s" % (method, ",".join(uargs)) if not self.callcache.has_key(key): - cmd = ['./doku.php', method ] + args - res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = True).communicate() + cmd = ['php', './doku.php', method ] + uargs + res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate() self.callcache[key] = unicode(res[0].decode('utf-8')) print "%s->%s" % (cmd, self.callcache[key]) return self.callcache[key] diff --git a/moin2doku.cmd b/moin2doku.cmd new file mode 100644 index 0000000..bea9f15 --- /dev/null +++ b/moin2doku.cmd @@ -0,0 +1,36 @@ +@echo off +setlocal + +call settings.cmd + +if not "%1"=="" goto :singlePage %1 + +if "%OUTDIR%"=="" ( + call :deldir out\attic || (pause & goto :eof) + call :deldir out\media || (pause & goto :eof) + call :deldir out\meta || (pause & goto :eof) + call :deldir out\pages || (pause & goto :eof) + if exist %~n0.pages.log del %~n0.pages.log + if not exist out md out + set OUTDIR=%CD%\out +) + +call python moin2doku.py %DOKU_FULL_HISTORY% -d "%OUTDIR:\=/%" >"%~n0.log" 2>"%~n0.err.log" + +goto :eof +:deldir +if exist %1 rd /s/q %1 +if exist %1 exit /B 1 + +goto :eof +:singlePage +if "%OUTDIR%"=="" set OUTDIR=%CD%\out +call python moin2doku.py %DOKU_FULL_HISTORY% -p "%MOIN_DATA_HOME%\pages\%~1" -f -d "%OUTDIR:\=/%" >>"%~n0.log" 2>>"%~n0.err.log" || type "%~n0.err.log" +if %ERRORLEVEL% == 0 if exist "%DOKU_ANIMALS_HOME%\%ANIMAL%\conf\local.php" ( + rem touching 
"%DOKU_ANIMALS_HOME%\%ANIMAL%\conf\local.php" to invalidate cache + pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%\conf" + copy /y/b local.php +,, >nul + popd +) + +goto :eof diff --git a/moin2doku.py b/moin2doku.py index 51ee558..fd60983 100755 --- a/moin2doku.py +++ b/moin2doku.py @@ -21,6 +21,10 @@ from doku import DokuWiki from moinformat import moin2doku import random +# sys.setdefaultencoding() does not exist, here! +reload(sys) # Reload does the trick! +sys.setdefaultencoding('cp1252') + USEC = 1000000 def init_dirs(output_dir): @@ -74,10 +78,13 @@ def copy_attachments(page, ns,randomID): attachments = listdir(srcdir) for attachment in attachments: - src = os.path.join(srcdir, attachment) - dst = os.path.join(output_dir, 'media', dw.mediaFN(dw.cleanID("%s/%s" % (ns, str(randomID)+attachment)))) - copyfile(src, dst) - copystat(src, dst) + try: + src = os.path.join(srcdir, attachment) + dst = os.path.join(output_dir, 'media', dw.mediaFN(dw.cleanID(u"%s/%s" % (ns, str(randomID)+attachment)))) + copyfile(src, dst) + copystat(src, dst) + except UnicodeDecodeError: + print 'ERROR: unable to convert attachment "%s"' % attachment def print_help(): program = sys.argv[0] @@ -161,6 +168,8 @@ def convertfile(page, output = None, overwrite = False): if not output: output = pagename + print "Converting %s" % pagename + if page.isUnderlayPage(): print "underlay: %s" % page.request.cfg.data_underlay_dir print "underlay: %s" % request.cfg.data_underlay_dir @@ -313,6 +322,11 @@ else: del pages[frontpage.page_name] pages[dw.getId()] = frontpage.page_name +print "--------------------------------------------------" +for output, pagename in pages.items(): + print " - %s" % pagename +print "--------------------------------------------------" + converted = 0 for output, pagename in pages.items(): page = Page(request, pagename) diff --git a/reindex.cmd b/reindex.cmd new file mode 100644 index 0000000..38321b0 --- /dev/null +++ b/reindex.cmd @@ -0,0 +1,69 @@ +@echo off +setlocal + +call 
settings.cmd + +pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%" + +if not exist data\pages\ ( + echo WARNUNG: + echo Es wurden kein "pages" Verzeichnis gefunden. Wurde die neue Version bereits + echo ins Zielverzeichnis kopiert? + pause + goto :eof +) + +if not exist data\media\logo.png ( + echo working in %CD% + rem call :cleanup data\attic || goto :ende + call :cleanup data\cache || goto :ende + call :cleanup data\index || goto :ende + call :cleanup data\locks || goto :ende + call :cleanup data\media_attic || goto :ende + call :cleanup data\media_meta || goto :ende + call :cleanup data\tmp || goto :ende + xcopy /S/I/Y/Q common\*.* data +) + +REM if exist data\pages\startseiteneu.txt ( + REM ren data\pages\startseiteneu.txt startseite.txt + REM ren data\meta\startseiteneu.changes startseite.changes +REM ) + +popd +pushd "%DOKU_HOME%" + +php bin\indexer.php || goto :ende + +popd +pushd "%DOKU_ANIMALS_HOME%\%ANIMAL%" + +if exist data\meta\_dokuwiki.changes del data\meta\_dokuwiki.changes +if exist data\meta\_dokuwiki.changes del data\meta\_dokuwiki.changes +( + for /F "delims=*" %%D in ('dir /b/S/A:D data\meta\*.*') do ( + if exist "%%D\*.changes" type "%%D\*.changes" 2>nul + ) +) > _dokuwiki_unsorted.changes +sort _dokuwiki_unsorted.changes /O data\meta\_dokuwiki.changes +del _dokuwiki_unsorted.changes + +echo --- compressing old files +for /F "delims=*" %%T in ('dir data\attic\*.txt /s/b') do ( + "c:\Program Files\7-Zip\7z.exe" a -bso0 "%%T.gz" "%%T" && del "%%T" || goto :ende +) +echo --- done + +:ende +popd + +pause + +goto :eof +:cleanup +if not exist %1 goto :eof +echo cleaning up %1 +rd /s/q %1 +if exist %1 exit /b 1 +md %1 +echo >nul 2>"%~1\_dummy" diff --git a/settings.cmd b/settings.cmd new file mode 100644 index 0000000..d344fa0 --- /dev/null +++ b/settings.cmd @@ -0,0 +1,45 @@ +@echo off +REM -- no setlocal in this script! 
+REM make a copy of this file and adjust the paths + +set PHP_HOME=c:\Program Files\php +set PYTHON_HOME=c:\Python27 +REM -- MoinMoin settings +set MOIN_HOME=c:\wwwroot\wiki\moin +set MOIN_CONFIG=%MOIN_HOME%\wiki\config +REM set MOIN_CONFIG=c:\wwwroot\moinfarmdata\config +set MOIN_DATA_HOME=%MOIN_HOME%\wiki\data +REM set MOIN_CONFIG=c:\wwwroot\moinfarmdata\ +REM -- DokuWiki settings +set DOKU_HOME=c:\wwwroot\wiki\dokuwiki +set DOKU_ANIMALS_HOME=%DOKU_HOME% +REM set DOKU_ANIMALS_HOME=c:\wwwroot\dokufarmdata +REM set animal= +REM comment this in to do a full converstion +REM set DOKU_FULL_HISTORY=-a + +REM -- path to your php.ini used by your webserver +REM set PHP_INI_SCAN_DIR=c:\Program Files\ApacheHttpd\conf\ + +REM -- set this to your "production" dokuwiki if you want to update only. +REM set OUTDIR=%DOKU_ANIMALS_HOME%\%animal%\data + +REM ----8<--------8<--------8<--------8<--------8<--------8<--------8<---- +REM -- remove everything beyond the line from your copy +chcp 1252 + +if exist "%~dpn0.local.cmd" call "%~dpn0.local.cmd" + +REM -- %animal% must be lowercase! +( +set animal= +set animal=%animal% +) + +set PATH=%PATH%;%PYTHON_HOME%;%PHP_HOME% + +set PYTHONPATH=. 
+set PYTHONPATH=%PYTHONPATH%;%MOIN_HOME% +set PYTHONPATH=%PYTHONPATH%;%MOIN_HOME%\MoinMoin\support +set PYTHONPATH=%PYTHONPATH%;%MOIN_CONFIG% +set PYTHONPATH=%PYTHONPATH%; diff --git a/text_dokuwiki.py b/text_dokuwiki.py index 73df575..2eaee12 100755 --- a/text_dokuwiki.py +++ b/text_dokuwiki.py @@ -13,6 +13,9 @@ from MoinMoin.formatter import FormatterBase from MoinMoin import config from MoinMoin.Page import Page from types import * +from MoinMoin import log + +logging = log.getLogger(__name__) # TODO: let base class MoinMoin/formatter/base.py handle not implemented methods @@ -73,6 +76,14 @@ class Formatter(FormatterBase): if on: if interwiki == 'Self': return self.pagelink(on, pagename, **kw) + interwikis = { + 'WikiPedia':'wp', + 'FrWikiPedia':'wpfr', + 'DeWikiPedia':'wpde', + 'MetaWikiPedia':'wpmeta' + } + if interwiki in interwikis: + return '[[%s>%s|' % (interwikis.get(interwiki), pagename) return '[[%s>%s|' % (interwiki, pagename) else: return ']]' @@ -134,7 +145,8 @@ class Formatter(FormatterBase): self.list_type = '*' else: self.list_depth -= 1 - self.list_type = ' ' + if self.list_depth <= 0: + self.list_type = ' ' return ['', '\n'][on] @@ -151,7 +163,7 @@ class Formatter(FormatterBase): def code(self, on, **kw): """ `typewriter` or {{{typerwriter}}, for code blocks i hope codeblock works """ - return ["''", "''"][not on] + return ["''%%", "%%''"][not on] def sup(self, on, **kw): return ['', ''][not on] @@ -162,12 +174,20 @@ class Formatter(FormatterBase): def strike(self, on, **kw): return ['', ''][not on] + def small(self, on, **kw): + #https://www.dokuwiki.org/plugin:wrap + return ['', ''][not on] + + def big(self, on, **kw): + #https://www.dokuwiki.org/plugin:wrap + return ['', ''][not on] + def preformatted(self, on, **kw): FormatterBase.preformatted(self, on) result = '' if self.in_p: result = self.paragraph(0) - return result + ['', '\n'][not on] + return result + ['', '\n'][not on] def paragraph(self, on, **kw): FormatterBase.paragraph(self, on) 
@@ -192,7 +212,7 @@ class Formatter(FormatterBase): self.in_table = 1 else: self.in_table = 0 - return '' + return ['', '\n'][not on] def table_row(self, on, attrs={}, **kw): return ['\n', '|'][not on] @@ -201,8 +221,8 @@ class Formatter(FormatterBase): return ['|', ''][not on] def anchordef(self, id): - # not supported - return '' + # https://www.dokuwiki.org/plugin:anchor + return '{{anchor:'+id+'}}' def anchorlink(self, on, name='', **kw): # kw.id not supported, we hope the anchor matches existing heading on page @@ -212,16 +232,18 @@ class Formatter(FormatterBase): return ['__', '__'][not on] def definition_list(self, on, **kw): + # https://www.dokuwiki.org/plugin:definitionlist result = '' if self.in_p: result = self.paragraph(0) - return result + ['', ''][not on] + return result def definition_term(self, on, compact=0, **kw): - return [''][not on] + #MoinMoin does no wiki markup in DL-Terms + return [' ;%%', '%%\n'][not on] def definition_desc(self, on, **kw): - return ['', ''][not on] + return [' :', '\n'][not on] def image(self, src=None, **kw): valid_attrs = ['src', 'width', 'height', 'alt', 'title'] @@ -268,8 +290,11 @@ class Formatter(FormatterBase): def comment(self, text): # real comments (lines with two hash marks) if text[0:2] == '##': - #return "/* %s */\n" % text[2:].strip() - return '' + # https://www.dokuwiki.org/plugin:comment + comment = text[2:].strip() + if len(comment)>1: + return "/* %s */\n" % text[2:].strip() + return '\n' # Some kind of Processing Instruction # http://moinmo.in/HelpOnProcessingInstructions @@ -279,6 +304,7 @@ class Formatter(FormatterBase): if tokens[0] == 'acl': # TODO: fill acl.auth.php + logging.info('SKIPPING ACL: %s', text) return '' if tokens[0] == 'deprecated': @@ -289,6 +315,10 @@ class Formatter(FormatterBase): if tokens[0] == 'pragma': # TODO: can do 'description' via 'meta' dokuwiki plugin + pargs = tokens[1].split(None, 1) + if pargs[0]=='section-numbers': + return '/* meta: %s */' % tokens + 
logging.info('SKIPPING PRAGMA: %s', tokens) #return "/* pragma: %s */\n" % " ".join(tokens[1:]) return '' @@ -313,16 +343,150 @@ class Formatter(FormatterBase): def inherit(args): return apply(FormatterBase.macro, (self, macro_obj, name, args)) + def randomQuote(args): + # https://www.dokuwiki.org/plugin:xfortune + return '{{xfortune>quote:'+args+'.txt}}' + + def monthcal(args): + # https://www.dokuwiki.org/plugin:monthcal + selfname = self.page.page_name + return '{{monthcal:create_links=short,namespace='+selfname.replace('/',':')+'}}' + + def navigation(args): + # https://www.dokuwiki.org/plugin:alphaindex + selfname = self.page.page_name + args = args.split(',') + if len(args)>0: + try: + result = { + 'slides': '[<>]', + 'children': '{{alphaindex>:%s#1|nons incol}}' % selfname.replace('/',':'), + 'siblings': '{{alphaindex>.#1|nons incol}}', + 'slideshow': '/* no support for slideshow navigation */' + }[args[0].strip()] + except KeyError: + result = '/* Unknown Navigation: %s #%s#*/' % (args, args[0].strip()) # tuple required: the format operator binds tighter than the comma + else: + result = '/* Unsupported Navigation: %s */' % args + return result + + def footnote(args): + return '((%s))' % args + + def dateTimeMacro(args): + #https://www.dokuwiki.org/plugin:date + #args = args.split(','); + return '{{date>%%c|timestamp=strtotime("%s")|locale=de}}' % args + + def dateMacro(args): + #https://www.dokuwiki.org/plugin:date + #args = args.split(','); + return '{{date>%%x|timestamp=strtotime("%s")|locale=de}}' % args + + def includeMacro(args): + #https://www.dokuwiki.org/plugin:include + #logging.info('Include(%s)' % args) + args = map(unicode.strip, args.split(',')); + #dokupage = ":".join(pagename.split("/")) + if len(args)==1: + return '{{page>%s&nodate}}' % ":".join(args[0].split("/")) + elif(u'titlesonly' in args): + #https://www.dokuwiki.org/plugin:changes + #https://www.dokuwiki.org/plugin:pagelist + selfname = self.page.page_name + selfNs = ":".join(selfname.split("/")).lower() + pairs = [arg.split('=') for arg in args] + # 
attrs = {} + # for key, value in pairs: + # attrs[key] = value + #logging.info('pairs:"%s"' % pairs) + + incName = ''#pairs[0] + incCount = -1 + incTitlesOnly = False + notNamedParam = 0 + for pair in pairs: + if len(pair)==1: + if u'titlesonly'==pair[0]: # pair is a list; compare its only element + notNamedParam = -1 + incTitlesOnly = True + elif notNamedParam >=0: + if notNamedParam==0: + incName = pair[0] # keep the page-name string, not the enclosing list + notNamedParam += 1; + else: + notNamedParam = -1 + if u'items'==pair[0]: + incCount = int(pair[1]) + + resultArgs = '-h1 -textPages=""' + #(keys,values) = map() + if incCount > 0: + resultArgs += ' -idAndTitle -simpleList -sortId -nbItemsMax=%d' % incCount + else: + resultArgs += ' -nbCol=2' + + ## + ## Lister der letzten 10 Mails: + ## + if incName[:1]=='^': # slice instead of index: incName may be empty + nspagedelim = incName.rfind('/') + ns = ":".join(incName[1:nspagedelim].split('/')).lower() + incPageReg = incName[(nspagedelim+1):] + resultArgs += ' -pregPagesOn="/^%s/"' % incPageReg + else: + ns = selfNs + + return '<nspages %s %s>' % (ns, resultArgs) # NOTE(review): tag reconstructed from the nspages options above, confirm against https://www.dokuwiki.org/plugin:nspages + + else: + logging.info('UNSUPPORTED INCLUDE "%s"' % args) + return '/* Unsupported Include: %s */' % args + + def fullsearch(args): + #args=None >> {searchform ns=} + #args='' >> {{backlinks>.}} + #args!='' >> {{search>}} + #ignore special searches. see MoinMoin page "HilfeZumSuchen" + if args is None: + return '{searchform ns=}' + elif ':' in args or ' ' in args: + logging.info('UNSUPPORTED SEARCH %s(%s)' % (name, args)) + return '/* Unsupported Search %s(%s). 
may be backlinks plugin will help */' % (name, args) + elif args=='': + return '{{backlinks>.}}' + elif name=='PageList': + return '{{backlinks>%s}}' % ":".join(args.split('/')).lower(); + else: + logging.info('UNSUPPORTED SEARCH %s(%s)' % (name, args)) + return '/* Unsupported Search %s(%s) */' % (name, args) + try: lookup = { - 'BR' : '\\\\', + 'BR' : ' \\\\ ', + 'br' : ' \\\\ ', 'MailTo' : email, 'GetText' : args, 'ShowSmileys' : inherit, 'ShowAttachedFiles' : showAttachedFiles, - 'Include' : inherit + 'Include' : includeMacro, + #no real fulltext search! + 'FullSearch' : fullsearch, + 'FullSearchCached' : fullsearch, + 'PageList' : fullsearch, + 'MonthCalendar' : monthcal, + 'Navigation' : navigation, + 'TableOfContents' : '', + 'RandomQuote': randomQuote, + 'Anchor': inherit, + 'Action': inherit, + 'Icon': inherit, + 'FootNote': footnote, + 'Date': dateMacro, + 'DateTime': dateTimeMacro }[name] except KeyError: + logging.info('UNDEFINED MACRO "%s"' % name) lookup = '/* UndefinedMacro: %s(%s) */' % (name, args) if type(lookup) == FunctionType: