cleanup regexp based converter
This commit is contained in:
parent
1acc714d5d
commit
949c4d1571
2 changed files with 10 additions and 92 deletions
90
moin2doku.py
90
moin2doku.py
|
@ -113,96 +113,6 @@ def copy_attachments(pagedir, ns):
|
||||||
copyfile(src, dst)
|
copyfile(src, dst)
|
||||||
copystat(src, dst)
|
copystat(src, dst)
|
||||||
|
|
||||||
# convert page markup
|
|
||||||
# pagename: name of current page (MoinMoin name)
|
|
||||||
# content: page content (MoinMoin markup)
|
|
||||||
def convert_markup(pagename, content):
|
|
||||||
"""
|
|
||||||
convert page markup
|
|
||||||
"""
|
|
||||||
namespace = ':'
|
|
||||||
# for i in range(0, len(filename) - 1):
|
|
||||||
# namespace += filename[i] + ':'
|
|
||||||
|
|
||||||
# http://www.pld-linux.org/SyntaxReference
|
|
||||||
regexp = (
|
|
||||||
('\[\[TableOfContents.*\]\]', ''), # remove
|
|
||||||
('\[\[BR\]\]$', ''), # newline at end of line - remove
|
|
||||||
('\[\[BR\]\]', '\n'), # newline
|
|
||||||
('#pragma section-numbers off', ''), # remove
|
|
||||||
('^##.*?\\n', ''), # comments: remove
|
|
||||||
('^#(pragma|format|redirect|refresh|language|acl)(.*?)\n', ''), # remove all
|
|
||||||
('^#deprecated(.*)\n', '<note warning>This page is deprecated<note>\n'), # deprecated
|
|
||||||
|
|
||||||
# Other elements
|
|
||||||
# break
|
|
||||||
('(<<BR>>)|(\[\[BR]])', '\\\\ '),
|
|
||||||
|
|
||||||
# horizontal line
|
|
||||||
('^\s*-{4,}\s*$', '----\n'),
|
|
||||||
# Macros and another foolish - simply remove
|
|
||||||
# macros
|
|
||||||
('<<.+?>>', ''),
|
|
||||||
('\[\[Anchor\(\w+\)\]\]', ''),
|
|
||||||
('\[\[(PageCount|RandomPage)\]\]', ''),
|
|
||||||
|
|
||||||
# ('\["', '[['), # internal link open
|
|
||||||
# ('"\]', ']]'), # internal link close
|
|
||||||
# internal links
|
|
||||||
('\[:(.+)\]', '[[\\1]]'),
|
|
||||||
# TODO: handle more depths
|
|
||||||
('\[\[(.*)/(.*)\]\]', 'B[[\\1:\\2]]'),
|
|
||||||
# wiki:xxx
|
|
||||||
('\[wiki:([^\s]+)\s+(.+)]', '[[\\1|\\2]]'),
|
|
||||||
('wiki:([^\s]+)\s+(.+)', '[[\\1|\\2]]'),
|
|
||||||
('wiki:([^\s]+)', '[[\\1]]'),
|
|
||||||
('(\[\[.+\]\]).*\]', '\\1'),
|
|
||||||
|
|
||||||
# web link without title
|
|
||||||
('\[((?:http|https|file)[^\s]+)\]', '[[\\1]]'),
|
|
||||||
# web link with title
|
|
||||||
('\[((?:http|https|file)[^\s]+)\s+(.+?)\]', '[[\\1|\\2]]'),
|
|
||||||
|
|
||||||
# ('\["/(.*)"\]', '[['+filename[-1]+':\\1]]'),
|
|
||||||
|
|
||||||
# code blocks
|
|
||||||
# open and language
|
|
||||||
('\{{3}#!(python|php)', '<'+'code \\1>'),
|
|
||||||
# code open
|
|
||||||
('\{{3}', '<'+'code>'),
|
|
||||||
# close
|
|
||||||
('\}{3}', '<'+'/code>'),
|
|
||||||
|
|
||||||
('^\s\s\s\s\*', ' *'),
|
|
||||||
('^\s\s\s\*', ' *'),
|
|
||||||
('^\s\s\*', ' *'),
|
|
||||||
('^\s\*', ' *'), # lists must have 2 whitespaces before the asterisk
|
|
||||||
('^\s\s\s\s1\.', ' -'),
|
|
||||||
('^\s\s1\.', ' -'),
|
|
||||||
('^\s1\.', ' -'),
|
|
||||||
('^\s*=====\s*(.*)\s*=====\s*$', '=-=- \\1 =-=-'), # heading 5
|
|
||||||
('^\s*====\s*(.*)\s*====\s*$', '=-=-=- \\1 =-=-=-'), # heading 4
|
|
||||||
('^\s*===\s*(.*)\s*===\s*$', '=-=-=-=- \\1 =-=-=-=-'), # heading 3
|
|
||||||
('^\s*==\s*(.*)\s*==\s*$', '=-=-=-=-=- \\1 =-=-=-=-=-'), # heading 2
|
|
||||||
('^\s*=\s*(.*)\s=\s*$', '=-=-=-=-=-=- \\1 =-=-=-=-=-=-'), # heading 1
|
|
||||||
('=-', '='),
|
|
||||||
('\|{2}', '|'), # table separator
|
|
||||||
('\'{5}(.*)\'{5}', '**//\\1//**'), # bold and italic
|
|
||||||
('\'{3}(.*)\'{3}', '**\\1**'), # bold
|
|
||||||
('\'{2}(.*)\'{2}', '//\\1//'), # italic
|
|
||||||
('`(.*?)`', "''\\1''"), # monospaced
|
|
||||||
('(?<!\[)(\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b)','[[\\1]]'), # CamelCase, dont change if CamelCase is in InternalLink
|
|
||||||
('\[\[Date\(([\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}Z)\)\]\]', '\\1'), # Date value
|
|
||||||
('attachment:(.*)','{{'+namespace+'\\1|}}')
|
|
||||||
)
|
|
||||||
|
|
||||||
for i in range(len(content)):
|
|
||||||
line = content[i]
|
|
||||||
for item in regexp:
|
|
||||||
line = re.sub(item[0], item[1], line)
|
|
||||||
content[i] = line
|
|
||||||
return content
|
|
||||||
|
|
||||||
def print_help():
|
def print_help():
|
||||||
program = sys.argv[0]
|
program = sys.argv[0]
|
||||||
print "Usage: %s -m <moinmoin pages directory> -d <output directory>" % program
|
print "Usage: %s -m <moinmoin pages directory> -d <output directory>" % program
|
||||||
|
|
|
@ -253,16 +253,24 @@ class Formatter(FormatterBase):
|
||||||
if text[0:2] == '##':
|
if text[0:2] == '##':
|
||||||
return "/* %s */" % text[2:]
|
return "/* %s */" % text[2:]
|
||||||
|
|
||||||
# some kind of macro
|
# Some kind of Processing Instruction
|
||||||
|
# http://moinmo.in/HelpOnProcessingInstructions
|
||||||
tokens = text.lstrip('#').split(None, 1)
|
tokens = text.lstrip('#').split(None, 1)
|
||||||
if tokens[0] in ('language'):
|
if tokens[0] in ('language', 'format', 'refresh'):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
if tokens[0] == 'acl':
|
if tokens[0] == 'acl':
|
||||||
# TODO: fill acl.auth.php
|
# TODO: fill acl.auth.php
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
if tokens[0] == 'deprecated':
|
||||||
|
return '<note warning>This page is deprecated</note>\n'
|
||||||
|
|
||||||
|
if tokens[0] == 'redirect':
|
||||||
|
return text
|
||||||
|
|
||||||
if tokens[0] == 'pragma':
|
if tokens[0] == 'pragma':
|
||||||
|
# TODO: can do 'description' via 'meta' dokuwiki plugin
|
||||||
return "/* pragma: %s */" % " ".join(tokens[1:])
|
return "/* pragma: %s */" % " ".join(tokens[1:])
|
||||||
|
|
||||||
return "/* %s */" % text.lstrip('#')
|
return "/* %s */" % text.lstrip('#')
|
||||||
|
|
Loading…
Reference in a new issue