Clean up regexp-based converter
This commit is contained in:
parent
1acc714d5d
commit
949c4d1571
2 changed files with 10 additions and 92 deletions
90
moin2doku.py
90
moin2doku.py
|
@ -113,96 +113,6 @@ def copy_attachments(pagedir, ns):
|
|||
copyfile(src, dst)
|
||||
copystat(src, dst)
|
||||
|
||||
def convert_markup(pagename, content):
    """
    Convert MoinMoin page markup to DokuWiki markup.

    Every line of ``content`` is passed through an ordered table of
    regular-expression substitutions. The order matters: later rules see
    the output of earlier ones.

    pagename: name of the current page (MoinMoin name). Currently unused.
    content:  page content (MoinMoin markup) as a mutable sequence of
              lines. It is rewritten in place.

    Returns the same ``content`` object, with every line converted.
    """
    # Attachments are always linked from the root namespace; pagename is
    # not (yet) used to derive a page specific one.
    namespace = ':'

    # http://www.pld-linux.org/SyntaxReference
    rules = (
        (r'\[\[TableOfContents.*\]\]', ''),         # remove
        (r'\[\[BR\]\]$', ''),                       # newline at end of line - remove
        (r'\[\[BR\]\]', '\n'),                      # newline
        ('#pragma section-numbers off', ''),        # remove
        (r'^##.*?\n', ''),                          # comments: remove
        (r'^#(pragma|format|redirect|refresh|language|acl)(.*?)\n', ''),  # remove all
        # deprecated: the note element has to be closed with </note>
        (r'^#deprecated(.*)\n', '<note warning>This page is deprecated</note>\n'),

        # Other elements
        # break: DokuWiki wants two backslashes followed by whitespace. The
        # replacement is a regex template, so each emitted backslash has to
        # be written twice. ([[BR]] was already consumed by the rules above.)
        (r'(<<BR>>)|(\[\[BR]])', r'\\\\ '),

        # horizontal line
        (r'^\s*-{4,}\s*$', '----\n'),

        # macros that have no DokuWiki counterpart - simply remove
        (r'<<.+?>>', ''),
        (r'\[\[Anchor\(\w+\)\]\]', ''),
        (r'\[\[(PageCount|RandomPage)\]\]', ''),

        # internal links
        (r'\[:(.+)\]', r'[[\1]]'),
        # sub page -> namespace separator
        # TODO: handle more depths
        (r'\[\[(.*)/(.*)\]\]', r'[[\1:\2]]'),
        # wiki:xxx
        (r'\[wiki:([^\s]+)\s+(.+)]', r'[[\1|\2]]'),
        (r'wiki:([^\s]+)\s+(.+)', r'[[\1|\2]]'),
        (r'wiki:([^\s]+)', r'[[\1]]'),
        (r'(\[\[.+\]\]).*\]', r'\1'),

        # web link without title
        (r'\[((?:http|https|file)[^\s]+)\]', r'[[\1]]'),
        # web link with title
        (r'\[((?:http|https|file)[^\s]+)\s+(.+?)\]', r'[[\1|\2]]'),

        # code blocks
        # open and language
        (r'\{{3}#!(python|php)', r'<code \1>'),
        # code open
        (r'\{{3}', '<code>'),
        # close
        (r'\}{3}', '</code>'),

        # bullet lists: DokuWiki needs two spaces of indent per level.
        # Deepest level first; a converted line has its marker too far to
        # the right to be matched again by a shallower rule.
        (r'^\s\s\s\s\*', '        *'),
        (r'^\s\s\s\*', '      *'),
        (r'^\s\s\*', '    *'),
        (r'^\s\*', '  *'),
        # numbered lists, same indentation scheme
        (r'^\s\s\s\s1\.', '        -'),
        (r'^\s\s\s1\.', '      -'),
        (r'^\s\s1\.', '    -'),
        (r'^\s1\.', '  -'),

        # headings: MoinMoin counts '=' upwards with depth, DokuWiki counts
        # downwards. The temporary '=-' marker keeps an already converted
        # heading from matching the lower level rules that follow; it is
        # collapsed back to '=' right after them.
        (r'^\s*=====\s*(.*)\s*=====\s*$', r'=-=- \1 =-=-'),                 # heading 5
        (r'^\s*====\s*(.*)\s*====\s*$', r'=-=-=- \1 =-=-=-'),               # heading 4
        (r'^\s*===\s*(.*)\s*===\s*$', r'=-=-=-=- \1 =-=-=-=-'),             # heading 3
        (r'^\s*==\s*(.*)\s*==\s*$', r'=-=-=-=-=- \1 =-=-=-=-=-'),           # heading 2
        (r'^\s*=\s*(.*)\s=\s*$', r'=-=-=-=-=-=- \1 =-=-=-=-=-=-'),          # heading 1
        ('=-', '='),

        (r'\|{2}', '|'),                            # table separator
        (r"'{5}(.*)'{5}", r'**//\1//**'),           # bold and italic
        (r"'{3}(.*)'{3}", r'**\1**'),               # bold
        (r"'{2}(.*)'{2}", r'//\1//'),               # italic
        (r'`(.*?)`', r"''\1''"),                    # monospaced
        # CamelCase, don't change if CamelCase is in InternalLink
        (r'(?<!\[)(\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b)', r'[[\1]]'),
        # Date value
        (r'\[\[Date\(([\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}Z)\)\]\]', r'\1'),
        (r'attachment:(.*)', '{{' + namespace + r'\1|}}'),
    )

    # Rewrite the caller's sequence in place (and return it) so existing
    # callers that rely on either behaviour keep working.
    for index, line in enumerate(content):
        for pattern, replacement in rules:
            line = re.sub(pattern, replacement, line)
        content[index] = line
    return content
|
||||
|
||||
def print_help():
|
||||
program = sys.argv[0]
|
||||
print "Usage: %s -m <moinmoin pages directory> -d <output directory>" % program
|
||||
|
|
|
@ -253,16 +253,24 @@ class Formatter(FormatterBase):
|
|||
if text[0:2] == '##':
|
||||
return "/* %s */" % text[2:]
|
||||
|
||||
# some kind of macro
|
||||
# Some kind of Processing Instruction
|
||||
# http://moinmo.in/HelpOnProcessingInstructions
|
||||
tokens = text.lstrip('#').split(None, 1)
|
||||
if tokens[0] in ('language'):
|
||||
if tokens[0] in ('language', 'format', 'refresh'):
|
||||
return ''
|
||||
|
||||
if tokens[0] == 'acl':
|
||||
# TODO: fill acl.auth.php
|
||||
return ''
|
||||
|
||||
if tokens[0] == 'deprecated':
|
||||
return '<note warning>This page is deprecated</note>\n'
|
||||
|
||||
if tokens[0] == 'redirect':
|
||||
return text
|
||||
|
||||
if tokens[0] == 'pragma':
|
||||
# TODO: can do 'description' via 'meta' dokuwiki plugin
|
||||
return "/* pragma: %s */" % " ".join(tokens[1:])
|
||||
|
||||
return "/* %s */" % text.lstrip('#')
|
||||
|
|
Loading…
Reference in a new issue