cleanup regexp based converter

This commit is contained in:
Elan Ruusamäe 2012-09-18 00:11:14 +02:00
parent 1acc714d5d
commit 949c4d1571
2 changed files with 10 additions and 92 deletions

View file

@ -113,96 +113,6 @@ def copy_attachments(pagedir, ns):
copyfile(src, dst)
copystat(src, dst)
def convert_markup(pagename, content):
    """
    Convert MoinMoin page markup to DokuWiki markup, line by line.

    pagename: name of the current page (MoinMoin name); not used by the
              conversion itself.
    content:  page content as a list of lines (MoinMoin markup).

    Every rule of the table below is applied, in order, to every line.
    The list is modified in place and the same list object is returned.

    Syntax reference: http://www.pld-linux.org/SyntaxReference
    """
    namespace = ':'

    # (pattern, replacement) pairs. Order matters: later rules operate on
    # the output of earlier ones.
    regexp = (
        (r'\[\[TableOfContents.*\]\]', ''),         # remove
        (r'\[\[BR\]\]$', ''),                       # newline at end of line - remove
        (r'\[\[BR\]\]', '\n'),                      # newline
        ('#pragma section-numbers off', ''),        # remove
        (r'^##.*?\n', ''),                          # comments: remove
        (r'^#(pragma|format|redirect|refresh|language|acl)(.*?)\n', ''),  # remove all
        # deprecated; the note tag has to be closed with </note>
        (r'^#deprecated(.*)\n', '<note warning>This page is deprecated</note>\n'),

        # Other elements
        # break
        (r'(<<BR>>)|(\[\[BR]])', r'\\ '),
        # horizontal line
        (r'^\s*-{4,}\s*$', '----\n'),

        # Macros and other unsupported constructs - simply remove
        (r'<<.+?>>', ''),
        (r'\[\[Anchor\(\w+\)\]\]', ''),
        (r'\[\[(PageCount|RandomPage)\]\]', ''),

        # internal links
        (r'\[:(.+)\]', r'[[\1]]'),
        # TODO: handle more depths
        (r'\[\[(.*)/(.*)\]\]', r'B[[\1:\2]]'),
        # wiki:xxx
        (r'\[wiki:([^\s]+)\s+(.+)]', r'[[\1|\2]]'),
        (r'wiki:([^\s]+)\s+(.+)', r'[[\1|\2]]'),
        (r'wiki:([^\s]+)', r'[[\1]]'),
        (r'(\[\[.+\]\]).*\]', r'\1'),
        # web link without title
        (r'\[((?:http|https|file)[^\s]+)\]', r'[[\1]]'),
        # web link with title
        (r'\[((?:http|https|file)[^\s]+)\s+(.+?)\]', r'[[\1|\2]]'),

        # code blocks: open with language, plain open, close
        (r'\{{3}#!(python|php)', r'<code \1>'),
        (r'\{{3}', '<code>'),
        (r'\}{3}', '</code>'),

        # lists
        # NOTE(review): the original comment says lists must have 2
        # whitespaces before the asterisk, but the replacements here carry
        # a single space - confirm the intended indentation.
        (r'^\s\s\s\s\*', ' *'),
        (r'^\s\s\s\*', ' *'),
        (r'^\s\s\*', ' *'),
        (r'^\s\*', ' *'),
        (r'^\s\s\s\s1\.', ' -'),
        (r'^\s\s1\.', ' -'),
        (r'^\s1\.', ' -'),

        # headings: emitted with a temporary '=-' marker so that a freshly
        # converted heading is not picked up again by a lower-level rule;
        # the marker is collapsed to '=' right afterwards
        (r'^\s*=====\s*(.*)\s*=====\s*$', r'=-=- \1 =-=-'),                # heading 5
        (r'^\s*====\s*(.*)\s*====\s*$', r'=-=-=- \1 =-=-=-'),              # heading 4
        (r'^\s*===\s*(.*)\s*===\s*$', r'=-=-=-=- \1 =-=-=-=-'),            # heading 3
        (r'^\s*==\s*(.*)\s*==\s*$', r'=-=-=-=-=- \1 =-=-=-=-=-'),          # heading 2
        (r'^\s*=\s*(.*)\s=\s*$', r'=-=-=-=-=-=- \1 =-=-=-=-=-=-'),         # heading 1
        ('=-', '='),

        (r'\|{2}', '|'),                            # table separator
        (r"'{5}(.*)'{5}", r'**//\1//**'),           # bold and italic
        (r"'{3}(.*)'{3}", r'**\1**'),               # bold
        (r"'{2}(.*)'{2}", r'//\1//'),               # italic
        (r'`(.*?)`', r"''\1''"),                    # monospaced

        # CamelCase, don't change if CamelCase is in InternalLink.
        # NOTE(review): this literal is intentionally NOT a raw string. Here
        # '\b' is a backspace character rather than a word boundary, so the
        # rule never fires on ordinary text. It is kept that way because
        # enabling it would also rewrite CamelCase words inside the links
        # and attachment names handled by the neighbouring rules.
        ('(?<!\\[)(\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b)', r'[[\1]]'),

        # Date value
        (r'\[\[Date\(([\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}Z)\)\]\]', r'\1'),
        (r'attachment:(.*)', '{{' + namespace + r'\1|}}'),
    )

    for i, line in enumerate(content):
        for pattern, replacement in regexp:
            line = re.sub(pattern, replacement, line)
        content[i] = line
    return content
def print_help():
program = sys.argv[0]
print "Usage: %s -m <moinmoin pages directory> -d <output directory>" % program

View file

@ -253,16 +253,24 @@ class Formatter(FormatterBase):
if text[0:2] == '##':
return "/* %s */" % text[2:]
# some kind of macro
# Some kind of Processing Instruction
# http://moinmo.in/HelpOnProcessingInstructions
tokens = text.lstrip('#').split(None, 1)
if tokens[0] in ('language'):
if tokens[0] in ('language', 'format', 'refresh'):
return ''
if tokens[0] == 'acl':
# TODO: fill acl.auth.php
return ''
if tokens[0] == 'deprecated':
return '<note warning>This page is deprecated</note>\n'
if tokens[0] == 'redirect':
return text
if tokens[0] == 'pragma':
# TODO: can do 'description' via 'meta' dokuwiki plugin
return "/* pragma: %s */" % " ".join(tokens[1:])
return "/* %s */" % text.lstrip('#')