some formatting rules from the perl script

This commit is contained in:
Elan Ruusamäe 2011-02-06 23:19:21 +02:00
parent 0b8bf13743
commit 411c5a695c

View file

@ -105,7 +105,7 @@ def copy_attachments(page_dir, attachment_dir):
cmd_string = 'cp -p "' + dir +'/' + attachment + '" "' + attachment_dir + attachment.lower() + '"' cmd_string = 'cp -p "' + dir +'/' + attachment + '" "' + attachment_dir + attachment.lower() + '"'
os.system(cmd_string) os.system(cmd_string)
def convert_markup(page, filename): def convert_markup(content, filename):
""" """
convert page markup convert page markup
""" """
@ -113,12 +113,25 @@ def convert_markup(page, filename):
for i in range(0, len(filename) - 1): for i in range(0, len(filename) - 1):
namespace += filename[i] + ':' namespace += filename[i] + ':'
# http://www.pld-linux.org/SyntaxReference
regexp = ( regexp = (
('\[\[TableOfContents.*\]\]', ''), # remove ('\[\[TableOfContents.*\]\]', ''), # remove
('\[\[BR\]\]$', ''), # newline at end of line - remove ('\[\[BR\]\]$', ''), # newline at end of line - remove
('\[\[BR\]\]', '\n'), # newline ('\[\[BR\]\]', '\n'), # newline
('#pragma section-numbers off', ''), # remove ('#pragma section-numbers off', ''), # remove
('^##.*?\\n', ''), # remove ('^##.*?\\n', ''), # comments: remove
('^#(pragma|format|redirect|refresh|language)(.*)', ''), # remove all
('^#deprecated(.*)\n', '<note warning>This page is deprecated<note>\n'), # deprecated
# Other elements
# break
('(<<BR>>)|(\[\[BR]])', '\\\\ '),
# horizontal line
('^\s*-{4,}\s*$', '----\n'),
# Macros and other foolishness - simply remove
# macros
('<<.+?>>', ''),
# ('\["', '[['), # internal link open # ('\["', '[['), # internal link open
# ('"\]', ']]'), # internal link close # ('"\]', ']]'), # internal link close
@ -160,12 +173,12 @@ def convert_markup(page, filename):
('attachment:(.*)','{{'+namespace+'\\1|}}') ('attachment:(.*)','{{'+namespace+'\\1|}}')
) )
for i in range(len(page)): for i in range(len(content)):
line = page[i] line = content[i]
for item in regexp: for item in regexp:
line = re.sub(item[0], item[1], line) line = re.sub(item[0], item[1], line)
page[i] = line content[i] = line
return page return content
def print_help(): def print_help():
print "Usage: moinconv.py <moinmoin pages directory> <output directory>" print "Usage: moinconv.py <moinmoin pages directory> <output directory>"