fix: handle non-ascii characters in page names and attachments
This commit is contained in:
parent
9b3341f836
commit
9023627fe5
2 changed files with 12 additions and 8 deletions
3
doku.py
3
doku.py
|
@ -37,8 +37,7 @@ class DokuWiki:
|
||||||
uargs.append(arg.encode('utf-8'))
|
uargs.append(arg.encode('utf-8'))
|
||||||
key = "%s:%s" % (method, ",".join(uargs))
|
key = "%s:%s" % (method, ",".join(uargs))
|
||||||
if not self.callcache.has_key(key):
|
if not self.callcache.has_key(key):
|
||||||
cmd = ['php', './doku.php', method ] + uargs
|
cmd = ['php', './doku.php', method] + [arg.encode("utf-8") for arg in uargs]
|
||||||
res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate()
|
res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate()
|
||||||
self.callcache[key] = unicode(res[0].decode('utf-8'))
|
self.callcache[key] = unicode(res[0].decode('utf-8'))
|
||||||
print "%s->%s" % (cmd, self.callcache[key])
|
|
||||||
return self.callcache[key]
|
return self.callcache[key]
|
||||||
|
|
17
moin2doku.py
17
moin2doku.py
|
@ -65,6 +65,11 @@ def writefile(filename, content, overwrite=False):
|
||||||
f.writelines([line + u'\n' for line in content])
|
f.writelines([line + u'\n' for line in content])
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
|
def encode_relaxed(text):
    """Return an ASCII-only byte string for ``text``, dropping other characters.

    Used to build console messages that must not fail on non-ASCII page or
    attachment names.

    text -- a unicode string or a byte string.

    Byte strings are decoded leniently first: calling ``.encode`` directly on
    a byte string with non-ASCII bytes makes Python 2 attempt an implicit
    ASCII decode, which raises UnicodeDecodeError -- the very error this
    helper is meant to avoid when it is called from an error handler.
    """
    if isinstance(text, bytes):
        # Undecodable bytes are dropped, matching the "ignore" policy below.
        text = text.decode("utf-8", "ignore")
    return text.encode("ascii", "ignore")
|
||||||
|
|
||||||
|
|
||||||
# page = MoinMoin Page object
|
# page = MoinMoin Page object
|
||||||
# ns = DokuWiki namespace where to copy attachments
|
# ns = DokuWiki namespace where to copy attachments
|
||||||
def copy_attachments(page, ns,randomID):
|
def copy_attachments(page, ns,randomID):
|
||||||
|
@ -84,7 +89,7 @@ def copy_attachments(page, ns,randomID):
|
||||||
copyfile(src, dst)
|
copyfile(src, dst)
|
||||||
copystat(src, dst)
|
copystat(src, dst)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
print 'ERROR: unable to convert attachment "%s"' % attachment
|
print 'ERROR: unable to convert attachment "%s"' % encode_relaxed(attachment)
|
||||||
|
|
||||||
def print_help():
|
def print_help():
|
||||||
program = sys.argv[0]
|
program = sys.argv[0]
|
||||||
|
@ -168,12 +173,12 @@ def convertfile(page, output = None, overwrite = False):
|
||||||
if not output:
|
if not output:
|
||||||
output = pagename
|
output = pagename
|
||||||
|
|
||||||
print "Converting %s" % pagename
|
print "Converting %s" % encode_relaxed(pagename)
|
||||||
|
|
||||||
if page.isUnderlayPage():
|
if page.isUnderlayPage():
|
||||||
print "underlay: %s" % page.request.cfg.data_underlay_dir
|
print "underlay: %s" % page.request.cfg.data_underlay_dir
|
||||||
print "underlay: %s" % request.cfg.data_underlay_dir
|
print "underlay: %s" % request.cfg.data_underlay_dir
|
||||||
print "SKIP UNDERLAY: %s" % pagename
|
print "SKIP UNDERLAY: %s" % encode_relaxed(pagename)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
current_exists = page.exists()
|
current_exists = page.exists()
|
||||||
|
@ -202,7 +207,7 @@ def convertfile(page, output = None, overwrite = False):
|
||||||
print "recovered %s: %s" % (rev, mtime)
|
print "recovered %s: %s" % (rev, mtime)
|
||||||
|
|
||||||
if not mtime:
|
if not mtime:
|
||||||
print "NO REVISION: for %s" % pagefile
|
print "NO REVISION: for %s" % encode_relaxed(pagefile)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if rev == current_rev:
|
if rev == current_rev:
|
||||||
|
@ -237,7 +242,7 @@ def convertfile(page, output = None, overwrite = False):
|
||||||
if old_page != ID:
|
if old_page != ID:
|
||||||
redirect_map[old_page] = ID
|
redirect_map[old_page] = ID
|
||||||
|
|
||||||
print "Converted %s as %s" % (pagename, dw.wikiFN(output))
|
print "Converted %s as %s" % (encode_relaxed(pagename), dw.wikiFN(output))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -324,7 +329,7 @@ else:
|
||||||
|
|
||||||
print "--------------------------------------------------"
|
print "--------------------------------------------------"
|
||||||
for output, pagename in pages.items():
|
for output, pagename in pages.items():
|
||||||
print " - %s" % pagename
|
print " - %s" % encode_relaxed(pagename)
|
||||||
print "--------------------------------------------------"
|
print "--------------------------------------------------"
|
||||||
|
|
||||||
converted = 0
|
converted = 0
|
||||||
|
|
Loading…
Reference in a new issue