fix: handle non-ascii characters in page names and attachments

This commit is contained in:
Lars Kruse 2023-08-08 20:21:56 +02:00
parent 9b3341f836
commit 9023627fe5
2 changed files with 12 additions and 8 deletions

View file

@ -37,8 +37,7 @@ class DokuWiki:
uargs.append(arg.encode('utf-8'))
key = "%s:%s" % (method, ",".join(uargs))
if not self.callcache.has_key(key):
cmd = ['php', './doku.php', method ] + uargs
cmd = ['php', './doku.php', method] + [arg.encode("utf-8") for arg in uargs]
res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate()
self.callcache[key] = unicode(res[0].decode('utf-8'))
print "%s->%s" % (cmd, self.callcache[key])
return self.callcache[key]

View file

@ -65,6 +65,11 @@ def writefile(filename, content, overwrite=False):
f.writelines([line + u'\n' for line in content])
f.close()
def encode_relaxed(text):
    """Return *text* as an ASCII-only byte string for console output.

    Every character that cannot be represented in ASCII is silently
    dropped, so the result can always be printed regardless of the
    terminal encoding.

    text -- a unicode string or a byte string.  Byte strings are
            interpreted as UTF-8 (assumption; undecodable bytes are
            dropped instead of raising).
    """
    if isinstance(text, bytes):
        # Calling .encode() directly on a byte string makes Python 2 decode
        # it implicitly with the strict ASCII codec first, which raises
        # UnicodeDecodeError for exactly the names this helper is meant to
        # make printable.  Decode explicitly and leniently instead.
        text = text.decode("utf-8", "ignore")
    return text.encode("ascii", "ignore")
# page = MoinMoin Page object
# ns = DokuWiki namespace where the attachments are copied to
def copy_attachments(page, ns,randomID):
@ -84,7 +89,7 @@ def copy_attachments(page, ns,randomID):
copyfile(src, dst)
copystat(src, dst)
except UnicodeDecodeError:
print 'ERROR: unable to convert attachment "%s"' % attachment
print 'ERROR: unable to convert attachment "%s"' % encode_relaxed(attachment)
def print_help():
program = sys.argv[0]
@ -168,12 +173,12 @@ def convertfile(page, output = None, overwrite = False):
if not output:
output = pagename
print "Converting %s" % pagename
print "Converting %s" % encode_relaxed(pagename)
if page.isUnderlayPage():
print "underlay: %s" % page.request.cfg.data_underlay_dir
print "underlay: %s" % request.cfg.data_underlay_dir
print "SKIP UNDERLAY: %s" % pagename
print "SKIP UNDERLAY: %s" % encode_relaxed(pagename)
return False
current_exists = page.exists()
@ -202,7 +207,7 @@ def convertfile(page, output = None, overwrite = False):
print "recovered %s: %s" % (rev, mtime)
if not mtime:
print "NO REVISION: for %s" % pagefile
print "NO REVISION: for %s" % encode_relaxed(pagefile)
continue
if rev == current_rev:
@ -237,7 +242,7 @@ def convertfile(page, output = None, overwrite = False):
if old_page != ID:
redirect_map[old_page] = ID
print "Converted %s as %s" % (pagename, dw.wikiFN(output))
print "Converted %s as %s" % (encode_relaxed(pagename), dw.wikiFN(output))
return True
@ -324,7 +329,7 @@ else:
print "--------------------------------------------------"
for output, pagename in pages.items():
print " - %s" % pagename
print " - %s" % encode_relaxed(pagename)
print "--------------------------------------------------"
converted = 0