From 9023627fe50f4222b214444d36531fa3e24fb12c Mon Sep 17 00:00:00 2001 From: Lars Kruse Date: Tue, 8 Aug 2023 20:21:56 +0200 Subject: [PATCH] fix: handle non-ascii characters in page names and attachments --- doku.py | 3 +-- moin2doku.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/doku.py b/doku.py index c750a19..0ecac1a 100755 --- a/doku.py +++ b/doku.py @@ -37,8 +37,7 @@ class DokuWiki: uargs.append(arg.encode('utf-8')) key = "%s:%s" % (method, ",".join(uargs)) if not self.callcache.has_key(key): - cmd = ['php', './doku.php', method ] + uargs + cmd = ['php', './doku.php', method] + [arg.encode("utf-8") for arg in uargs] res = subprocess.Popen(cmd, stdin = None, stdout = subprocess.PIPE, stderr = sys.stderr, close_fds = False).communicate() self.callcache[key] = unicode(res[0].decode('utf-8')) - print "%s->%s" % (cmd, self.callcache[key]) return self.callcache[key] diff --git a/moin2doku.py b/moin2doku.py index fd60983..8d59c0b 100755 --- a/moin2doku.py +++ b/moin2doku.py @@ -65,6 +65,11 @@ def writefile(filename, content, overwrite=False): f.writelines([line + u'\n' for line in content]) f.close() + +def encode_relaxed(text): + return text.encode("ascii", errors="ignore") + + # page = MoinMoin Page oject # ns = DokuWiki namespace where attachments to copy def copy_attachments(page, ns,randomID): @@ -84,7 +89,7 @@ def copy_attachments(page, ns,randomID): copyfile(src, dst) copystat(src, dst) except UnicodeDecodeError: - print 'ERROR: unable to convert attachment "%s"' % attachment + print 'ERROR: unable to convert attachment "%s"' % encode_relaxed(attachment) def print_help(): program = sys.argv[0] @@ -168,12 +173,12 @@ def convertfile(page, output = None, overwrite = False): if not output: output = pagename - print "Converting %s" % pagename + print "Converting %s" % encode_relaxed(pagename) if page.isUnderlayPage(): print "underlay: %s" % page.request.cfg.data_underlay_dir print "underlay: %s" % request.cfg.data_underlay_dir - print "SKIP UNDERLAY: %s" % pagename + print "SKIP UNDERLAY: %s" % encode_relaxed(pagename) return False current_exists = page.exists() @@ -202,7 +207,7 @@ def convertfile(page, output = None, overwrite = False): print "recovered %s: %s" % (rev, mtime) if not mtime: - print "NO REVISION: for %s" % pagefile + print "NO REVISION: for %s" % encode_relaxed(pagefile) continue if rev == current_rev: @@ -237,7 +242,7 @@ def convertfile(page, output = None, overwrite = False): if old_page != ID: redirect_map[old_page] = ID - print "Converted %s as %s" % (pagename, dw.wikiFN(output)) + print "Converted %s as %s" % (encode_relaxed(pagename), dw.wikiFN(output)) return True @@ -324,7 +329,7 @@ else: print "--------------------------------------------------" for output, pagename in pages.items(): - print " - %s" % pagename + print " - %s" % encode_relaxed(pagename) print "--------------------------------------------------" converted = 0