[PATCH RFC] convert: add support for recode in filemaps

Martin Geisler mg at lazybytes.net
Mon Dec 19 12:21:50 CST 2011


# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1324318815 -3600
# Node ID 5bf6234ff33f997486c85210c6d7cec58f1fa524
# Parent  4841035f37b6df368682460d8a7cbf10276b8d1b
convert: add support for recode in filemaps

This directive is used in a filemap like

  recode OLD NEW

and makes convert recode all file names from the OLD encoding to the
NEW encoding.

This patch is not 100% done -- for example, convert should probably
warn if recode is specified more than once. Also, the recoding is
currently done before renames are taken into account. It should
probably be done afterwards, since the filemap seems to work on source
path names only.

I made the patch to better support the case discussed here:
http://serverfault.com/a/342446/14103

diff --git a/hgext/convert/filemap.py b/hgext/convert/filemap.py
--- a/hgext/convert/filemap.py
+++ b/hgext/convert/filemap.py
@@ -4,7 +4,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import shlex
+import shlex, codecs
 from mercurial.i18n import _
 from mercurial import util
 from common import SKIPREV, converter_source
@@ -26,6 +26,7 @@
         self.include = {}
         self.exclude = {}
         self.rename = {}
+        self.recode = None
         if path:
             if self.parse(path):
                 raise util.Abort(_('errors in filemap'))
@@ -68,6 +69,14 @@
                 self.rename[src] = dest
             elif cmd == 'source':
                 errs += self.parse(lex.get_token())
+            elif cmd == 'recode':
+                self.recode = (lex.get_token(), lex.get_token())
+                try:
+                    codecs.getdecoder(self.recode[0])
+                    codecs.getencoder(self.recode[1])
+                except LookupError, e:
+                    self.ui.warn('%s:%d: %s\n' % (lex.infile, lex.lineno, e))
+                    errs += 1
             else:
                 self.ui.warn(_('%s:%d: unknown directive %r\n') %
                              (lex.infile, lex.lineno, cmd))
@@ -84,6 +93,9 @@
         return '', name, ''
 
     def __call__(self, name):
+        if self.recode:
+            name = name.decode(self.recode[0]).encode(self.recode[1])
+
         if self.include:
             inc = self.lookup(name, self.include)[0]
         else:
@@ -106,7 +118,7 @@
         return name
 
     def active(self):
-        return bool(self.include or self.exclude or self.rename)
+        return bool(self.include or self.exclude or self.rename or self.recode)
 
 # This class does two additional things compared to a regular source:
 #
diff --git a/tests/test-convert-filemap.t b/tests/test-convert-filemap.t
--- a/tests/test-convert-filemap.t
+++ b/tests/test-convert-filemap.t
@@ -375,3 +375,31 @@
   |
   o  0 "addb" files: b
   
+
+Test recode command:
+
+  $ hg init latin-1
+  $ cd latin-1
+  >>> open("p\xe6rer.txt", "w").write("pears\n")
+  $ hg commit -A -m Latin-1
+  adding p\xe6rer.txt (esc)
+  $ cd ..
+  $ echo "recode latin-1 utf-8" > recode
+  $ hg convert latin-1 utf-8 --filemap recode
+  initializing destination utf-8 repository
+  scanning source...
+  sorting...
+  converting...
+  0 Latin-1
+  $ hg -R utf-8 manifest -r tip
+  p\xc3\xa6rer.txt (esc)
+
+Errors:
+
+  $ echo "recode foo utf-8" >> recode
+  $ echo "recode latin-1 bar" >> recode
+  $ hg convert latin-1 utf-8 --filemap recode
+  recode:3: unknown encoding: foo
+  recode:4: unknown encoding: bar
+  abort: errors in filemap
+  [255]


More information about the Mercurial-devel mailing list