Unicode support for non-Unicode locales

tailgunner at smtp.ru tailgunner at smtp.ru
Mon Oct 8 05:13:29 CDT 2007



  Currently Mercurial lacks the ability to convert file names to Unicode
  when working in non-Unicode locales. For example, file names that were
  added and committed in cp-1251 (the Russian Windows code page) can't be
  correctly checked out in koi8-r (the Russian Unix encoding) or UTF-8 -
  they are checked out as cp-1251, which is wrong.
  This patch attempts to fix this by keeping file names in UTF-8 on disk
  and converting them to the local encoding for hg's internal use.

  diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -172,7 +172,7 @@ class changelog(revlog):
             extra = self.decode_extra(extra)
         if not extra.get('branch'):
             extra['branch'] = 'default'
-        files = l[3:]
+        files = map(util.tolocal, l[3:])
         return (manifest, user, (time, timezone), files, desc,
extra)

     def read(self, node):
@@ -193,6 +193,7 @@ class changelog(revlog):
             extra = self.encode_extra(extra)
             parseddate = "%s %s" % (parseddate, extra)
         list.sort()
+        list = map(util.fromlocal, list)
         l = [hex(manifest), user, parseddate] + list + ["", desc]
         text = "\n".join(l)
         return self.addrevision(text, transaction, self.count(), p1,
p2)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -421,7 +421,7 @@ class localrepository(repo.repository):
     def file(self, f):
         if f[0] == '/':
             f = f[1:]
-        return filelog.filelog(self.sopener, f)
+        return filelog.filelog(self.sopener, util.fromlocal(f))

     def changectx(self, changeid=None):
         return context.changectx(self, changeid)
 diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -40,6 +40,19 @@ class manifest(revlog):
         mfdict = manifestdict()
         for l in lines.splitlines():
             f, n = l.split('\0')
+            def should_recode(s):
+               "True if UTF-8 string s should be recoded to local
charset"
+               # XXX this function could use util._encoding
+               # XXX to avoid calling decode
+               try:
+                   s.decode("ascii")
+               except:
+                   ret = True # not ASCII, should recode
+               else:
+                   ret = False # ASCII, no recoding needed
+               return ret
+            if should_recode(f):
+               f = util.tolocal(f)
             if len(n) > 40:
                 mfdict._flags[f] = n[40:]
                 mfdict[f] = bin(n[:40])
@@ -103,6 +116,7 @@ class manifest(revlog):
     def find(self, node, f):
         '''look up entry for a single file efficiently.
         return (node, flags) pair if found, (None, None) if not.'''
+        f = util.fromlocal(f)
         if self.mapcache and node == self.mapcache[0]:
             return self.mapcache[1].get(f),
self.mapcache[1].flags(f)
         text = self.revision(node)
@@ -136,6 +150,12 @@ class manifest(revlog):
             if '\n' in f or '\r' in f:
                 raise RevlogError(_("'\\n' and '\\r' disallowed in
filenames"))

+        t = manifestdict()
+        for k in map.keys():
+            fname = util.fromlocal(k)
+            t[fname] = map[k]
+            t.set(fname, map.execf(k), map.linkf(k))
+        map = t
         # if we're using the listcache, make sure it is valid and
         # parented by the same node we're diffing against
         if not (changed and self.listcache and p1 and
self.mapcache[0] == p1):

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://selenic.com/pipermail/mercurial-devel/attachments/20071008/0479134f/attachment.htm 


More information about the Mercurial-devel mailing list