[PATCH 5 of 6 STABLE V3] icasefs: use upper() instead of lower() or os.path.normcase()

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Mon Dec 12 07:25:41 CST 2011


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1323677419 -32400
# Branch stable
# Node ID 22ae1ea19de2966fe4b13a049a25403cdf1bd8ab
# Parent  34aba3511c68823a927bf5957f813ae0d3dd4214
icasefs: use upper() instead of lower() or os.path.normcase()

This patch uses upper() instead of lower() or os.path.normcase() for
case folding, because lowercasing causes problems on some
case-insensitive filesystems.

For Cygwin-like environments, upper() is also used as normcase() on
POSIX platforms other than Darwin.

See the following for details about the problem with lowercasing:

    https://blogs.msdn.com/b/michkap/archive/2005/01/16/353873.aspx

diff -r 34aba3511c68 -r 22ae1ea19de2 hgext/win32mbcs.py
--- a/hgext/win32mbcs.py	Mon Dec 12 17:10:19 2011 +0900
+++ b/hgext/win32mbcs.py	Mon Dec 12 17:10:19 2011 +0900
@@ -127,7 +127,9 @@
 # NOTE: os.path.dirname() and os.path.basename() are safe because
 #       they use result of os.path.split()
 funcs = '''os.path.join os.path.split os.path.splitext
- os.path.splitunc os.path.normpath os.path.normcase os.makedirs
+ os.path.splitunc os.path.normpath os.makedirs
+ mercurial.windows.normcase
+ mercurial.util.normcase
  mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
  mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
  mercurial.util.checkwinfilename mercurial.util.checkosfilename'''
diff -r 34aba3511c68 -r 22ae1ea19de2 mercurial/encoding.py
--- a/mercurial/encoding.py	Mon Dec 12 17:10:19 2011 +0900
+++ b/mercurial/encoding.py	Mon Dec 12 17:10:19 2011 +0900
@@ -171,3 +171,22 @@
         return lu.encode(encoding)
     except UnicodeError:
         return s.lower() # we don't know how to fold this except in ASCII
+    except LookupError, k:
+        raise error.Abort(k, hint="please check your locale settings")
+
+def upper(s):
+    "best-effort encoding-aware case-folding of local string s"
+    try:
+        if isinstance(s, localstr):
+            u = s._utf8.decode("utf-8")
+        else:
+            u = s.decode(encoding, encodingmode)
+
+        uu = u.upper()
+        if u == uu:
+            return s # preserve localstring
+        return uu.encode(encoding)
+    except UnicodeError:
+        return s.upper() # we don't know how to fold this except in ASCII
+    except LookupError, k:
+        raise error.Abort(k, hint="please check your locale settings")
diff -r 34aba3511c68 -r 22ae1ea19de2 mercurial/posix.py
--- a/mercurial/posix.py	Mon Dec 12 17:10:19 2011 +0900
+++ b/mercurial/posix.py	Mon Dec 12 17:10:19 2011 +0900
@@ -166,7 +166,7 @@
 
 # os.path.normcase is a no-op, which doesn't help us on non-native filesystems
 def normcase(path):
-    return path.lower()
+    return path.upper()
 
 if sys.platform == 'darwin':
     import fcntl # only needed on darwin, missing on jython
diff -r 34aba3511c68 -r 22ae1ea19de2 mercurial/scmutil.py
--- a/mercurial/scmutil.py	Mon Dec 12 17:10:19 2011 +0900
+++ b/mercurial/scmutil.py	Mon Dec 12 17:10:19 2011 +0900
@@ -46,10 +46,10 @@
         self._abort = abort
         self._map = {}
         for f in existingiter:
-            self._map[encoding.lower(f)] = f
+            self._map[encoding.upper(f)] = f
 
     def __call__(self, f):
-        fl = encoding.lower(f)
+        fl = encoding.upper(f)
         map = self._map
         if fl in map and map[fl] != f:
             msg = _('possible case-folding collision for %s') % f
diff -r 34aba3511c68 -r 22ae1ea19de2 mercurial/windows.py
--- a/mercurial/windows.py	Mon Dec 12 17:10:19 2011 +0900
+++ b/mercurial/windows.py	Mon Dec 12 17:10:19 2011 +0900
@@ -131,7 +131,8 @@
 def normpath(path):
     return pconvert(os.path.normpath(path))
 
-normcase = os.path.normcase
+def normcase(path):
+    return path.upper()
 
 def realpath(path):
     '''
diff -r 34aba3511c68 -r 22ae1ea19de2 tests/test-casecollision-i18n.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-casecollision-i18n.t	Mon Dec 12 17:10:19 2011 +0900
@@ -0,0 +1,43 @@
+run only on case-sensitive filesystems
+
+  $ "$TESTDIR/hghave" no-icasefs || exit 80
+
+  $ LC_ALL=
+  $ export LC_ALL
+  $ LC_CTYPE=en_US.utf-8
+  $ export LC_CTYPE
+  $ HGENCODING=utf-8
+  $ export HGENCODING
+
+  $ hg init repo
+  $ cd repo
+
+test whether case-collision checks are performed by upper-casing:
+lower-casing makes u'\u0130' and u'\u0069' collide, whereas
+upper-casing makes u'\u0131' and u'\u0069' collide
+
+  $ python <<EOF
+  > names = [ u'\u0069', u'\u0130', u'\u0131' ]
+  > for i, name in zip(range(len(names)), names):
+  >     encname = name.encode('utf-8')
+  >     # file for getting target filename of "hg add"
+  >     f = file(str(i), 'w'); f.write(encname); f.close()
+  >     # target file of "hg add"
+  >     f = file(encname, 'w'); f.write(encname); f.close()
+  > EOF
+
+  $ hg add --config ui.portablefilenames=abort `cat 0`
+  $ hg add --config ui.portablefilenames=abort `cat 1`
+  $ hg add --config ui.portablefilenames=abort `cat 2`
+  abort: possible case-folding collision for \xc4\xb1 (esc)
+  [255]
+
+  $ hg status
+  A i
+  A \xc4\xb0 (esc)
+  ? 0
+  ? 1
+  ? 2
+  ? \xc4\xb1 (esc)
+
+  $ cd ..


More information about the Mercurial-devel mailing list