[PATCH 1 of 5 v5] store: refactor hashed encoding into its own function

Bryan O'Sullivan bos at serpentine.com
Mon Sep 10 15:34:58 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1347309263 25200
# Node ID bb0216c05e3f9d11269667edb4cc2975377d5054
# Parent  f4d15f3b96c009ab099a7df17575c01fd65d51da
store: refactor hashed encoding into its own function

diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -158,20 +158,10 @@
 _maxstorepathlen = 120
 _dirprefixlen = 8
 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
-def _hybridencode(path, auxencode):
-    '''encodes path with a length limit
+_ndprefixlen = len('data/')
 
-    Encodes all paths that begin with 'data/', according to the following.
-
-    Default encoding (reversible):
-
-    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
-    characters are encoded as '~xx', where xx is the two digit hex code
-    of the character (see encodefilename).
-    Relevant path components consisting of Windows reserved filenames are
-    masked by encoding the third character ('aux' -> 'au~78', see auxencode).
-
-    Hashed encoding (not reversible):
+def _hashencode(path, auxencode):
+    '''Hashed encoding (not reversible):
 
     If the default-encoded path is longer than _maxstorepathlen, a
     non-reversible hybrid hashing of the path is done instead.
@@ -189,36 +179,53 @@
     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
     encoding was used.
     '''
+    digest = _sha(path).hexdigest()
+    aep = auxencode(lowerencode(path[_ndprefixlen:]))
+    _root, ext = os.path.splitext(aep)
+    parts = aep.split('/')
+    basename = parts[-1]
+    sdirs = []
+    for p in parts[:-1]:
+        d = p[:_dirprefixlen]
+        if d[-1] in '. ':
+            # Windows can't access dirs ending in period or space
+            d = d[:-1] + '_'
+        t = '/'.join(sdirs) + '/' + d
+        if len(t) > _maxshortdirslen:
+            break
+        sdirs.append(d)
+    dirs = '/'.join(sdirs)
+    if len(dirs) > 0:
+        dirs += '/'
+    res = 'dh/' + dirs + digest + ext
+    spaceleft = _maxstorepathlen - len(res)
+    if spaceleft > 0:
+        filler = basename[:spaceleft]
+        res = 'dh/' + dirs + filler + digest + ext
+    return res
+
+def _hybridencode(path, auxencode):
+    '''encodes path with a length limit
+
+    Encodes all paths that begin with 'data/', according to the
+    following. Paths whose default encoding is longer than
+    _maxstorepathlen use the hashed encoding; the rest use the default.
+
+    Default encoding (reversible):
+
+    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
+    characters are encoded as '~xx', where xx is the two digit hex code
+    of the character (see encodefilename).
+    Relevant path components consisting of Windows reserved filenames are
+    masked by encoding the third character ('aux' -> 'au~78', see auxencode).
+    '''
     if not path.startswith('data/'):
         return path
     # escape directories ending with .i and .d
     path = encodedir(path)
-    ndpath = path[len('data/'):]
-    res = 'data/' + auxencode(encodefilename(ndpath))
+    res = 'data/' + auxencode(encodefilename(path[_ndprefixlen:]))
     if len(res) > _maxstorepathlen:
-        digest = _sha(path).hexdigest()
-        aep = auxencode(lowerencode(ndpath))
-        _root, ext = os.path.splitext(aep)
-        parts = aep.split('/')
-        basename = parts[-1]
-        sdirs = []
-        for p in parts[:-1]:
-            d = p[:_dirprefixlen]
-            if d[-1] in '. ':
-                # Windows can't access dirs ending in period or space
-                d = d[:-1] + '_'
-            t = '/'.join(sdirs) + '/' + d
-            if len(t) > _maxshortdirslen:
-                break
-            sdirs.append(d)
-        dirs = '/'.join(sdirs)
-        if len(dirs) > 0:
-            dirs += '/'
-        res = 'dh/' + dirs + digest + ext
-        spaceleft = _maxstorepathlen - len(res)
-        if spaceleft > 0:
-            filler = basename[:spaceleft]
-            res = 'dh/' + dirs + filler + digest + ext
+        res = _hashencode(path, auxencode)
     return res
 
 def _calcmode(path):


More information about the Mercurial-devel mailing list