[PATCH] introduce fncache repository layout

Adrian Buehlmann adrian at cadifra.com
Sun Oct 19 17:34:24 CDT 2008


# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1224436327 -7200
# Node ID cf07754d418c7acaf4686a66dea30acd869b94a0
# Parent  834b8d7bd5978a85c22b880d0c24241416d24672
introduce fncache repository layout

* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)

patch changelog:

2008-10-19:
* fixed typos (shorted -> shortened)
* using util.sha1 instead of import sha

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -16,7 +16,7 @@
 
 class localrepository(repo.repository):
     capabilities = util.set(('lookup', 'changegroupsubset'))
-    supported = ('revlogv1', 'store')
+    supported = ('revlogv1', 'store', 'fncache')
 
     def __init__(self, parentui, path=None, create=0):
         repo.repository.__init__(self)
@@ -35,6 +35,7 @@
                 if parentui.configbool('format', 'usestore', True):
                     os.mkdir(os.path.join(self.path, "store"))
                     requirements.append("store")
+                    requirements.append("fncache")
                     # create an invalid changelog
                     self.opener("00changelog.i", "a").write(
                         '\0\0\0\2' # represents revlogv2
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -5,7 +5,10 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
+from i18n import _
 import os, stat, osutil, util
+
+_sha = util.sha1
 
 def _buildencodefun():
     e = '_'
@@ -34,6 +37,93 @@
             lambda s: "".join(list(decode(s))))
 
 encodefilename, decodefilename = _buildencodefun()
+
+def _build_lower_encodefun():  # returns an encoder closing over cmap
+    win_reserved = [ord(x) for x in '\\:*?"<>|']  # escaped like controls
+    cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])  # identity map
+    for x in (range(32) + range(126, 256) + win_reserved):
+        cmap[chr(x)] = "~%02x" % x  # '~' + two lowercase hex digits
+    for x in range(ord("A"), ord("Z")+1):
+        cmap[chr(x)] = chr(x).lower()  # 'A'-'Z' fold to 'a'-'z'
+    return lambda s: "".join([cmap[c] for c in s])
+
+lowerencode = _build_lower_encodefun()
+
+_windows_reserved_filenames = '''con prn aux nul
+    com1 com2 com3 com4 com5 com6 com7 com8 com9
+    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
+def auxencode(path):  # masks reserved names in each '/'-separated part
+    res = []
+    for n in path.split('/'):
+        if n:  # empty parts (e.g. from '//') are passed through as is
+            base = n.split('.')[0]  # text before the first dot
+            if base and (base in _windows_reserved_filenames):  # exact match
+                # escape the third character only ('aux.i' -> 'au~78.i')
+                ec = "~%02x" % ord(n[2])
+                n = n[0:2] + ec + n[3:]
+        res.append(n)
+    return '/'.join(res)
+
+MAX_PATH_LEN_IN_HGSTORE = 120  # longest path kept in the default encoding
+DIR_PREFIX_LEN = 8  # characters kept per directory level when hashing
+_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4  # = 68
+def hybridencode(path):
+    '''encodes path with a length limit
+
+    Encodes all paths that begin with 'data/', according to the following.
+
+    Default encoding (reversible):
+
+    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
+    characters are encoded as '~xx', where xx is the two digit hex code
+    of the character (see encodefilename).
+    Relevant path components consisting of Windows reserved filenames are
+    masked by encoding the third character ('aux' -> 'au~78', see auxencode).
+
+    Hashed encoding (not reversible):
+
+    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
+    non-reversible hybrid hashing of the path is done instead.
+    This encoding uses up to DIR_PREFIX_LEN characters of all directory
+    levels of the lowerencoded path, but not more levels than can fit into
+    _MAX_SHORTENED_DIRS_LEN.
+    Then follows the filler followed by the sha digest of the full path.
+    The filler is the beginning of the basename of the lowerencoded path
+    (the basename is everything after the last path separator). The filler
+    is as long as possible, filling in characters from the basename until
+    the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
+    of the basename have been taken).
+    The extension (e.g. '.i' or '.d') is preserved.
+
+    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
+    encoding was used.
+    '''
+    if not path.startswith('data/'):  # other paths are returned unchanged
+        return path
+    ndpath = path[len('data/'):]  # path without the 'data/' prefix
+    res = 'data/' + auxencode(encodefilename(ndpath))  # default encoding
+    if len(res) > MAX_PATH_LEN_IN_HGSTORE:  # too long: use the hashed form
+        digest = _sha(path).hexdigest()  # of the unencoded path, with 'data/'
+        aep = auxencode(lowerencode(ndpath))
+        _root, ext = os.path.splitext(aep)  # only ext is used below
+        parts = aep.split('/')
+        basename = parts[-1]  # still includes ext
+        sdirs = []
+        for p in parts[:-1]:
+            d = p[:DIR_PREFIX_LEN]
+            t = '/'.join(sdirs) + '/' + d  # leading '/' when sdirs is empty
+            if len(t) > _MAX_SHORTENED_DIRS_LEN:
+                break  # this level and all deeper ones are dropped
+            sdirs.append(d)
+        dirs = '/'.join(sdirs)
+        if len(dirs) > 0:
+            dirs += '/'
+        res = 'dh/' + dirs + digest + ext  # form without any filler
+        space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
+        if space_left > 0:
+            filler = basename[:space_left]  # may reach into the extension
+            res = 'dh/' + dirs + filler + digest + ext
+    return res
 
 def _calcmode(path):
     try:
@@ -120,8 +210,83 @@
         return (['requires', '00changelog.i'] +
                 [self.pathjoiner('store', f) for f in _data.split()])
 
+def fncache(opener):
+    '''yields the entries in the fncache file'''
+    try:
+        fp = opener('fncache', mode='rb')
+    except IOError:
+        # any IOError on open (e.g. a missing file) means: yield nothing
+        return
+    for n, line in enumerate(fp):
+        if (len(line) < 2) or (line[-1] != '\n'):  # empty or unterminated
+            t = _('invalid entry in fncache, line %s') % (n + 1)
+            raise util.Abort(t)
+        yield line[:-1]  # entry without its trailing newline
+    fp.close()  # skipped if Abort is raised or iteration stops early
+
+class fncacheopener(object):  # opener wrapper that maintains 'fncache'
+    def __init__(self, opener):
+        self.opener = opener
+        self.entries = None  # dict of known paths; filled on first write
+
+    def loadfncache(self):
+        self.entries = {}
+        for f in fncache(self.opener):
+            self.entries[f] = True  # dict used as a set
+
+    def __call__(self, path, mode='r', *args, **kw):
+        if mode not in ('r', 'rb') and path.startswith('data/'):  # not a read
+            if self.entries is None:
+                self.loadfncache()
+            if path not in self.entries:
+                self.opener('fncache', 'ab').write(path + '\n')  # unencoded
+                # fncache may contain non-existent files after rollback / strip
+                self.entries[path] = True
+        return self.opener(hybridencode(path), mode, *args, **kw)
+
+class fncachestore(basicstore):  # store using hybridencode and 'fncache'
+    def __init__(self, path, opener, pathjoiner):
+        self.pathjoiner = pathjoiner
+        self.path = self.pathjoiner(path, 'store')
+        self.createmode = _calcmode(self.path)
+        self._op = opener(self.path)  # raw opener, used for 'fncache' itself
+        self._op.createmode = self.createmode
+        self.opener = fncacheopener(self._op)  # encodes and records paths
+
+    def join(self, f):
+        return self.pathjoiner(self.path, hybridencode(f))
+
+    def datafiles(self):
+        rewrite = False
+        existing = []  # entries whose encoded file could be stat'ed
+        pjoin = self.pathjoiner
+        spath = self.path
+        for f in fncache(self._op):  # f is the unencoded path
+            ef = hybridencode(f)
+            try:
+                st = os.stat(pjoin(spath, ef))
+                yield f, ef, st.st_size
+                existing.append(f)  # runs when the generator is resumed
+            except OSError:
+                # stat failed: treat the entry as stale, drop it on rewrite
+                rewrite = True
+        if rewrite:
+            # rewrite fncache to remove nonexistent entries
+            # (may be caused by rollback / strip)
+            fp = self._op('fncache', mode='wb')
+            for p in existing:
+                fp.write(p + '\n')
+            fp.close()
+
+    def copylist(self):
+        d = _data + ' dh fncache'  # adds the 'dh' dir and the cache file
+        return (['requires', '00changelog.i'] +
+                [self.pathjoiner('store', f) for f in d.split()])
+
 def store(requirements, path, opener, pathjoiner=None):
     pathjoiner = pathjoiner or os.path.join
     if 'store' in requirements:
+        if 'fncache' in requirements:  # checked only together with 'store'
+            return fncachestore(path, opener, pathjoiner)
         return encodedstore(path, opener, pathjoiner)
     return basicstore(path, opener, pathjoiner)
diff --git a/tests/test-dumprevlog b/tests/test-dumprevlog
--- a/tests/test-dumprevlog
+++ b/tests/test-dumprevlog
@@ -2,6 +2,7 @@
 
 CONTRIBDIR=$TESTDIR/../contrib
 
+echo % prepare repo-a
 mkdir repo-a
 cd repo-a
 hg init
@@ -18,11 +19,13 @@
 
 hg verify
 
-echo dumping revlog of file a to stdout:
+echo
+echo % dumping revlog of file a to stdout
 python $CONTRIBDIR/dumprevlog .hg/store/data/a.i
-echo dumprevlog done
+echo % dumprevlog done
 
-# dump all revlogs to file repo.dump
+echo
+echo % dump all revlogs to file repo.dump
 find .hg/store -name "*.i" | sort | xargs python $CONTRIBDIR/dumprevlog > ../repo.dump
 
 cd ..
@@ -31,17 +34,28 @@
 cd repo-b
 hg init
 
-echo undumping:
+echo
+echo % undumping into repo-b
 python $CONTRIBDIR/undumprevlog < ../repo.dump
-echo undumping done
+echo % undumping done
 
+cd ..
+
+echo
+echo % clone --pull repo-b repo-c  to rebuild fncache
+hg clone --pull -U repo-b repo-c
+
+cd repo-c
+
+echo
+echo % verify repo-c
 hg verify
 
 cd ..
 
-echo comparing repos:
-hg -R repo-b incoming repo-a
-hg -R repo-a incoming repo-b
-echo comparing done
+echo
+echo % comparing repos
+hg -R repo-c incoming repo-a
+hg -R repo-a incoming repo-c
 
 exit 0
diff --git a/tests/test-dumprevlog.out b/tests/test-dumprevlog.out
--- a/tests/test-dumprevlog.out
+++ b/tests/test-dumprevlog.out
@@ -1,9 +1,11 @@
+% prepare repo-a
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 1 files, 3 changesets, 3 total revisions
-dumping revlog of file a to stdout:
+
+% dumping revlog of file a to stdout
 file: .hg/store/data/a.i
 node: 183d2312b35066fb6b3b449b84efc370d50993d0
 linkrev: 0
@@ -32,22 +34,34 @@
 adding more to file a
 
 -end-
-dumprevlog done
-undumping:
+% dumprevlog done
+
+% dump all revlogs to file repo.dump
+
+% undumping into repo-b
 .hg/store/00changelog.i
 .hg/store/00manifest.i
 .hg/store/data/a.i
-undumping done
+% undumping done
+
+% clone --pull repo-b repo-c to rebuild fncache
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 3 changesets with 3 changes to 1 files
+
+% verify repo-c
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 1 files, 3 changesets, 3 total revisions
-comparing repos:
+
+% comparing repos
 comparing with repo-a
 searching for changes
 no changes found
-comparing with repo-b
+comparing with repo-c
 searching for changes
 no changes found
-comparing done
diff --git a/tests/test-inherit-mode.out b/tests/test-inherit-mode.out
--- a/tests/test-inherit-mode.out
+++ b/tests/test-inherit-mode.out
@@ -22,6 +22,7 @@
 00770 ./.hg/store/data/dir/
 00660 ./.hg/store/data/dir/bar.i
 00660 ./.hg/store/data/foo.i
+00660 ./.hg/store/fncache
 00660 ./.hg/store/undo
 00660 ./.hg/undo.branch
 00660 ./.hg/undo.dirstate
@@ -49,6 +50,7 @@
 00770 ../push/.hg/store/data/dir/
 00660 ../push/.hg/store/data/dir/bar.i
 00660 ../push/.hg/store/data/foo.i
+00660 ../push/.hg/store/fncache
 00660 ../push/.hg/store/undo
 00660 ../push/.hg/undo.branch
 00660 ../push/.hg/undo.dirstate
diff --git a/tests/test-init.out b/tests/test-init.out
--- a/tests/test-init.out
+++ b/tests/test-init.out
@@ -3,6 +3,7 @@
 00changelog.i created
 revlogv1
 store
+fncache
 adding foo
 # creating repo with old format
 revlogv1
diff --git a/tests/test-verify.out b/tests/test-verify.out
--- a/tests/test-verify.out
+++ b/tests/test-verify.out
@@ -17,7 +17,6 @@
 checking manifests
 crosschecking files in changesets and manifests
 checking files
- ?: cannot decode filename 'data/X_f_o_o.txt.i'
  data/FOO.txt.i at 0: missing revlog!
  0: empty or missing FOO.txt
  FOO.txt at 0: f62022d3d590 in manifests not found
@@ -27,8 +26,6 @@
  data/bar.txt.i at 0: missing revlog!
  0: empty or missing bar.txt
  bar.txt at 0: 256559129457 in manifests not found
-warning: orphan revlog 'data/xbar.txt.i'
 3 files, 1 changesets, 0 total revisions
-1 warnings encountered!
-10 integrity errors encountered!
+9 integrity errors encountered!
 (first damaged changeset appears to be 0)


More information about the Mercurial-devel mailing list