Solving long paths by hashing

Adrian Buehlmann adrian at cadifra.com
Sun Jun 29 08:30:28 CDT 2008


On 29.06.2008 15:06, Dirkjan Ochtman wrote:
> Adrian Buehlmann wrote:
>> Sounds like you would take Jesse's patch then?
> 
> I haven't seen it, but the concept sounds interesting to me.
> 

I repeated Jesse's link to his patch in the first post of this thread. Here is
the link again:

http://www.selenic.com/mercurial/bts/file520/prevent-excessively-long-repo-paths.diff

I'll post Jesse's patch below so you can read and comment on it inline:

Prevent any file paths under .hg/store/data/ from getting dangerously long.
If a working copy file has a path which is very long and uses many underscores
or uppercase letters, the repo path could be especially long. Since Windows imposes a
path length maximum around 256 characters, this can make the repository
impossible to check out for Windows users. Worse, you cannot correct the problem
using 'hg ren' to shorten path names, since the old repo files will never be
deleted! The fix is to check for potentially overlong repo names. If found, truncate
them, inject a hash of the full path to prevent clashes, and write the full path
to .hg/store/longnames so it can be recovered in the future. Since this is an
incompatible repository format change, it has to be represented as a new
requirement. Issue #839.

diff -r 04c76f296ad6 mercurial/hg.py
--- a/mercurial/hg.py	Mon Dec 10 10:26:42 2007 -0600
+++ b/mercurial/hg.py	Thu Dec 13 21:59:29 2007 -0500
@@ -198,6 +198,7 @@ def clone(ui, source, dest=None, pull=Fa
             dest_lock = lock.lock(os.path.join(dest_store, "lock"))

             files = ("data",
+                     "longnames",
                      "00manifest.d", "00manifest.i",
                      "00changelog.d", "00changelog.i")
             for f in files:
diff -r 04c76f296ad6 mercurial/localrepo.py
--- a/mercurial/localrepo.py	Mon Dec 10 10:26:42 2007 -0600
+++ b/mercurial/localrepo.py	Thu Dec 13 21:59:29 2007 -0500
@@ -11,10 +11,11 @@ import changelog, dirstate, filelog, man
 import changelog, dirstate, filelog, manifest, context, weakref
 import re, lock, transaction, tempfile, stat, errno, ui
 import os, revlog, time, util, extensions, hook
+import sha

 class localrepository(repo.repository):
     capabilities = util.set(('lookup', 'changegroupsubset'))
-    supported = ('revlogv1', 'store')
+    supported = ('revlogv1', 'store', 'longnames')

     def __init__(self, parentui, path=None, create=0):
         repo.repository.__init__(self)
@@ -59,17 +60,7 @@ class localrepository(repo.repository):
             if r not in self.supported:
                 raise repo.RepoError(_("requirement '%s' not supported") % r)

-        # setup store
-        if "store" in requirements:
-            self.encodefn = util.encodefilename
-            self.decodefn = util.decodefilename
-            self.spath = os.path.join(self.path, "store")
-        else:
-            self.encodefn = lambda x: x
-            self.decodefn = lambda x: x
-            self.spath = self.path
-        self.sopener = util.encodedopener(util.opener(self.spath),
-                                          self.encodefn)
+        self._setup_store(requirements, util.opener, os.path.join)

         self.ui = ui.ui(parentui=parentui)
         try:
@@ -83,6 +74,73 @@ class localrepository(repo.repository):
         self.nodetagscache = None
         self.filterpats = {}
         self._transref = self._lockref = self._wlockref = None
+
+    def _setup_store(self, requirements, opener, pathjoiner):
+        if "store" in requirements:
+            self._longnames = None
+            def load_longnames():
+                if self._longnames == None:
+                    self._longnames = {}
+                    self._longnames_transient = {}
+                    try:
+                        self._longnames_file = opener(self.spath)('longnames',
+                                                                  mode='a+')
+                        for line in self._longnames_file:
+                            datapath = line[0:-1]
+                            sha1 = sha.new(datapath).hexdigest()
+                            self._longnames[sha1] = datapath
+                    except IOError, err:
+                        if err.errno != errno.ENOENT:
+                            raise
+            maxlen = 150 - len('.hg/store/')
+            def encode(s, write=False):
+                r = util.encodefilename(s)
+                if s.startswith('data/') and len(r) > maxlen:
+                    datapath = s[5:]
+                    sha1 = sha.new(datapath).hexdigest()
+                    r = 'data/_=' + sha1 + '-' + \
+                        re.sub(r"[^a-z0-9.-]", '_', datapath[48-maxlen:].lower())
+                    load_longnames()
+                    if not sha1 in self._longnames:
+                        if write:
+                            self._longnames[sha1] = datapath
+                            self._longnames_file.write(datapath + '\n')
+                            self._longnames_file.flush()
+                            reqfile = self.opener("requires", "a+")
+                            if 'longnames\n' not in reqfile.read().splitlines():
+                                reqfile.write('longnames\n')
+                            reqfile.close()
+                        else:
+                            self._longnames_transient[sha1] = datapath
+                return r
+            self.encodefn = encode
+            def decode(s):
+                r = util.decodefilename(s)
+                if s.startswith('data/_='):
+                    sha1 = s[7:].split('-', 2)[0]
+                    load_longnames()
+                    if sha1 in self._longnames:
+                        r = 'data/' + self._longnames[sha1]
+                    else:
+                        r = 'data/' + self._longnames_transient[sha1]
+                return r
+            self.decodefn = decode
+            self.spath = pathjoiner(self.path, "store")
+        else:
+            def encode(s, write=False):
+                return s
+            self.encodefn = encode
+            self.decodefn = lambda x: x
+            self.spath = self.path
+        _spath = self.spath
+        _encodefn = self.encodefn
+        class long_name_opener(opener):
+            def __init__(self):
+                opener.__init__(self, _spath)
+            def __call__(self, path, mode='r', text=False, atomictemp=False):
+                encpath = _encodefn(path, write=('w' in mode or 'a' in mode))
+                return opener.__call__(self, encpath, mode, text, atomictemp)
+        self.sopener = long_name_opener()

     def __getattr__(self, name):
         if name == 'changelog':
diff -r 04c76f296ad6 mercurial/statichttprepo.py
--- a/mercurial/statichttprepo.py	Mon Dec 10 10:26:42 2007 -0600
+++ b/mercurial/statichttprepo.py	Thu Dec 13 21:59:29 2007 -0500
@@ -19,14 +19,16 @@ class rangereader(httprangereader.httpra
             raise IOError(None, inst)
         except urllib2.URLError, inst:
             raise IOError(None, inst.reason[1])
+    def __iter__(self):
+        return self.read().splitlines(True).__iter__()

-def opener(base):
+class opener:
     """return a function that opens files over http"""
-    p = base
-    def o(path, mode="r"):
-        f = "/".join((p, urllib.quote(path)))
+    def __init__(self, base):
+        self.p = base
+    def __call__(self, path, mode="r", text=False, atomictemp=False):
+        f = "/".join((self.p, urllib.quote(path)))
         return rangereader(f)
-    return o

 class statichttprepository(localrepo.localrepository):
     def __init__(self, ui, path):
@@ -45,16 +47,7 @@ class statichttprepository(localrepo.loc
             if r not in self.supported:
                 raise repo.RepoError(_("requirement '%s' not supported") % r)

-        # setup store
-        if "store" in requirements:
-            self.encodefn = util.encodefilename
-            self.decodefn = util.decodefilename
-            self.spath = self.path + "/store"
-        else:
-            self.encodefn = lambda x: x
-            self.decodefn = lambda x: x
-            self.spath = self.path
-        self.sopener = util.encodedopener(opener(self.spath), self.encodefn)
+        self._setup_store(requirements, opener, (lambda a, b: a + '/' + b))

         self.manifest = manifest.manifest(self.sopener)
         self.changelog = changelog.changelog(self.sopener)
diff -r 04c76f296ad6 mercurial/util.py
--- a/mercurial/util.py	Mon Dec 10 10:26:42 2007 -0600
+++ b/mercurial/util.py	Thu Dec 13 21:59:29 2007 -0500
@@ -1266,11 +1266,6 @@ def _buildencodefun():

 encodefilename, decodefilename = _buildencodefun()

-def encodedopener(openerfn, fn):
-    def o(path, *args, **kw):
-        return openerfn(fn(path), *args, **kw)
-    return o
-
 def mktempcopy(name, emptyok=False):
     """Create a temporary file with the same contents from name

diff -r 04c76f296ad6 tests/test-long-filenames
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-long-filenames	Thu Dec 13 21:59:29 2007 -0500
@@ -0,0 +1,18 @@
+#!/bin/sh
+hg init
+echo content > 'An extremely long file name which I would not expect you would normally create as such, but which can easily arise when working with deeply nested directories.'
+hg -q add
+hg ci -m added
+find .hg | cut -c151- | egrep . || true
+hg ann An*
+hg ren An* shorter-file
+hg ci -m renamed
+find .hg | cut -c151- | egrep . || true
+hg ann shorter-file
+wc -l .hg/store/longnames
+fgrep longnames .hg/requires
+hg clone . clone
+cd clone
+hg up -r0
+ls | wc
+find .hg | cut -c151- | egrep . || true
diff -r 04c76f296ad6 tests/test-long-filenames.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-long-filenames.out	Thu Dec 13 21:59:29 2007 -0500
@@ -0,0 +1,7 @@
+0: content
+1: content
+1 .hg/store/longnames
+longnames
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+1 files updated, 0 files merged, 1 files removed, 0 files unresolved
+      1      27     160


More information about the Mercurial-devel mailing list