Solving long paths by hashing

Adrian Buehlmann adrian at cadifra.com
Sun Jun 29 09:28:53 CDT 2008


On 29.06.2008 15:30, Adrian Buehlmann wrote:
> I repeated Jesse's link to his patch in the first post of this thread. Here is
> the link again:
> 
> http://www.selenic.com/mercurial/bts/file520/prevent-excessively-long-repo-paths.diff
> 
> I'll post Jesse's patch below so you can read and comment on it inline:
> 
> Prevent any file paths under .hg/store/data/ from getting dangerously long.
> If a working copy file has a path which is very long and uses many underscores
> or uppercase letters, the repo could be especially long. Since Windows imposes a
> path length maximum around 256 characters, this can make the repository
> impossible to check out for Windows users. Worse, you cannot correct the problem
> using 'hg ren' to shorten path names, since the old repo files will never be
> deleted! Fix is to check for potentially overlong repo names. If found, truncate
> them, inject a hash of the full path to prevent clashes, and write the full path
> to .hg/store/longnames so it can be recovered in the future. Since this is an
> incompatible repository format change, it has to be represented as a new
> requirement. Issue #839.
> 
> diff -r 04c76f296ad6 mercurial/hg.py
> --- a/mercurial/hg.py	Mon Dec 10 10:26:42 2007 -0600
> +++ b/mercurial/hg.py	Thu Dec 13 21:59:29 2007 -0500
> @@ -198,6 +198,7 @@ def clone(ui, source, dest=None, pull=Fa
>              dest_lock = lock.lock(os.path.join(dest_store, "lock"))
> 
>              files = ("data",
> +                     "longnames",
>                       "00manifest.d", "00manifest.i",
>                       "00changelog.d", "00changelog.i")
>              for f in files:
> diff -r 04c76f296ad6 mercurial/localrepo.py
> --- a/mercurial/localrepo.py	Mon Dec 10 10:26:42 2007 -0600
> +++ b/mercurial/localrepo.py	Thu Dec 13 21:59:29 2007 -0500
> @@ -11,10 +11,11 @@ import changelog, dirstate, filelog, man
>  import changelog, dirstate, filelog, manifest, context, weakref
>  import re, lock, transaction, tempfile, stat, errno, ui
>  import os, revlog, time, util, extensions, hook
> +import sha
> 
>  class localrepository(repo.repository):
>      capabilities = util.set(('lookup', 'changegroupsubset'))
> -    supported = ('revlogv1', 'store')
> +    supported = ('revlogv1', 'store', 'longnames')
> 
>      def __init__(self, parentui, path=None, create=0):
>          repo.repository.__init__(self)
> @@ -59,17 +60,7 @@ class localrepository(repo.repository):
>              if r not in self.supported:
>                  raise repo.RepoError(_("requirement '%s' not supported") % r)
> 
> -        # setup store
> -        if "store" in requirements:
> -            self.encodefn = util.encodefilename
> -            self.decodefn = util.decodefilename
> -            self.spath = os.path.join(self.path, "store")
> -        else:
> -            self.encodefn = lambda x: x
> -            self.decodefn = lambda x: x
> -            self.spath = self.path
> -        self.sopener = util.encodedopener(util.opener(self.spath),
> -                                          self.encodefn)
> +        self._setup_store(requirements, util.opener, os.path.join)
> 
>          self.ui = ui.ui(parentui=parentui)
>          try:
> @@ -83,6 +74,73 @@ class localrepository(repo.repository):
>          self.nodetagscache = None
>          self.filterpats = {}
>          self._transref = self._lockref = self._wlockref = None
> +
> +    def _setup_store(self, requirements, opener, pathjoiner):
> +        if "store" in requirements:
> +            self._longnames = None
> +            def load_longnames():
> +                if self._longnames == None:
> +                    self._longnames = {}
> +                    self._longnames_transient = {}
> +                    try:
> +                        self._longnames_file = opener(self.spath)('longnames',
> +                                                                  mode='a+')
> +                        for line in self._longnames_file:
> +                            datapath = line[0:-1]
> +                            sha1 = sha.new(datapath).hexdigest()
> +                            self._longnames[sha1] = datapath
> +                    except IOError, err:
> +                        if err.errno != errno.ENOENT:
> +                            raise
> +            maxlen = 150 - len('.hg/store/')
> +            def encode(s, write=False):
> +                r = util.encodefilename(s)
> +                if s.startswith('data/') and len(r) > maxlen:
> +                    datapath = s[5:]
> +                    sha1 = sha.new(datapath).hexdigest()
> +                    r = 'data/_=' + sha1 + '-' + \
> +                        re.sub(r"[^a-z0-9.-]", '_', datapath[48-maxlen:].lower())
> +                    load_longnames()
> +                    if not sha1 in self._longnames:
> +                        if write:
> +                            self._longnames[sha1] = datapath
> +                            self._longnames_file.write(datapath + '\n')
> +                            self._longnames_file.flush()
> +                            reqfile = self.opener("requires", "a+")
> +                            if 'longnames\n' not in reqfile.read().splitlines():
> +                                reqfile.write('longnames\n')
> +                            reqfile.close()

This means the requirements file may mutate during a repo's life.

For example, could a pull into a repo cause 'longnames' to be added to the
requirements file?

Interesting concept.

> +                        else:
> +                            self._longnames_transient[sha1] = datapath
> +                return r
> +            self.encodefn = encode
> +            def decode(s):
> +                r = util.decodefilename(s)
> +                if s.startswith('data/_='):
> +                    sha1 = s[7:].split('-', 2)[0]
> +                    load_longnames()
> +                    if sha1 in self._longnames:
> +                        r = 'data/' + self._longnames[sha1]
> +                    else:
> +                        r = 'data/' + self._longnames_transient[sha1]
> +                return r
> +            self.decodefn = decode
> +            self.spath = pathjoiner(self.path, "store")
> +        else:
> +            def encode(s, write=False):
> +                return s
> +            self.encodefn = encode
> +            self.decodefn = lambda x: x
> +            self.spath = self.path
> +        _spath = self.spath
> +        _encodefn = self.encodefn
> +        class long_name_opener(opener):
> +            def __init__(self):
> +                opener.__init__(self, _spath)
> +            def __call__(self, path, mode='r', text=False, atomictemp=False):
> +                encpath = _encodefn(path, write=('w' in mode or 'a' in mode))
> +                return opener.__call__(self, encpath, mode, text, atomictemp)
> +        self.sopener = long_name_opener()
> 
>      def __getattr__(self, name):
>          if name == 'changelog':

[snipped the rest for now]


More information about the Mercurial-devel mailing list