Solving long paths by hashing
Adrian Buehlmann
adrian at cadifra.com
Sun Jun 29 09:28:53 CDT 2008
On 29.06.2008 15:30, Adrian Buehlmann wrote:
> I repeated Jesse's link to his patch in the first post of this thread. Here is
> the link again:
>
> http://www.selenic.com/mercurial/bts/file520/prevent-excessively-long-repo-paths.diff
>
> I'll post Jesse's patch below so you can read it and comment on it inline:
>
> Prevent any file paths under .hg/store/data/ from getting dangerously long.
> If a working copy file has a path which is very long and uses many underscores
> or uppercase letters, the repo path could be especially long. Since Windows imposes a
> path length maximum around 256 characters, this can make the repository
> impossible to check out for Windows users. Worse, you cannot correct the problem
> using 'hg ren' to shorten path names, since the old repo files will never be
> deleted! Fix is to check for potentially overlong repo names. If found, truncate
> them, inject a hash of the full path to prevent clashes, and write the full path
> to .hg/store/longnames so it can be recovered in the future. Since this is an
> incompatible repository format change, it has to be represented as a new
> requirement. Issue #839.
>
> diff -r 04c76f296ad6 mercurial/hg.py
> --- a/mercurial/hg.py Mon Dec 10 10:26:42 2007 -0600
> +++ b/mercurial/hg.py Thu Dec 13 21:59:29 2007 -0500
> @@ -198,6 +198,7 @@ def clone(ui, source, dest=None, pull=Fa
> dest_lock = lock.lock(os.path.join(dest_store, "lock"))
>
> files = ("data",
> + "longnames",
> "00manifest.d", "00manifest.i",
> "00changelog.d", "00changelog.i")
> for f in files:
> diff -r 04c76f296ad6 mercurial/localrepo.py
> --- a/mercurial/localrepo.py Mon Dec 10 10:26:42 2007 -0600
> +++ b/mercurial/localrepo.py Thu Dec 13 21:59:29 2007 -0500
> @@ -11,10 +11,11 @@ import changelog, dirstate, filelog, man
> import changelog, dirstate, filelog, manifest, context, weakref
> import re, lock, transaction, tempfile, stat, errno, ui
> import os, revlog, time, util, extensions, hook
> +import sha
>
> class localrepository(repo.repository):
> capabilities = util.set(('lookup', 'changegroupsubset'))
> - supported = ('revlogv1', 'store')
> + supported = ('revlogv1', 'store', 'longnames')
>
> def __init__(self, parentui, path=None, create=0):
> repo.repository.__init__(self)
> @@ -59,17 +60,7 @@ class localrepository(repo.repository):
> if r not in self.supported:
> raise repo.RepoError(_("requirement '%s' not supported") % r)
>
> - # setup store
> - if "store" in requirements:
> - self.encodefn = util.encodefilename
> - self.decodefn = util.decodefilename
> - self.spath = os.path.join(self.path, "store")
> - else:
> - self.encodefn = lambda x: x
> - self.decodefn = lambda x: x
> - self.spath = self.path
> - self.sopener = util.encodedopener(util.opener(self.spath),
> - self.encodefn)
> + self._setup_store(requirements, util.opener, os.path.join)
>
> self.ui = ui.ui(parentui=parentui)
> try:
> @@ -83,6 +74,73 @@ class localrepository(repo.repository):
> self.nodetagscache = None
> self.filterpats = {}
> self._transref = self._lockref = self._wlockref = None
> +
> + def _setup_store(self, requirements, opener, pathjoiner):
> + if "store" in requirements:
> + self._longnames = None
> + def load_longnames():
> + if self._longnames == None:
> + self._longnames = {}
> + self._longnames_transient = {}
> + try:
> + self._longnames_file = opener(self.spath)('longnames',
> + mode='a+')
> + for line in self._longnames_file:
> + datapath = line[0:-1]
> + sha1 = sha.new(datapath).hexdigest()
> + self._longnames[sha1] = datapath
> + except IOError, err:
> + if err.errno != errno.ENOENT:
> + raise
> + maxlen = 150 - len('.hg/store/')
> + def encode(s, write=False):
> + r = util.encodefilename(s)
> + if s.startswith('data/') and len(r) > maxlen:
> + datapath = s[5:]
> + sha1 = sha.new(datapath).hexdigest()
> + r = 'data/_=' + sha1 + '-' + \
> + re.sub(r"[^a-z0-9.-]", '_', datapath[48-maxlen:].lower())
> + load_longnames()
> + if not sha1 in self._longnames:
> + if write:
> + self._longnames[sha1] = datapath
> + self._longnames_file.write(datapath + '\n')
> + self._longnames_file.flush()
> + reqfile = self.opener("requires", "a+")
> + if 'longnames\n' not in reqfile.read().splitlines():
> + reqfile.write('longnames\n')
> + reqfile.close()
This means the requirements file may mutate during a repo's life.
For example, a pull into a repo may cause 'longnames' to be added to the
requirements file?
Interesting concept.
> + else:
> + self._longnames_transient[sha1] = datapath
> + return r
> + self.encodefn = encode
> + def decode(s):
> + r = util.decodefilename(s)
> + if s.startswith('data/_='):
> + sha1 = s[7:].split('-', 2)[0]
> + load_longnames()
> + if sha1 in self._longnames:
> + r = 'data/' + self._longnames[sha1]
> + else:
> + r = 'data/' + self._longnames_transient[sha1]
> + return r
> + self.decodefn = decode
> + self.spath = pathjoiner(self.path, "store")
> + else:
> + def encode(s, write=False):
> + return s
> + self.encodefn = encode
> + self.decodefn = lambda x: x
> + self.spath = self.path
> + _spath = self.spath
> + _encodefn = self.encodefn
> + class long_name_opener(opener):
> + def __init__(self):
> + opener.__init__(self, _spath)
> + def __call__(self, path, mode='r', text=False, atomictemp=False):
> + encpath = _encodefn(path, write=('w' in mode or 'a' in mode))
> + return opener.__call__(self, encpath, mode, text, atomictemp)
> + self.sopener = long_name_opener()
>
> def __getattr__(self, name):
> if name == 'changelog':
[snipped the rest for now]
More information about the Mercurial-devel
mailing list