[PATCH] V13 of experiment for a simpler path encoding for hashed paths (for "fncache2")
Adrian Buehlmann
adrian at cadifra.com
Sat Sep 29 17:30:33 CDT 2012
On 2012-09-29 11:29, Adrian Buehlmann wrote:
> # HG changeset patch
> # User Adrian Buehlmann <adrian at cadifra.com>
> # Date 1348910761 -7200
> # Node ID ca5bca5255271ae28320c4d107b11ba5c9dab1de
> # Parent d6c7128e550de7d74aec27e86532ceec75a5c38f
> V13 of experiment for a simpler path encoding for hashed paths (for "fncache2")
>
> Changes compared to V12:
>
> - Escapes com0 and lpt0 as well. Windows 7 Explorer refuses to create these
> names, even though Microsoft does not list them as reserved.
>
> Changes in testcases:
>
> cutdirs('data/auxx/conx/prnx/nulx/comx/lptx/foo.i')
> 'data/auxx/conx/prnx/nulx/comx/lptx/foo~i'
> cutdirs('data/com0/com1/com9/lpt0/lpt1/lpt9/foo.i')
> - 'data/com0/com~/com~/lpt0/lpt~/lpt~/foo~i'
> + 'data/com~/com~/com~/lpt~/lpt~/lpt~/foo~i'
> +cutdirs('data/nul.txt/aux.txt/foo.i')
> + 'data/nul~txt/aux~txt/foo~i'
>
> cutdirs('data/common/auxiliary/nulling/console/bla.com/foo.i')
> 'data/common/auxiliar/nulling/console/bla~com/foo~i'
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1508,6 +1508,7 @@
>
> PyObject *encodedir(PyObject *self, PyObject *args);
> PyObject *pathencode(PyObject *self, PyObject *args);
> +PyObject *cutdirs(PyObject *self, PyObject *args);
>
> static PyMethodDef methods[] = {
> {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
> @@ -1516,6 +1517,7 @@
> {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
> {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
> {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
> + {"cutdirs", cutdirs, METH_VARARGS, "fncache-encode a path\n"},
> {NULL, NULL}
> };
>
> diff --git a/mercurial/pathencode.c b/mercurial/pathencode.c
> --- a/mercurial/pathencode.c
> +++ b/mercurial/pathencode.c
> @@ -481,6 +481,92 @@
>
> static const Py_ssize_t maxstorepathlen = 120;
>
> +static const char encchar[256] =
> + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
> + "~!~#$%&'()~+,-~~0123456789~;~=~~"
> + "@abcdefghijklmnopqrstuvwxyz[~]^_"
> + "`abcdefghijklmnopqrstuvwxyz{~}~~"
> + "~abcdefghijklmnopqrstuvwxyz{~}~~"
> + "~!~#$%&'()~+,-~~0123456789~;~=~~"
> + "@abcdefghijklmnopqrstuvwxyz[~]^_"
> + "`abcdefghijklmnopqrstuvwxyz{~}~~";
> +
> +/* this encoding folds */
> +static inline char encodechar(char c)
> +{
> + return encchar[0xff & c];
> +}
> +
> +static Py_ssize_t _cutdirs(char *dest, Py_ssize_t destlen, size_t destsize,
> + const char *src, Py_ssize_t len)
> +{
> + Py_ssize_t i = 0, spaceleft = maxstorepathlen - 40 + 1;
> + char seg[8];
> + int seglen = 0;
> + uint32_t cmp;
> +
> + while (i < len && spaceleft > 0) {
> + if (src[i] == '/' || src[i] == '\0') {
> + if (seglen != 0) {
> + if (seglen == 3) {
> + cmp = seg[0] << 16 | seg[1] << 8 | seg[2];
> + if ( cmp == 0x617578 /* aux */
> + || cmp == 0x636f6e /* con */
> + || cmp == 0x70726e /* prn */
> + || cmp == 0x6e756c /* nul */)
> + seg[2] = '~';
> + }
> + else if (seglen == 4 && seg[3] <= '9'
> + && seg[3] >= '0') {
> + cmp = seg[0] << 16 | seg[1] << 8 | seg[2];
> + if ( cmp == 0x636f6d /* com0..9 */
> + || cmp == 0x6c7074 /* lpt0..9 */)
> + seg[3] = '~';
> + }
> + memcopy(dest, &destlen, destsize, &seg, seglen);
> + seglen = 0;
> + }
> + charcopy(dest, &destlen, destsize, src[i++]);
> + spaceleft--;
> + }
> + else if (seglen == sizeof(seg)) {
> + i++;
> + }
> + else {
> + seg[seglen++] = encodechar(src[i++]);
> + spaceleft--;
> + }
> + }
> +
> + return destlen;
> +}
For comparison, here is cutdirs implemented in Python (patch against mercurial/store.py):
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -185,6 +185,41 @@
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
+_encchar = ("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ "~!~#$%&'()~+,-~~0123456789~;~=~~"
+ "@abcdefghijklmnopqrstuvwxyz[~]^_"
+ "`abcdefghijklmnopqrstuvwxyz{~}~~"
+ "~abcdefghijklmnopqrstuvwxyz{~}~~"
+ "~!~#$%&'()~+,-~~0123456789~;~=~~"
+ "@abcdefghijklmnopqrstuvwxyz[~]^_"
+ "`abcdefghijklmnopqrstuvwxyz{~}~~")
+
+def _foldencode(f): # preserves size
+ f = ''.join([_encchar[ord(c)] for c in f])
+ l = len(f)
+ if l == 3 and f[:3] in _winres3:
+ f = f[:2] + '~'
+ if (l == 4 and f[3] <= '9' and f[3] >= '0'
+ and f[:3] in _winres4):
+ f = f[:3] + '~'
+ return f
+
+def cutdirs(path):
+ parts = []
+ totallen = 0
+ for s in path.split('/'):
+ if len(s) > 8:
+ s = s[:8]
+ if totallen:
+ newlen = totallen + 1 + len(s)
+ else:
+ newlen = len(s)
+ if newlen > _maxstorepathlen - 40:
+ break
+ parts.append(s)
+ totallen = newlen
+ return '/'.join(map(_foldencode, parts))
+
def _hashencode(path, dotencode):
digest = _sha(path).hexdigest()
le = lowerencode(path).split('/')[1:]
More information about the Mercurial-devel mailing list