[PATCH] store: rewrite fncache path mangling code in C

Bryan O'Sullivan bos at serpentine.com
Mon Aug 27 16:00:33 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1346101191 25200
# Node ID 54414ba3b42b2cc38e618782c15aa13070e7a6c7
# Parent  99a2a4ae35e2180b7f825ef2677c36d538eac4ba
store: rewrite fncache path mangling code in C

The Python path mangling code used by fncache is complex and (perhaps
surprisingly) slow enough to negatively affect the overall performance
of Mercurial.

The name mangling scheme used by fncache consists of two paths, both of
which are translated into C in this patch.

For a short path (< 120 bytes), the Python code can be reduced to a
fairly tractable state machine that mangles the path in a single pass.

For longer paths, the mangling scheme is much more complicated, has
poor data dependencies, and requires a name to be hashed using SHA-1.
It's almost impossible to see how to implement this without multiple
copying passes.

Raw performance: I measured in a repo containing 150,000 files in its tip
manifest, with a median path name length of 57 bytes, and 95th percentile
of 96 bytes.

In this repo, the Python code takes 3.95 seconds to encode all path names,
while the C code (called from Python) takes 0.15 seconds.

Across several other large repositories, I've measured the speedup from
the C code at between 25x and 40x.  For path names above 120 bytes where
the hashed encoding method must be used, the speedup is about 20x.

Practical performance:

  "hg clone -U --uncompressed" of mozilla-central over http
    Python: 20.5 seconds
    mixed:  17.5 (server C, client Python)
    all C:  14.8

  "hg update" from null to tip in mozilla-central
    Python: 30.1 seconds
    C:      28.7

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1506,11 +1506,14 @@
 
 static char parsers_doc[] = "Efficient content parsing.";
 
+PyObject *pathencode(PyObject *self, PyObject *args);
+
 static PyMethodDef methods[] = {
 	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
 	{NULL, NULL}
 };
 
diff --git a/mercurial/pathencode.c b/mercurial/pathencode.c
new file mode 100644
--- /dev/null
+++ b/mercurial/pathencode.c
@@ -0,0 +1,589 @@
+#include <Python.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sha1.h"
+
+enum dir_state {
+	DSTART,
+	DDOT,
+	DH,
+	DHG,
+	DDEFAULT,
+};
+
+enum path_state {
+	START,
+	A,
+	AU,
+	THREE,
+	C,
+	CO,
+	COMLPT,
+	COMLPTN,
+	L,
+	LP,
+	N,
+	NU,
+	P,
+	PR,
+	DOT,
+	H,
+	HG,
+	SPACE,
+	DEFAULT,
+};
+
+static inline int check_bit(const uint32_t bitset[], char c)
+{
+	return bitset[c >> 5] & (1 << (c & 31));
+}
+
+static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+			    char c)
+{
+	if (dest) {
+		assert(*destlen < destsize);
+		dest[*destlen] = c;
+	}
+	(*destlen)++;
+}
+
+static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+			   const void *src, Py_ssize_t len)
+{
+	if (dest) {
+		assert(*destlen + len < destsize);
+		memcpy((void *)&dest[*destlen], src, len);
+	}
+	*destlen += len;
+}
+
+static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
+			     uint8_t c)
+{
+	static const char hexdigit[] = "0123456789abcdef";
+
+	charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
+	charcopy(dest, destlen, destsize, hexdigit[c & 15]);
+}
+
+static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
+			   char c)
+{
+	charcopy(dest, destlen, destsize, '~');
+	hexencode(dest, destlen, destsize, c);
+}
+
+static Py_ssize_t _encode(const uint32_t twobytes[8],
+			  const uint32_t onebyte[8],
+			  const uint32_t fastpath[8],
+			  char *dest, Py_ssize_t destlen, size_t destsize,
+			  const char *src, Py_ssize_t len)
+{
+	enum path_state state = START;
+	Py_ssize_t i = 0;
+
+	while (i < len) {
+		switch (state) {
+		case START:
+			switch (src[i]) {
+			case '/':
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case '.':
+			case ' ':
+				state = DEFAULT;
+				escape3(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'a':
+				state = A;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'c':
+				state = C;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'l':
+				state = L;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'n':
+				state = N;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'p':
+				state = P;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			default:
+				state = DEFAULT;
+				break;
+			}
+			break;
+		case A:
+			if (src[i] == 'u') {
+				state = AU;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case AU:
+			if (src[i] == 'x') {
+				state = THREE;
+				i++;
+			}
+			else state = DEFAULT;
+			break;
+		case THREE:
+			state = DEFAULT;
+			switch (src[i]) {
+			case '.':
+			case '/':
+			case '\0':
+				escape3(dest, &destlen, destsize, src[i - 1]);
+				break;
+			default:
+				i--;
+				break;
+			}
+			break;
+		case C:
+			if (src[i] == 'o') {
+				state = CO;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case CO:
+			if (src[i] == 'm') {
+				state = COMLPT;
+				i++;
+			}
+			else if (src[i] == 'n') {
+				state = THREE;
+				i++;
+			}
+			else state = DEFAULT;
+			break;
+		case COMLPT:
+			switch (src[i]) {
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				state = COMLPTN;
+				i++;
+				break;
+			default:
+				state = DEFAULT;
+				charcopy(dest, &destlen, destsize, src[i - 1]);
+				break;
+			}
+			break;
+		case COMLPTN:
+			state = DEFAULT;
+			switch (src[i]) {
+			case '.':
+			case '/':
+			case '\0':
+				escape3(dest, &destlen, destsize, src[i - 2]);
+				charcopy(dest, &destlen, destsize, src[i - 1]);
+				break;
+			default:
+				memcopy(dest, &destlen, destsize,
+					&src[i - 2], 2);
+				break;
+			}
+			break;
+		case L:
+			if (src[i] == 'p') {
+				state = LP;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case LP:
+			if (src[i] == 't') {
+				state = COMLPT;
+				i++;
+			}
+			else state = DEFAULT;
+			break;
+		case N:
+			if (src[i] == 'u') {
+				state = NU;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case NU:
+			if (src[i] == 'l') {
+				state = THREE;
+				i++;
+			}
+			else state = DEFAULT;
+			break;
+		case P:
+			if (src[i] == 'r') {
+				state = PR;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case PR:
+			if (src[i] == 'n') {
+				state = THREE;
+				i++;
+			}
+			else state = DEFAULT;
+			break;
+		case DOT:
+			switch (src[i]) {
+			case '/':
+			case '\0':
+				state = START;
+				memcopy(dest, &destlen, destsize, "~2e", 3);
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'd':
+			case 'i':
+				state = HG;
+				charcopy(dest, &destlen, destsize, '.');
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'h':
+				state = H;
+				charcopy(dest, &destlen, destsize, '.');
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			default:
+				state = DEFAULT;
+				charcopy(dest, &destlen, destsize, '.');
+				break;
+			}
+			break;
+		case H:
+			if (src[i] == 'g') {
+				state = HG;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case HG:
+			if (src[i] == '/') {
+				state = START;
+				memcopy(dest, &destlen, destsize, ".hg", 3);
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DEFAULT;
+			break;
+		case SPACE:
+			switch (src[i]) {
+			case '/':
+			case '\0':
+				state = START;
+				memcopy(dest, &destlen, destsize, "~20", 3);
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			default:
+				state = DEFAULT;
+				charcopy(dest, &destlen, destsize, ' ');
+				break;
+			}
+			break;
+		case DEFAULT:
+			while (i < len && check_bit(fastpath, src[i]))
+				charcopy(dest, &destlen, destsize, src[i++]);
+			if (i == len)
+				goto done;
+			switch (src[i]) {
+			case '.':
+				state = DOT;
+				i++;
+				break;
+			case ' ':
+				state = SPACE;
+				i++;
+				break;
+			case '/':
+				state = START;
+				charcopy(dest, &destlen, destsize, '/');
+				i++;
+				break;
+			default:
+				if (check_bit(onebyte, src[i])) {
+					do {
+						charcopy(dest, &destlen,
+							 destsize, src[i++]);
+					} while (i < len &&
+						 check_bit(fastpath, src[i]));
+				}
+				else if (check_bit(twobytes, src[i])) {
+					char c = src[i++];
+					charcopy(dest, &destlen, destsize, '_');
+					charcopy(dest, &destlen, destsize,
+						 c == '_' ? '_' : c + 32);
+				}
+				else
+					escape3(dest, &destlen, destsize,
+						src[i++]);
+				break;
+			}
+			break;
+		}
+	}
+done:
+	return destlen;
+}
+
+static Py_ssize_t basicencode(char *dest, size_t destsize,
+			      const char *src, Py_ssize_t len)
+{
+	static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
+
+	static const uint32_t onebyte[8] = {
+		1, 0x2bfffbfa, 0x68000001, 0x2fffffff,
+	};
+
+	static const uint32_t fastpath[8] = {
+		1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
+	};
+
+	Py_ssize_t destlen = 0;
+
+	if (len < 5 || memcmp(src, "data/", 5) != 0) {
+		memcopy(dest, &destlen, destsize, src, len);
+		return destlen;
+	}
+
+	memcopy(dest, &destlen, destsize, "data/", 5);
+
+	return _encode(twobytes, onebyte, fastpath, dest, destlen, destsize,
+		       src + 5, len - 5);
+}
+
+static Py_ssize_t direncode(char *dest, size_t destsize,
+			    const char *src, Py_ssize_t len)
+{
+	enum dir_state state = DSTART;
+	Py_ssize_t i = 5, destlen = 0;
+
+	memcopy(dest, &destlen, destsize, "data/", 5);
+
+	while (i < len) {
+		switch (state) {
+		case DSTART:
+			state = DDEFAULT;
+			charcopy(dest, &destlen, destsize, src[i++]);
+			break;
+		case DDOT:
+			switch (src[i]) {
+			case 'd':
+			case 'i':
+				state = DHG;
+				charcopy(dest, &destlen, destsize, '.');
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'h':
+				state = DH;
+				charcopy(dest, &destlen, destsize, '.');
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			default:
+				state = DDEFAULT;
+				charcopy(dest, &destlen, destsize, '.');
+				break;
+			}
+		case DH:
+			if (src[i] == 'g') {
+				state = DHG;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DDEFAULT;
+			break;
+		case DHG:
+			if (src[i] == '/') {
+				state = DSTART;
+				memcopy(dest, &destlen, destsize, ".hg", 3);
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DDEFAULT;
+			break;
+		case DDEFAULT:
+			switch (src[i]) {
+			case '.':
+				state = DDOT;
+				i++;
+				break;
+			case '/':
+				state = DSTART;
+			default:
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			}
+			break;
+		}
+	}
+
+	return destlen;
+}
+
+static Py_ssize_t auxencode(char *dest, size_t destsize,
+			    const char *src, Py_ssize_t len)
+{
+	static const uint32_t twobytes[8];
+
+	static const uint32_t onebyte[8] = {
+		~0, 0xffffbffe, ~0, ~0, ~0, ~0, ~0, ~0,
+	};
+
+	return _encode(twobytes, onebyte, onebyte, dest, 0, destsize,
+		       src, len);
+}
+
+static const Py_ssize_t maxstorepathlen = 120;
+
+static Py_ssize_t lowerencode(char *dest, size_t destsize,
+			      const char *src, Py_ssize_t len)
+{
+	static const uint32_t onebyte[8] = {
+		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
+	};
+
+	static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
+
+	Py_ssize_t i = 0, destlen = 0;
+
+	while (i < len) {
+		if (check_bit(onebyte, src[i]))
+			charcopy(dest, &destlen, destsize, src[i++]);
+		else if (check_bit(lower, src[i]))
+			charcopy(dest, &destlen, destsize, src[i++] + 32);
+		else
+			escape3(dest, &destlen, destsize, src[i++]);
+	}
+
+	return destlen;
+}
+
+static Py_ssize_t hashmangle(char *dest, size_t destsize, const char *src,
+			     Py_ssize_t len, const unsigned char sha[20])
+{
+	static const Py_ssize_t dirprefixlen = 8;
+	static const Py_ssize_t maxshortdirslen = 68;
+
+	Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
+	Py_ssize_t destlen = 0, slop;
+
+	while (lastslash >= 0 && src[lastslash] != '/') {
+		if (src[lastslash] == '.' && lastdot == -1)
+			lastdot = lastslash;
+		lastslash--;
+	}
+
+	for (i = d = p = 0; i < lastslash; i++, p++) {
+		if (src[i] == '/') {
+			if (destlen > maxshortdirslen)
+				break;
+			charcopy(dest, &destlen, destsize, src[i]);
+			p = -1;
+		}
+		else if (p < dirprefixlen)
+			charcopy(dest, &destlen, destsize, src[i]);
+	}
+
+	if (destlen > maxshortdirslen)
+		do {
+			destlen--;
+		} while (destlen > 0 && dest[destlen] != '/');
+
+	if (destlen > 3)
+		charcopy(dest, &destlen, destsize, '/');
+
+	slop = maxstorepathlen - destlen - 3 - (40 + len - lastdot - 1);
+	if (slop > len - lastslash - 2)
+		slop = len - lastslash - 2;
+
+	if (slop > 0)
+		memcopy(dest, &destlen, destsize, &src[lastslash + 1], slop);
+
+	for (i = 0; i < 20; i++)
+		hexencode(dest, &destlen, destsize, sha[i]);
+
+	if (lastdot >= 0)
+		memcopy(dest, &destlen, destsize, &src[lastdot],
+			len - lastdot);
+	else
+		charcopy(dest, &destlen, destsize, 0);
+
+	return destlen;
+}
+
+static Py_ssize_t hashencode(char *dest, size_t destsize, const char *src,
+			     Py_ssize_t len)
+{
+	const Py_ssize_t baselen = (len - 5) * 3;
+	char dired[baselen];
+	char lowered[baselen];
+	char auxed[baselen];
+	char mangled[baselen];
+	Py_ssize_t dirlen, lowerlen, auxlen, destlen;
+	unsigned char sha[20];
+
+	dirlen = direncode(dired, baselen, src, len);
+	sha1((const unsigned char *)dired, dirlen - 1, sha);
+	lowerlen = lowerencode(lowered, baselen, src + 5, len - 5);
+	auxlen = auxencode(auxed, baselen, lowered, lowerlen);
+	memcpy(mangled, "dh/", 3);
+	destlen = hashmangle(mangled + 3, baselen - 3, auxed, auxlen, sha) + 2;
+	if (dest) {
+		assert(destlen <= destsize);
+		memcpy(dest, mangled, destlen);
+	}
+
+	return destlen;
+}
+
+PyObject *pathencode(PyObject *self, PyObject *args)
+{
+	Py_ssize_t len, newlen;
+	PyObject *pathobj, *newobj;
+	char *path;
+
+	if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
+		return NULL;
+
+	if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
+		PyErr_SetString(PyExc_TypeError, "expected a string");
+		return NULL;
+	}
+
+	newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
+
+	if (newlen == len + 1) {
+		Py_INCREF(pathobj);
+		return pathobj;
+	}
+
+	if (newlen <= maxstorepathlen + 1) {
+		newobj = PyString_FromStringAndSize(NULL, newlen);
+		PyString_GET_SIZE(newobj)--;
+
+		if (newobj)
+			basicencode(PyString_AS_STRING(newobj), newlen, path,
+				    len + 1);
+	} else {
+		newlen = 170;
+		newobj = PyString_FromStringAndSize(NULL, 170);
+
+		if (newobj) {
+			newlen = hashencode(PyString_AS_STRING(newobj),
+					    newlen, path, len + 1);
+			PyString_GET_SIZE(newobj) = newlen;
+		}
+	}
+
+	return newobj;
+}
diff --git a/mercurial/sha1.c b/mercurial/sha1.c
new file mode 100644
--- /dev/null
+++ b/mercurial/sha1.c
@@ -0,0 +1,300 @@
+/*
+ *  FIPS-180-1 compliant SHA-1 implementation
+ *
+ *  Copyright (C) 2006-2010, Brainspark B.V.
+ *
+ *  This file is part of PolarSSL (http://www.polarssl.org)
+ *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
+ *
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+/*
+ *  The SHA-1 standard was published by NIST in 1993.
+ *
+ *  http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ */
+
+#include <Python.h>
+
+#include "sha1.h"
+#include "util.h"
+
+/*
+ * SHA-1 context setup
+ */
+void sha1_starts( sha1_context *ctx )
+{
+    ctx->total[0] = 0;
+    ctx->total[1] = 0;
+
+    ctx->state[0] = 0x67452301;
+    ctx->state[1] = 0xEFCDAB89;
+    ctx->state[2] = 0x98BADCFE;
+    ctx->state[3] = 0x10325476;
+    ctx->state[4] = 0xC3D2E1F0;
+}
+
+static void sha1_process( sha1_context *ctx, const unsigned char data[64] )
+{
+    uint32_t temp, W[16], A, B, C, D, E;
+
+    W[0] = getbe32(&data[0]);
+    W[1] = getbe32(&data[4]);
+    W[2] = getbe32(&data[8]);
+    W[3] = getbe32(&data[12]);
+    W[4] = getbe32(&data[16]);
+    W[5] = getbe32(&data[20]);
+    W[6] = getbe32(&data[24]);
+    W[7] = getbe32(&data[28]);
+    W[8] = getbe32(&data[32]);
+    W[9] = getbe32(&data[36]);
+    W[10] = getbe32(&data[40]);
+    W[11] = getbe32(&data[44]);
+    W[12] = getbe32(&data[48]);
+    W[13] = getbe32(&data[52]);
+    W[14] = getbe32(&data[56]);
+    W[15] = getbe32(&data[60]);
+
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
+
+#define R(t)                                            \
+(                                                       \
+    temp = W[(t -  3) & 0x0F] ^ W[(t - 8) & 0x0F] ^     \
+           W[(t - 14) & 0x0F] ^ W[ t      & 0x0F],      \
+    ( W[t & 0x0F] = S(temp,1) )                         \
+)
+
+#define P(a,b,c,d,e,x)                                  \
+{                                                       \
+    e += S(a,5) + F(b,c,d) + K + x; b = S(b,30);        \
+}
+
+    A = ctx->state[0];
+    B = ctx->state[1];
+    C = ctx->state[2];
+    D = ctx->state[3];
+    E = ctx->state[4];
+
+#define F(x,y,z) (z ^ (x & (y ^ z)))
+#define K 0x5A827999
+
+    P( A, B, C, D, E, W[0]  );
+    P( E, A, B, C, D, W[1]  );
+    P( D, E, A, B, C, W[2]  );
+    P( C, D, E, A, B, W[3]  );
+    P( B, C, D, E, A, W[4]  );
+    P( A, B, C, D, E, W[5]  );
+    P( E, A, B, C, D, W[6]  );
+    P( D, E, A, B, C, W[7]  );
+    P( C, D, E, A, B, W[8]  );
+    P( B, C, D, E, A, W[9]  );
+    P( A, B, C, D, E, W[10] );
+    P( E, A, B, C, D, W[11] );
+    P( D, E, A, B, C, W[12] );
+    P( C, D, E, A, B, W[13] );
+    P( B, C, D, E, A, W[14] );
+    P( A, B, C, D, E, W[15] );
+    P( E, A, B, C, D, R(16) );
+    P( D, E, A, B, C, R(17) );
+    P( C, D, E, A, B, R(18) );
+    P( B, C, D, E, A, R(19) );
+
+#undef K
+#undef F
+
+#define F(x,y,z) (x ^ y ^ z)
+#define K 0x6ED9EBA1
+
+    P( A, B, C, D, E, R(20) );
+    P( E, A, B, C, D, R(21) );
+    P( D, E, A, B, C, R(22) );
+    P( C, D, E, A, B, R(23) );
+    P( B, C, D, E, A, R(24) );
+    P( A, B, C, D, E, R(25) );
+    P( E, A, B, C, D, R(26) );
+    P( D, E, A, B, C, R(27) );
+    P( C, D, E, A, B, R(28) );
+    P( B, C, D, E, A, R(29) );
+    P( A, B, C, D, E, R(30) );
+    P( E, A, B, C, D, R(31) );
+    P( D, E, A, B, C, R(32) );
+    P( C, D, E, A, B, R(33) );
+    P( B, C, D, E, A, R(34) );
+    P( A, B, C, D, E, R(35) );
+    P( E, A, B, C, D, R(36) );
+    P( D, E, A, B, C, R(37) );
+    P( C, D, E, A, B, R(38) );
+    P( B, C, D, E, A, R(39) );
+
+#undef K
+#undef F
+
+#define F(x,y,z) ((x & y) | (z & (x | y)))
+#define K 0x8F1BBCDC
+
+    P( A, B, C, D, E, R(40) );
+    P( E, A, B, C, D, R(41) );
+    P( D, E, A, B, C, R(42) );
+    P( C, D, E, A, B, R(43) );
+    P( B, C, D, E, A, R(44) );
+    P( A, B, C, D, E, R(45) );
+    P( E, A, B, C, D, R(46) );
+    P( D, E, A, B, C, R(47) );
+    P( C, D, E, A, B, R(48) );
+    P( B, C, D, E, A, R(49) );
+    P( A, B, C, D, E, R(50) );
+    P( E, A, B, C, D, R(51) );
+    P( D, E, A, B, C, R(52) );
+    P( C, D, E, A, B, R(53) );
+    P( B, C, D, E, A, R(54) );
+    P( A, B, C, D, E, R(55) );
+    P( E, A, B, C, D, R(56) );
+    P( D, E, A, B, C, R(57) );
+    P( C, D, E, A, B, R(58) );
+    P( B, C, D, E, A, R(59) );
+
+#undef K
+#undef F
+
+#define F(x,y,z) (x ^ y ^ z)
+#define K 0xCA62C1D6
+
+    P( A, B, C, D, E, R(60) );
+    P( E, A, B, C, D, R(61) );
+    P( D, E, A, B, C, R(62) );
+    P( C, D, E, A, B, R(63) );
+    P( B, C, D, E, A, R(64) );
+    P( A, B, C, D, E, R(65) );
+    P( E, A, B, C, D, R(66) );
+    P( D, E, A, B, C, R(67) );
+    P( C, D, E, A, B, R(68) );
+    P( B, C, D, E, A, R(69) );
+    P( A, B, C, D, E, R(70) );
+    P( E, A, B, C, D, R(71) );
+    P( D, E, A, B, C, R(72) );
+    P( C, D, E, A, B, R(73) );
+    P( B, C, D, E, A, R(74) );
+    P( A, B, C, D, E, R(75) );
+    P( E, A, B, C, D, R(76) );
+    P( D, E, A, B, C, R(77) );
+    P( C, D, E, A, B, R(78) );
+    P( B, C, D, E, A, R(79) );
+
+#undef K
+#undef F
+
+    ctx->state[0] += A;
+    ctx->state[1] += B;
+    ctx->state[2] += C;
+    ctx->state[3] += D;
+    ctx->state[4] += E;
+}
+
+/*
+ * SHA-1 process buffer
+ */
+void sha1_update( sha1_context *ctx, const unsigned char *input, size_t ilen )
+{
+    size_t fill;
+    unsigned long left;
+
+    if( ilen <= 0 )
+        return;
+
+    left = ctx->total[0] & 0x3F;
+    fill = 64 - left;
+
+    ctx->total[0] += (unsigned long) ilen;
+    ctx->total[0] &= 0xFFFFFFFF;
+
+    if( ctx->total[0] < (unsigned long) ilen )
+        ctx->total[1]++;
+
+    if( left && ilen >= fill )
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, fill );
+        sha1_process( ctx, ctx->buffer );
+        input += fill;
+        ilen  -= fill;
+        left = 0;
+    }
+
+    while( ilen >= 64 )
+    {
+        sha1_process( ctx, input );
+        input += 64;
+        ilen  -= 64;
+    }
+
+    if( ilen > 0 )
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, ilen );
+    }
+}
+
+static const unsigned char sha1_padding[64] =
+{
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * SHA-1 final digest
+ */
+void sha1_finish( sha1_context *ctx, unsigned char output[20] )
+{
+    unsigned long last, padn;
+    unsigned long high, low;
+    unsigned char msglen[8];
+
+    high = ( ctx->total[0] >> 29 )
+         | ( ctx->total[1] <<  3 );
+    low  = ( ctx->total[0] <<  3 );
+
+    putbe32( high, &msglen[0] );
+    putbe32( low,  &msglen[4] );
+
+    last = ctx->total[0] & 0x3F;
+    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
+
+    sha1_update( ctx, (unsigned char *) sha1_padding, padn );
+    sha1_update( ctx, msglen, 8 );
+
+    putbe32( ctx->state[0], &output[0] );
+    putbe32( ctx->state[1], &output[4] );
+    putbe32( ctx->state[2], &output[8] );
+    putbe32( ctx->state[3], &output[12] );
+    putbe32( ctx->state[4], &output[16] );
+}
+
+/*
+ * output = SHA-1( input buffer )
+ */
+void sha1( const unsigned char *input, size_t ilen, unsigned char output[20] )
+{
+    sha1_context ctx;
+
+    sha1_starts( &ctx );
+    sha1_update( &ctx, input, ilen );
+    sha1_finish( &ctx, output );
+
+    memset( &ctx, 0, sizeof( sha1_context ) );
+}
diff --git a/mercurial/sha1.h b/mercurial/sha1.h
new file mode 100644
--- /dev/null
+++ b/mercurial/sha1.h
@@ -0,0 +1,84 @@
+/**
+ * \file sha1.h
+ *
+ * \brief SHA-1 cryptographic hash function
+ *
+ *  Copyright (C) 2006-2010, Brainspark B.V.
+ *
+ *  This file is part of PolarSSL (http://www.polarssl.org)
+ *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
+ *
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef POLARSSL_SHA1_H
+#define POLARSSL_SHA1_H
+
+#include <string.h>
+
+/**
+ * \brief          SHA-1 context structure
+ */
+typedef struct
+{
+    unsigned long total[2];     /*!< number of bytes processed  */
+    unsigned long state[5];     /*!< intermediate digest state  */
+    unsigned char buffer[64];   /*!< data block being processed */
+}
+sha1_context;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief          SHA-1 context setup
+ *
+ * \param ctx      context to be initialized
+ */
+void sha1_starts( sha1_context *ctx );
+
+/**
+ * \brief          SHA-1 process buffer
+ *
+ * \param ctx      SHA-1 context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void sha1_update( sha1_context *ctx, const unsigned char *input, size_t ilen );
+
+/**
+ * \brief          SHA-1 final digest
+ *
+ * \param ctx      SHA-1 context
+ * \param output   SHA-1 checksum result
+ */
+void sha1_finish( sha1_context *ctx, unsigned char output[20] );
+
+/**
+ * \brief          Output = SHA-1( input buffer )
+ *
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   SHA-1 checksum result
+ */
+void sha1( const unsigned char *input, size_t ilen, unsigned char output[20] );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* sha1.h */
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-import osutil, scmutil, util
+import osutil, scmutil, util, parsers
 import os, stat, errno
 
 _sha = util.sha1
@@ -393,7 +393,10 @@
         return self.path + '/' + self.encode(f)
 
     def getsize(self, path):
-        return os.stat(self.path + '/' + path).st_size
+        try:
+            return os.stat(self.path + '/' + path).st_size
+        except:
+            raise util.Abort(repr(self.path + '/' + path))
 
     def datafiles(self):
         rewrite = False
@@ -425,8 +428,11 @@
 def store(requirements, path, openertype):
     if 'store' in requirements:
         if 'fncache' in requirements:
-            auxencode = lambda f: _auxencode(f, 'dotencode' in requirements)
-            encode = lambda f: _hybridencode(f, auxencode)
+            dotreq = 'dotencode' in requirements
+            encode = dotreq and getattr(parsers, 'pathencode')
+            if not encode:
+                auxencode = lambda f: _auxencode(f, dotreq)
+                encode = lambda f: _hybridencode(f, auxencode)
             return fncachestore(path, openertype, encode)
         return encodedstore(path, openertype)
     return basicstore(path, openertype)
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -421,7 +421,7 @@
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c']),
+    Extension('mercurial.parsers', ['mercurial/parsers.c', 'mercurial/pathencode.c', 'mercurial/sha1.c']),
     ]
 
 osutil_ldflags = []


More information about the Mercurial-devel mailing list