[PATCH] parse_manifest: rewrite to use memchr

Siddharth Agarwal sid0 at fb.com
Sat Sep 7 22:07:02 CDT 2013


# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1378536479 25200
#      Fri Sep 06 23:47:59 2013 -0700
# Node ID 3219c8821869d631bd6dab602a66019c4c43639c
# Parent  6638dd23999d66a95ce28816f035a1d051963290
parse_manifest: rewrite to use memchr

memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc 2.12
on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from 0.116
seconds to 0.095 seconds.

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -77,7 +77,7 @@
 static PyObject *parse_manifest(PyObject *self, PyObject *args)
 {
 	PyObject *mfdict, *fdict;
-	char *str, *cur, *start, *zero;
+	char *str, *start, *end, *zero, *newline;
 	int len;
 
 	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,21 +86,24 @@
 			      &str, &len))
 		goto quit;
 
-	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+	start = str;
+	end = str + len;
+	while (start < end) {
 		PyObject *file = NULL, *node = NULL;
 		PyObject *flags = NULL;
 		ptrdiff_t nlen;
 
-		if (!*cur) {
-			zero = cur;
-			continue;
-		}
-		else if (*cur != '\n')
-			continue;
-
+		zero = memchr(start, '\0', end - start);
 		if (!zero) {
 			PyErr_SetString(PyExc_ValueError,
-					"manifest entry has no separator");
+					"manifest contains trailing garbage");
+			goto quit;
+		}
+
+		newline = memchr(zero + 1, '\n', end - (zero + 1));
+		if (!newline) {
+			PyErr_SetString(PyExc_ValueError,
+					"manifest contains trailing garbage");
 			goto quit;
 		}
 
@@ -109,7 +112,7 @@
 		if (!file)
 			goto bail;
 
-		nlen = cur - zero - 1;
+		nlen = newline - zero - 1;
 
 		node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
 		if (!node)
@@ -128,8 +131,7 @@
 		if (PyDict_SetItem(mfdict, file, node) == -1)
 			goto bail;
 
-		start = cur + 1;
-		zero = NULL;
+		start = newline + 1;
 
 		Py_XDECREF(flags);
 		Py_XDECREF(node);
@@ -142,12 +144,6 @@
 		goto quit;
 	}
 
-	if (len > 0 && *(cur - 1) != '\n') {
-		PyErr_SetString(PyExc_ValueError,
-				"manifest contains trailing garbage");
-		goto quit;
-	}
-
 	Py_INCREF(Py_None);
 	return Py_None;
 quit:


More information about the Mercurial-devel mailing list