[PATCH] parse_manifest: rewrite to use memchr
Siddharth Agarwal
sid0 at fb.com
Sat Sep 7 22:07:02 CDT 2013
# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1378536479 25200
# Fri Sep 06 23:47:59 2013 -0700
# Node ID 3219c8821869d631bd6dab602a66019c4c43639c
# Parent 6638dd23999d66a95ce28816f035a1d051963290
parse_manifest: rewrite to use memchr
memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc 2.12
on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from 0.116
seconds to 0.095 seconds.
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -77,7 +77,7 @@
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
PyObject *mfdict, *fdict;
- char *str, *cur, *start, *zero;
+ char *str, *start, *end, *zero, *newline;
int len;
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,21 +86,24 @@
&str, &len))
goto quit;
- for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+ start = str;
+ end = str + len;
+ while (start < end) {
PyObject *file = NULL, *node = NULL;
PyObject *flags = NULL;
ptrdiff_t nlen;
- if (!*cur) {
- zero = cur;
- continue;
- }
- else if (*cur != '\n')
- continue;
-
+ zero = memchr(start, '\0', end - start);
if (!zero) {
PyErr_SetString(PyExc_ValueError,
- "manifest entry has no separator");
+ "manifest contains trailing garbage");
+ goto quit;
+ }
+
+ newline = memchr(zero + 1, '\n', end - (zero + 1));
+ if (!newline) {
+ PyErr_SetString(PyExc_ValueError,
+ "manifest contains trailing garbage");
goto quit;
}
@@ -109,7 +112,7 @@
if (!file)
goto bail;
- nlen = cur - zero - 1;
+ nlen = newline - zero - 1;
node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
if (!node)
@@ -128,8 +131,7 @@
if (PyDict_SetItem(mfdict, file, node) == -1)
goto bail;
- start = cur + 1;
- zero = NULL;
+ start = newline + 1;
Py_XDECREF(flags);
Py_XDECREF(node);
@@ -142,12 +144,6 @@
goto quit;
}
- if (len > 0 && *(cur - 1) != '\n') {
- PyErr_SetString(PyExc_ValueError,
- "manifest contains trailing garbage");
- goto quit;
- }
-
Py_INCREF(Py_None);
return Py_None;
quit:
More information about the Mercurial-devel
mailing list