[PATCH v3] parsers: add a C function to pack the dirstate

Bryan O'Sullivan bos at serpentine.com
Tue May 22 16:58:20 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1337723862 25200
# Node ID cc012f33398867f690963151f82c24fa1cbac89b
# Parent  91f44cb2512919d7fe7f17f9c6f2030b6ff19b34
parsers: add a C function to pack the dirstate

This is about 9 times faster than the Python dirstate packing code.
The relatively small speedup is due to the poor locality and memory
access patterns caused by traversing dicts and other boxed Python
values.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -498,12 +498,24 @@ class dirstate(object):
             return
         st = self._opener("dirstate", "w", atomictemp=True)
 
+        def finish(s):
+            st.write(s)
+            st.close()
+            self._lastnormaltime = 0
+            self._dirty = self._dirtypl = False
+
         # use the modification time of the newly created temporary file as the
         # filesystem's notion of 'now'
-        now = int(util.fstat(st).st_mtime)
+        now = util.fstat(st).st_mtime
+        copymap = self._copymap
+        try:
+            finish(parsers.pack_dirstate(self._map, copymap, self._pl, now))
+            return
+        except AttributeError:
+            pass
 
+        now = int(now)
         cs = cStringIO.StringIO()
-        copymap = self._copymap
         pack = struct.pack
         write = cs.write
         write("".join(self._pl))
@@ -526,10 +538,7 @@ class dirstate(object):
             e = pack(_format, e[0], e[1], e[2], e[3], len(f))
             write(e)
             write(f)
-        st.write(cs.getvalue())
-        st.close()
-        self._lastnormaltime = 0
-        self._dirty = self._dirtypl = False
+        finish(cs.getvalue())
 
     def _dirignore(self, f):
         if f == '.':
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -214,6 +214,141 @@ quit:
 	return ret;
 }
 
+static inline int getintat(PyObject *tuple, int off, uint32_t *v)
+{
+	PyObject *o = PyTuple_GET_ITEM(tuple, off);
+	if (!PyInt_Check(o)) {
+		PyErr_SetString(PyExc_TypeError, "expected an int");
+		return -1;
+	}
+	*v = (uint32_t)PyInt_AS_LONG(o);
+	return 0;
+}
+
+static PyObject *dirstate_unset;
+
+/*
+ * Efficiently pack a dirstate object into its on-disk format.
+ */
+static PyObject *pack_dirstate(PyObject *self, PyObject *args)
+{
+	PyObject *packobj = NULL;
+	PyObject *map, *copymap, *pl;
+	Py_ssize_t nbytes, pos, l;
+	PyObject *k, *v, *pn;
+	char *p, *s;
+	double now;
+
+	if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
+			      &PyDict_Type, &map, &PyDict_Type, &copymap,
+			      &pl, &now))
+		return NULL;
+
+	if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
+		PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
+		return NULL;
+	}
+
+	/* Figure out how much we need to allocate. */
+	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
+		PyObject *c;
+		if (!PyString_Check(k)) {
+			PyErr_SetString(PyExc_TypeError, "expected string key");
+			goto bail;
+		}
+		nbytes += PyString_GET_SIZE(k) + 17;
+		c = PyDict_GetItem(copymap, k);
+		if (c) {
+			if (!PyString_Check(c)) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected string key");
+				goto bail;
+			}
+			nbytes += PyString_GET_SIZE(c) + 1;
+		}
+	}
+
+	packobj = PyString_FromStringAndSize(NULL, nbytes);
+	if (packobj == NULL)
+		goto bail;
+
+	p = PyString_AS_STRING(packobj);
+
+	pn = PySequence_ITEM(pl, 0);
+	if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
+		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
+		goto bail;
+	}
+	memcpy(p, s, l);
+	p += 20;
+	pn = PySequence_ITEM(pl, 1);
+	if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
+		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
+		goto bail;
+	}
+	memcpy(p, s, l);
+	p += 20;
+
+	for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
+		uint32_t mode, size, mtime;
+		Py_ssize_t len, l;
+		PyObject *o;
+		char *s, *t;
+		int err;
+
+		if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
+			PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
+			goto bail;
+		}
+		o = PyTuple_GET_ITEM(v, 0);
+		if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
+			PyErr_SetString(PyExc_TypeError, "expected one byte");
+			goto bail;
+		}
+		*p++ = *s;
+		err = getintat(v, 1, &mode);
+		err |= getintat(v, 2, &size);
+		err |= getintat(v, 3, &mtime);
+		if (err)
+			goto bail;
+		if (*s == 'n' && mtime == (uint32_t)now) {
+			/* See dirstate.py:write for why we do this. */
+			if (PyDict_SetItem(map, k, dirstate_unset) == -1)
+				goto bail;
+			mode = 0, size = -1, mtime = -1;
+		}
+		putbe32(mode, p);
+		putbe32(size, p + 4);
+		putbe32(mtime, p + 8);
+		t = p + 12;
+		p += 16;
+		len = PyString_GET_SIZE(k);
+		memcpy(p, PyString_AS_STRING(k), len);
+		p += len;
+		o = PyDict_GetItem(copymap, k);
+		if (o) {
+			*p++ = '\0';
+			l = PyString_GET_SIZE(o);
+			memcpy(p, PyString_AS_STRING(o), l);
+			p += l;
+			len += l + 1;
+		}
+		putbe32((uint32_t) len, t);
+	}
+
+	pos = p - PyString_AS_STRING(packobj);
+	if (pos != nbytes) {
+		PyErr_Format(PyExc_SystemError, "bad dirstate pack: %ld != %ld",
+			     pos, nbytes);
+		goto bail;
+	}
+
+	return packobj;
+bail:
+	Py_XDECREF(packobj);
+	return NULL;
+}
+
 /*
  * A base-16 trie for fast node->rev mapping.
  *
@@ -1354,6 +1489,7 @@ bail:
 static char parsers_doc[] = "Efficient content parsing.";
 
 static PyMethodDef methods[] = {
+	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
@@ -1373,6 +1509,8 @@ static void module_init(PyObject *mod)
 				  -1, -1, -1, -1, nullid, 20);
 	if (nullentry)
 		PyObject_GC_UnTrack(nullentry);
+
+	dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
 }
 
 #ifdef IS_PY3K


More information about the Mercurial-devel mailing list