[PATCH resend] revlog: switch to a C version of headrevs

Bryan O'Sullivan bos at serpentine.com
Sun May 13 04:52:45 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1336902741 -7200
# Node ID ee6119dfc06366513ba158b0b3111d29f67b0e5b
# Parent  c586d9d9624f96b0ca933262a977e1f29842d11b
revlog: switch to a C version of headrevs

The C implementation is more than 100 times faster than the Python
version (which is still available as a fallback). The result of the
headrevs calculation is cached, because it's still somewhat expensive,
and headrevs is called several times when the tag cache is stale
or missing.

In practice, in a repo with 330,000 revs and a stale .hg/cache/tags
file, this patch improves the performance of "hg tip" from 2.2 to
1.6 seconds.

diff -r c586d9d9624f -r ee6119dfc063 mercurial/parsers.c
--- a/mercurial/parsers.c	Sun May 13 11:49:48 2012 +0200
+++ b/mercurial/parsers.c	Sun May 13 11:52:21 2012 +0200
@@ -246,6 +246,7 @@
 	Py_ssize_t raw_length; /* original number of elements */
 	Py_ssize_t length;     /* current number of elements */
 	PyObject *added;       /* populated on demand */
+	PyObject *headrevs;    /* cache, invalidated on changes */
 	nodetree *nt;          /* base-16 trie */
 	int ntlength;          /* # nodes in use */
 	int ntcapacity;        /* # nodes allocated */
@@ -463,6 +464,8 @@
 	if (self->nt)
 		nt_insert(self, node, (int)offset);
 
+	Py_CLEAR(self->headrevs);
+
 	Py_RETURN_NONE;
 }
 
@@ -488,6 +491,7 @@
 		free(self->nt);
 		self->nt = NULL;
 	}
+	Py_CLEAR(self->headrevs);
 }
 
 static PyObject *index_clearcaches(indexObject *self)
@@ -538,6 +542,112 @@
 	return NULL;
 }
 
+/*
+ * When we cache a list, we want to be sure the caller can't mutate
+ * the cached copy.
+ */
+static PyObject *list_copy(PyObject *list)
+{
+	Py_ssize_t len = PyList_GET_SIZE(list);
+	PyObject *newlist = PyList_New(len);
+	Py_ssize_t i;
+
+	if (newlist == NULL)
+		return NULL;
+
+	for (i = 0; i < len; i++) {
+		PyObject *obj = PyList_GET_ITEM(list, i);
+		Py_INCREF(obj);
+		PyList_SET_ITEM(newlist, i, obj);
+	}
+
+	return newlist;
+}
+
+static PyObject *index_headrevs(indexObject *self)
+{
+	Py_ssize_t i, len, addlen;
+	char *ishead = NULL;
+	PyObject *heads;
+
+	if (self->headrevs)
+		return list_copy(self->headrevs);
+
+	len = index_length(self) - 1;
+	heads = PyList_New(0);
+	if (heads == NULL)
+		goto bail;
+	if (len == 0) {
+		PyObject *nullid = PyInt_FromLong(-1);
+		if (nullid == NULL)
+			goto bail;
+		if (PyList_Append(heads, nullid) == -1) {
+			Py_DECREF(nullid);
+			goto bail;
+		}
+		goto done;
+	}
+
+	ishead = malloc(len);
+	if (ishead == NULL)
+		goto bail;
+	memset(ishead, 1, len);
+
+	for (i = 0; i < self->raw_length; i++) {
+		const char *data = index_deref(self, i);
+		int parent_1 = getbe32(data + 24);
+		int parent_2 = getbe32(data + 28);
+		if (parent_1 >= 0)
+			ishead[parent_1] = 0;
+		if (parent_2 >= 0)
+			ishead[parent_2] = 0;
+	}
+
+	addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+
+	for (i = 0; i < addlen; i++) {
+		PyObject *rev = PyList_GET_ITEM(self->added, i);
+		PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+		PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+		long parent_1, parent_2;
+
+		if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+			PyErr_SetString(PyExc_TypeError,
+					"revlog parents are invalid");
+			goto bail;
+		}
+		parent_1 = PyInt_AS_LONG(p1);
+		parent_2 = PyInt_AS_LONG(p2);
+		if (parent_1 >= 0)
+			ishead[parent_1] = 0;
+		if (parent_2 >= 0)
+			ishead[parent_2] = 0;
+	}
+
+	for (i = 0; i < len; i++) {
+		PyObject *head;
+
+		if (!ishead[i])
+			continue;
+		head = PyInt_FromLong(i);
+		if (head == NULL)
+			goto bail;
+		if (PyList_Append(heads, head) == -1) {
+			Py_DECREF(head);
+			goto bail;
+		}
+	}
+
+done:
+	self->headrevs = heads;
+	free(ishead);
+	return list_copy(self->headrevs);
+bail:
+	Py_XDECREF(heads);
+	free(ishead);
+	return NULL;
+}
+
 static inline int nt_level(const char *node, Py_ssize_t level)
 {
 	int v = node[level>>1];
@@ -936,6 +1046,7 @@
 {
 	Py_ssize_t start, stop, step, slicelength;
 	Py_ssize_t length = index_length(self);
+	int ret = 0;
 
 	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
 				 &start, &stop, &step, &slicelength) < 0)
@@ -981,7 +1092,9 @@
 				self->ntrev = (int)start;
 		}
 		self->length = start + 1;
-		return 0;
+		if (start < self->raw_length)
+			self->raw_length = start;
+		goto done;
 	}
 
 	if (self->nt) {
@@ -989,10 +1102,12 @@
 		if (self->ntrev > start)
 			self->ntrev = (int)start;
 	}
-	return self->added
-		? PyList_SetSlice(self->added, start - self->length + 1,
-				  PyList_GET_SIZE(self->added), NULL)
-		: 0;
+	if (self->added)
+		ret = PyList_SetSlice(self->added, start - self->length + 1,
+				      PyList_GET_SIZE(self->added), NULL);
+done:
+	Py_CLEAR(self->headrevs);
+	return ret;
 }
 
 /*
@@ -1082,6 +1197,7 @@
 	self->cache = NULL;
 
 	self->added = NULL;
+	self->headrevs = NULL;
 	self->offsets = NULL;
 	self->nt = NULL;
 	self->ntlength = self->ntcapacity = 0;
@@ -1146,6 +1262,8 @@
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
+	{"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
+	 "get head revisions"},
 	{"insert", (PyCFunction)index_insert, METH_VARARGS,
 	 "insert an index entry"},
 	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
diff -r c586d9d9624f -r ee6119dfc063 mercurial/revlog.py
--- a/mercurial/revlog.py	Sun May 13 11:49:48 2012 +0200
+++ b/mercurial/revlog.py	Sun May 13 11:52:21 2012 +0200
@@ -635,6 +635,10 @@
         return (orderedout, roots, heads)
 
     def headrevs(self):
+        try:
+            return self.index.headrevs()
+        except AttributeError:
+            pass
         count = len(self)
         if not count:
             return [nullrev]


More information about the Mercurial-devel mailing list