[PATCH 1 of 2] phase: compute phases in C

Laurent Charignon lcharignon at fb.com
Fri Mar 20 16:08:19 CDT 2015


# HG changeset patch
# User Laurent Charignon <lcharignon at fb.com>
# Date 1426874708 25200
#      Fri Mar 20 11:05:08 2015 -0700
# Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
# Parent  b7f936f47f2b104a60840bae571e009742126afc
phase: compute phases in C

Previously, the phase computation would grow much slower as the oldest draft
commit in the repository grew older (which is very common in repos with evolve
on) and the number of commits increase.
By rewriting the computation in C we can speed it up from 700ms to 7ms on
a large repository whose oldest draft commit is a year old.

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -13,6 +13,7 @@
 #include <string.h>
 
 #include "util.h"
+#define MIN(a, b) (((a)<(b))?(a):(b))
 
 static char *versionerrortext = "Python minor version mismatch";
 
@@ -911,6 +912,143 @@
 	}
 }
 
+static long add_roots_get_min(indexObject *self, PyObject *list, int marker,
+                              char *phases)
+{
+	if (PyList_Size(list) != 0) {
+		long min_idx = index_length(self) + 1;
+		PyObject *iter = PyObject_GetIter(list);
+		if (iter == NULL) {
+			return -2;
+		}
+		PyObject *iter_item;
+		long iter_item_long;
+		while ((iter_item = PyIter_Next(iter)))
+		{
+			iter_item_long = PyInt_AS_LONG(iter_item);
+			Py_DECREF(iter_item);
+			if (iter_item_long < min_idx) {
+				min_idx = iter_item_long;
+			}
+			phases[iter_item_long] = marker;
+		}
+		Py_DECREF(iter);
+
+		return min_idx;
+	} else {
+		return index_length(self);
+	}
+}
+
+static inline void set_phase_from_parents(char *phases, int parent_1,
+                                          int parent_2, int i )
+{
+	if ((parent_1 >= 0 && phases[parent_1] > phases[i])) {
+		phases[i] = phases[parent_1];
+	}
+	if ((parent_2 >= 0 && phases[parent_2] > phases[i])) {
+		phases[i] = phases[parent_2];
+	}
+}
+
+static PyObject *compute_phases(indexObject *self, PyObject *args)
+{
+	char *phases = NULL;
+	PyObject **phasemarkers = NULL;
+	PyObject *roots = Py_None;
+
+	/* Argument validation */
+	if (!PyArg_ParseTuple(args, "O", &roots)) {
+		goto bail;
+	}
+	if (roots == NULL || !PyList_Check(roots)) {
+		goto bail;
+	}
+
+	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+	Py_ssize_t len = index_length(self) - 1;
+	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+
+	/* Put the phase information of all the roots in phases */
+	Py_ssize_t numphase = PyList_Size(roots)+1;
+	Py_ssize_t minrevallphases = len + 1;
+	Py_ssize_t i = 0;
+	for (i = 0; i < numphase-1; i++) {
+		PyObject *phaseroots = PyList_GetItem(roots, i);
+
+		if (!PyList_Check(phaseroots)) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+
+		Py_ssize_t minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
+		/* Error from add_roots_get_min */
+		if (minrevphase == -2 ) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+		minrevallphases = MIN(minrevallphases, minrevphase);
+	}
+
+	/* Propagate the phase information from the roots to the revs */
+	if (minrevallphases != -1) {
+		for (i = minrevallphases; i < self->raw_length; i++) {
+			const char *data = index_deref(self, i);
+			set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i);
+		}
+
+		for (i = 0; i < addlen; i++) {
+			PyObject *rev = PyList_GET_ITEM(self->added, i);
+			PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+			PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+			long parent_1, parent_2;
+
+			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+				PyErr_SetString(PyExc_TypeError,
+						"revlog parents are invalid");
+				goto bail;
+			}
+			parent_1 = PyInt_AS_LONG(p1);
+			parent_2 = PyInt_AS_LONG(p2);
+			set_phase_from_parents(phases, parent_1, parent_2, i+self->raw_length);
+		}
+	}
+
+	/* Build list of markers to avoid creating python objects repeatedly */
+	phasemarkers = calloc(numphase, sizeof(PyObject *));
+	if (phasemarkers == NULL) {
+		goto bail;
+	}
+	for (i = 0; i < numphase; i++) {
+		phasemarkers[i] = PyInt_FromLong(i);
+	}
+
+	/* Transform phase list to a python list */
+	PyObject *phaseslist = PyList_New(len);
+	Py_INCREF(phaseslist);
+	if (phaseslist == NULL) {
+		Py_XDECREF(phaseslist);
+		goto bail;
+	}
+	for (i = 0; i < len; i++) {
+		PyList_SET_ITEM(phaseslist, i, phasemarkers[(int)phases[i]]);
+	}
+
+	/* Cleanup and return */
+	free(phases);
+	for (i = 0; i < numphase; i++) {
+		Py_XDECREF(phasemarkers[i]);
+	}
+	free(phasemarkers);
+	return phaseslist;
+
+bail:
+	if (phases != NULL) {
+		free(phases);
+	}
+	return Py_None;
+}
+
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
 {
 	Py_ssize_t i, len, addlen;
@@ -2043,6 +2181,8 @@
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
+	{"computephases", compute_phases, METH_VARARGS,
+		"compute phases"},
 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
 	 "get head revisions"}, /* Can do filtering since 3.2 */
 	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,


More information about the Mercurial-devel mailing list