D1769: cext: use dedicated type for index entries

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Dec 27 00:36:24 UTC 2017


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Now that we have a handle on our type to represent revlog index
  entries, let's use it.
  
  This commit essentially consists of porting code from PyTuple to
  PyObject.
  
  As part of porting the code, we now retrieve elements in the entry
  type by named attribute instead of integer index.
  
  Before, PyTuple_* APIs allowed us to retrieve a borrowed reference
  to PyObject elements. We can no longer do this via the PyObject_*
  APIs for a type not implemented in C. This required extra refcount
  manipulation in various places.
  
  The C extension is now emitting the new type. And because various
  code is still accessing index entry elements via __getitem__, we
  see a performance regression in the Firefox repository:
  
  $ hg perfrevset '::tip'
  ! wall 0.672636 comb 0.670000 user 0.650000 sys 0.020000 (best of 15)
  ! wall 0.962372 comb 0.960000 user 0.940000 sys 0.020000 (best of 10)
  
  We will claw back this regression in subsequent commits by accessing
  fields by name instead of index.
  
  The version of the "parsers" C extension has been incremented to
  reflect the backwards incompatible API change.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1769

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/policy.py

CHANGE DETAILS

diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -75,7 +75,7 @@
     (r'cext', r'diffhelpers'): 1,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 2,
-    (r'cext', r'parsers'): 4,
+    (r'cext', r'parsers'): 5,
 }
 
 # map import request to other package or module
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -133,10 +133,26 @@
 				    int *ps, int maxrev)
 {
 	if (rev >= self->length - 1) {
-		PyObject *tuple = PyList_GET_ITEM(self->added,
+		PyObject *value;
+		PyObject *entry = PyList_GET_ITEM(self->added,
 						  rev - self->length + 1);
-		ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
-		ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
+		value = PyObject_GetAttrString(entry, "p1rev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not retrieve p1rev");
+			return -1;
+		}
+		ps[0] = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
+
+		value = PyObject_GetAttrString(entry, "p2rev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not retrieve p2rev");
+			return -1;
+		}
+		ps[1] = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
 	} else {
 		const char *data = index_deref(self, rev);
 		ps[0] = getbe32(data + 24);
@@ -224,9 +240,9 @@
 	parent_2 = getbe32(data + 28);
 	c_node_id = data + 32;
 
-	entry = Py_BuildValue(index_entry_format, offset_flags, comp_len,
-			      uncomp_len, base_rev, link_rev,
-			      parent_1, parent_2, c_node_id, 20);
+	entry = PyObject_CallFunction(self->entrytype, index_entry_format,
+			    offset_flags, comp_len, uncomp_len, base_rev,
+			    link_rev, parent_1, parent_2, c_node_id, 20);
 
 	if (entry) {
 		PyObject_GC_UnTrack(entry);
@@ -253,10 +269,16 @@
 		return NULL;
 
 	if (pos >= self->length - 1) {
-		PyObject *tuple, *str;
-		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
-		str = PyTuple_GetItem(tuple, 7);
-		return str ? PyBytes_AS_STRING(str) : NULL;
+		PyObject *entry, *str;
+		char *result;
+		entry = PyList_GET_ITEM(self->added, pos - self->length + 1);
+		str = PyObject_GetAttrString(entry, "node");
+		if (!str) {
+			return NULL;
+		}
+		result = PyBytes_AS_STRING(str);
+		Py_DECREF(str);
+		return result;
 	}
 
 	data = index_deref(self, pos);
@@ -277,21 +299,48 @@
 
 static PyObject *index_insert(indexObject *self, PyObject *args)
 {
-	PyObject *obj;
+	PyObject *obj, *nodeobj, *result;
+	PyObject *tmpobj = NULL;
 	char *node;
 	int index;
 	Py_ssize_t len, nodelen;
 
 	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
 		return NULL;
 
-	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
-		PyErr_SetString(PyExc_TypeError, "8-tuple required");
-		return NULL;
+	/* Legacy callers may pass tuples. Convert to an index entry until
+	 * API compatibility is dropped. */
+	if (PyTuple_Check(obj)) {
+		tmpobj = PyObject_Call(self->entrytype, obj, NULL);
+		if (!tmpobj) {
+			return NULL;
+		}
+
+		/* We own a reference to tmpobj. So any return after here
+		   needs to decrease its refcount. */
+		obj = tmpobj;
 	}
 
-	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
-		return NULL;
+	if (!PyObject_TypeCheck(obj, (PyTypeObject *)self->entrytype)) {
+		PyErr_SetString(PyExc_TypeError, "IndexV1Entry type required");
+		result = NULL;
+		goto cleanup;
+	}
+
+	nodeobj = PyObject_GetAttrString(obj, "node");
+	if (!nodeobj) {
+		PyErr_SetString(PyExc_ValueError, "could not retrieve node");
+		result = NULL;
+		goto cleanup;
+	}
+	if (node_check(nodeobj, &node, &nodelen) == -1) {
+		Py_DECREF(nodeobj);
+		result = NULL;
+		goto cleanup;
+	}
+
+	Py_DECREF(nodeobj);
+	nodeobj = NULL;
 
 	len = index_length(self);
 
@@ -301,23 +350,33 @@
 	if (index != len - 1) {
 		PyErr_SetString(PyExc_IndexError,
 				"insert only supported at index -1");
-		return NULL;
+		result = NULL;
+		goto cleanup;
 	}
 
 	if (self->added == NULL) {
 		self->added = PyList_New(0);
-		if (self->added == NULL)
-			return NULL;
+		if (self->added == NULL) {
+			result = NULL;
+			goto cleanup;
+		}
 	}
 
-	if (PyList_Append(self->added, obj) == -1)
-		return NULL;
+	if (PyList_Append(self->added, obj) == -1) {
+		result = NULL;
+		goto cleanup;
+	}
 
 	if (self->nt)
 		nt_insert(self, node, index);
 
 	Py_CLEAR(self->headrevs);
-	Py_RETURN_NONE;
+	result = Py_None;
+	Py_INCREF(Py_None);
+
+cleanup:
+	Py_XDECREF(tmpobj);
+	return result;
 }
 
 static void _index_clearcaches(indexObject *self)
@@ -837,9 +896,19 @@
 	const char *data;
 
 	if (rev >= self->length - 1) {
-		PyObject *tuple = PyList_GET_ITEM(self->added,
+		PyObject *value;
+		int result;
+		PyObject *entry = PyList_GET_ITEM(self->added,
 			rev - self->length + 1);
-		return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
+		value = PyObject_GetAttrString(entry, "baserev");
+		if (!value) {
+			PyErr_SetString(PyExc_ValueError,
+					"could not obtain baserev");
+			return -2;
+		}
+		result = (int)PyInt_AS_LONG(value);
+		Py_DECREF(value);
+		return result;
 	}
 	else {
 		data = index_deref(self, rev);
@@ -1712,10 +1781,15 @@
 	Py_ssize_t i, len = PyList_GET_SIZE(self->added);
 
 	for (i = start; i < len; i++) {
-		PyObject *tuple = PyList_GET_ITEM(self->added, i);
-		PyObject *node = PyTuple_GET_ITEM(tuple, 7);
+		PyObject *entry = PyList_GET_ITEM(self->added, i);
+		PyObject *node = PyObject_GetAttrString(entry, "node");
+		if (!node) {
+			PyErr_SetString(PyExc_ValueError, "could not get node");
+			return;
+		}
 
 		nt_insert(self, PyBytes_AS_STRING(node), -1);
+		Py_DECREF(node);
 	}
 
 	if (start == 0)
@@ -1895,8 +1969,9 @@
 	}
 
 	/* Initialize before argument-checking to avoid index_dealloc() crash. */
-	self->nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
-					-1, -1, -1, -1, nullid, 20);
+	self->nullentry = PyObject_CallFunction(self->entrytype, "iiiiiiis#",
+						0, 0, 0, -1, -1, -1, -1,
+						nullid, 20);
 	if (!self->nullentry) {
 		return -1;
 	}
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -710,7 +710,7 @@
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 4;
+static const int version = 5;
 
 static void module_init(PyObject *mod)
 {



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list