[PATCH 7 of 7] [RFC] parsers: lazy instantiate obs marker values

Tue Mar 14 01:15:49 EDT 2017

# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1489467779 25200
#      Mon Mar 13 22:02:59 2017 -0700
# Node ID e5cdd30f3cb1de9b8cf963a702daf3bdd10d04c9
# Parent  beda0762692f4252b049472b7d653abc5e809151
[RFC] parsers: lazy instantiate obs marker values

Up until now, we parsed obs marker components directly into
Python objects. For each container object, we had the following
Python values

* long for flags
* float for mtime
* long for timezone
* tuple to hold (mtime, timezone)
* bytes for the precursor
* tuple to hold successors
* a bytes for each successor
* tuple to hold parents
* a bytes for each parent
* tuple for metadata
* a tuple for each metadata pair
* a bytes for each metadata key
* a bytes for each metadata value

That's a lot of Python objects. Each one requires memory and CPU
to instantiate. This can add up.

This patch changes the obs marker loading code to lazily
instantiate Python values for each obs marker component.

The parsing code still goes through and looks for buffer overruns.
But instead of converting values to Python objects, it stuffs raw
primitive types away (in the case of static elements) and copies
the buffer containing the variable-width storage (precursor,
successors, parents, and metadata).

The impact on performance to `hg perfloadmarkers` is obvious:

! result: 130347
! wall 0.077677 comb 0.080000 user 0.060000 sys 0.020000 (best of 100)
! wall 0.030492 comb 0.040000 user 0.020000 sys 0.020000 (best of 100)

A downside to this approach is values inside these fields aren't
validated at file load time. Instead, it is deferred to first
access time. I argue that attempting to validate all data at load
time is an anti-pattern for Mercurial. So if this change in behavior
breaks anything, we should change the consumer to allow more
flexible loading behavior.

TESTS FAIL WITH THIS PATCH. THERE ARE LIKELY BUGS. TESTING THIS SERIES
IN READ ONLY MODE IS PROBABLY OK. BUT IF YOU WRITE MARKERS, IT MAY DO
BAD THINGS. YOU HAVE BEEN WARNED.

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -2683,6 +2683,16 @@ typedef struct {
 	PyObject *metadata;
 	PyObject *time;
 	PyObject *parents;
+
+	double mtime;
+	int16_t tz;
+	uint16_t rawflags;
+	Py_ssize_t hashwidth;
+	/* Beginning of variable length data */
+	char *data;
+	unsigned char successorscount;
+	unsigned char parentscount;
+	unsigned char metadatacount;
 } obsmarker1Object;
 
 PyTypeObject obsmarker1Type;
@@ -2707,14 +2717,11 @@ static obsmarker1Object * Obsmarker1_Fro
 {
 	obsmarker1Object *result = NULL;
 	const char *data = databegin;
+	const char *copydatabegin;
 	const char *meta;
 
-	double mtime;
-	int16_t tz;
-	uint16_t flags;
 	unsigned char nsuccs, nparents, nmetadata;
 	Py_ssize_t hashwidth = 20;
-	PyObject *mtimeo, *tzo;
 	int i;
 
 	if (data + FM1_HEADER_SIZE > dataend) {
@@ -2729,38 +2736,14 @@ static obsmarker1Object * Obsmarker1_Fro
 
 	*msize = getbe32(data);
 	data += 4;
-	mtime = getbefloat64(data);
+	result->mtime = getbefloat64(data);
 	data += 8;
-	tz = getbeint16(data);
-	data += 2;
-	flags = getbeuint16(data);
+	result->tz = getbeint16(data);
 	data += 2;
-
-	result->flags = PyLong_FromUnsignedLong(flags);
-	if (result->flags == NULL) {
-		Py_CLEAR(result);
-		goto bail;
-	}
-
-	result->time = PyTuple_New(2);
-	if (result->time == NULL) {
-		Py_CLEAR(result);
-		goto bail;
-	}
-
-	mtimeo = PyFloat_FromDouble(mtime);
-	tzo = PyLong_FromUnsignedLong(tz * 60);
-	if (!mtimeo || !tzo) {
-		Py_XDECREF(mtimeo);
-		Py_XDECREF(tzo);
-		Py_CLEAR(result);
-		goto bail;
-	}
-
-	PyTuple_SET_ITEM(result->time, 0, mtimeo);
-	PyTuple_SET_ITEM(result->time, 1, tzo);
-
-	if (flags & USING_SHA_256) {
+	result->rawflags = getbeuint16(data);
+	data += 2;
+
+	if (result->rawflags & USING_SHA_256) {
 		hashwidth = 32;
 	}
 
@@ -2768,6 +2751,10 @@ static obsmarker1Object * Obsmarker1_Fro
 	nparents = (unsigned char)(*data++);
 	nmetadata = (unsigned char)(*data++);
 
+	result->successorscount = nsuccs;
+	result->parentscount = nparents;
+	result->metadatacount = nmetadata;
+
 	if (databegin + *msize > dataend) {
 		goto overflow;
 	}
@@ -2777,34 +2764,24 @@ static obsmarker1Object * Obsmarker1_Fro
 		goto overflow;
 	}
 
-	result->precursors = PyBytes_FromStringAndSize(data, hashwidth);
+	copydatabegin = data;
+
+	/* Precursor */
 	data += hashwidth;
-	if (result->precursors == NULL) {
-		Py_CLEAR(result);
-		goto bail;
-	}
 
 	if (data + nsuccs * hashwidth > dataend) {
 		goto overflow;
 	}
-	result->successors = readshas(data, nsuccs, hashwidth);
-	if (result->successors == NULL) {
-		Py_CLEAR(result);
-		goto bail;
-	}
+
 	data += nsuccs * hashwidth;
 
 	if (nparents == 1 || nparents == 2) {
 		if (data + nparents * hashwidth > dataend) {
 			goto overflow;
 		}
-		result->parents = readshas(data, nparents, hashwidth);
-		if (result->parents == NULL) {
-			Py_CLEAR(result);
-			goto bail;
-		}
 		data += nparents * hashwidth;
 	} else {
+		/* This is cheap. So just do it. */
 		result->parents = Py_None;
 		Py_INCREF(Py_None);
 	}
@@ -2812,35 +2789,30 @@ static obsmarker1Object * Obsmarker1_Fro
 	if (data + 2 * nmetadata > dataend) {
 		goto overflow;
 	}
+
 	meta = data + (2 * nmetadata);
-	result->metadata = PyTuple_New(nmetadata);
-	if (result->metadata == NULL) {
-		Py_CLEAR(result);
-		goto bail;
-	}
+
 	for (i = 0; i < nmetadata; i++) {
-		PyObject *tmp, *left = NULL, *right = NULL;
 		Py_ssize_t leftsize = (unsigned char)(*data++);
 		Py_ssize_t rightsize = (unsigned char)(*data++);
 		if (meta + leftsize + rightsize > dataend) {
 			goto overflow;
 		}
-		left = PyBytes_FromStringAndSize(meta, leftsize);
+
 		meta += leftsize;
-		right = PyBytes_FromStringAndSize(meta, rightsize);
 		meta += rightsize;
-		tmp = PyTuple_New(2);
-		if (!left || !right || !tmp) {
-			Py_XDECREF(left);
-			Py_XDECREF(right);
-			Py_XDECREF(tmp);
-			goto bail;
-		}
-		PyTuple_SET_ITEM(tmp, 0, left);
-		PyTuple_SET_ITEM(tmp, 1, right);
-		PyTuple_SET_ITEM(result->metadata, i, tmp);
 	}
 
+	/* Copy variable width data into the result so it is accessible for
+	   lazy reading */
+	result->data = PyMem_Malloc(dataend - copydatabegin);
+	if (result->data == NULL) {
+		Py_CLEAR(result);
+		goto bail;
+	}
+
+	memcpy(result->data, copydatabegin, dataend - copydatabegin);
+
 	goto bail;
 
 overflow:
@@ -2850,6 +2822,58 @@ bail:
 	return result;
 }
 
+static int obsmarker1_populate_metadata(obsmarker1Object *self)
+{
+	char *data = self->data;
+	const char *meta;
+	PyObject *metadata = NULL;
+	int i;
+
+	assert(!self->metadata);
+
+	/* precursor */
+	data += self->hashwidth;
+	data += self->hashwidth * self->successorscount;
+	data += self->hashwidth * self->parentscount;
+
+	meta = data + (2 * self->metadatacount);
+
+	metadata = PyTuple_New(self->metadatacount);
+	if (metadata == NULL) {
+		return -1;
+	}
+
+	for (i = 0; i < self->metadatacount; i++) {
+		PyObject *tmp, *left = NULL, *right = NULL;
+		Py_ssize_t leftsize = (unsigned char)(*data++);
+		Py_ssize_t rightsize = (unsigned char)(*data++);
+
+		/* We already validated for overflow when instance
+		   created. No need to check it here. */
+		left = PyBytes_FromStringAndSize(meta, leftsize);
+		meta += leftsize;
+		right = PyBytes_FromStringAndSize(meta, rightsize);
+		meta += rightsize;
+
+		tmp = PyTuple_New(2);
+		if (!left || !right || !tmp) {
+			Py_XDECREF(left);
+			Py_XDECREF(right);
+			Py_XDECREF(tmp);
+			Py_DECREF(metadata);
+			return -1;
+		}
+
+		PyTuple_SET_ITEM(tmp, 0, left);
+		PyTuple_SET_ITEM(tmp, 1, right);
+		PyTuple_SET_ITEM(metadata, i, tmp);
+	}
+
+	self->metadata = metadata;
+
+	return 0;
+}
+
 static Py_ssize_t obsmarker1_length(obsmarker1Object *self)
 {
 	return 6;
@@ -2859,21 +2883,87 @@ static PyObject *obsmarker1_item(obsmark
 {
 	switch (i) {
 	case 0:
+		if (self->precursors == NULL) {
+			/* Precursors is fixed length at start of data */
+			/* TODO consider a buffer for 0 copy */
+			self->precursors = PyBytes_FromStringAndSize(
+				self->data, self->hashwidth);
+			if (self->precursors == NULL) {
+				return NULL;
+			}
+		}
+
 		Py_INCREF(self->precursors);
 		return self->precursors;
 	case 1:
+		if (self->successors == NULL) {
+			/* Successors follow 1 precursor */
+			self->successors = readshas(self->data + self->hashwidth,
+				self->successorscount, self->hashwidth);
+			if (self->successors == NULL) {
+				return NULL;
+			}
+		}
+
 		Py_INCREF(self->successors);
 		return self->successors;
 	case 2:
+		if (self->flags == NULL) {
+			self->flags = PyLong_FromUnsignedLong(self->rawflags);
+			if (self->flags == NULL) {
+				return NULL;
+			}
+		}
+
 		Py_INCREF(self->flags);
 		return self->flags;
 	case 3:
+		if (self->metadata == NULL) {
+			if (obsmarker1_populate_metadata(self)) {
+				return NULL;
+			}
+		}
+
 		Py_INCREF(self->metadata);
 		return self->metadata;
 	case 4:
+		if (self->time == NULL) {
+			PyObject *tuple, *mtime, *tz;
+
+			tuple = PyTuple_New(2);
+			mtime = PyFloat_FromDouble(self->mtime);
+			tz = PyLong_FromLong(self->tz * 60);
+
+			if (!tuple || !mtime || !tz) {
+				Py_XDECREF(tuple);
+				Py_XDECREF(mtime);
+				Py_XDECREF(tz);
+				return NULL;
+			}
+
+			PyTuple_SET_ITEM(tuple, 0, mtime);
+			PyTuple_SET_ITEM(tuple, 1, tz);
+
+			self->time = tuple;
+		}
+
 		Py_INCREF(self->time);
 		return self->time;
 	case 5:
+		if (self->parents == NULL) {
+			/* Parents are after the successors */
+			char *data = self->data;
+			/* precursor */
+			data += self->hashwidth;
+			data += self->hashwidth * self->successorscount;
+
+			self->parents = readshas(data, self->parentscount,
+				self->hashwidth);
+			if (self->parents == NULL) {
+				return NULL;
+			}
+		}
+
 		Py_INCREF(self->parents);
 		return self->parents;
 	default: