[PATCH 1 of 3 rfc] mpath: split the cpython part from the pure C part

Maciej Fijalkowski fijall at gmail.com
Mon Mar 7 10:43:15 EST 2016


# HG changeset patch
# User fijal
# Date 1456135587 -3600
#      Mon Feb 22 11:06:27 2016 +0100
# Node ID c41ede700e5cf47d7eb44408fd2aa0a55f21c81d
# Parent  41dcd754526612c43b9695df8851557c851828ef
mpath: split the cpython part from the pure C part

The goal of this series of patches is to explore potential for using mercurial
with PyPy. PyPy is known to run some operations (e.g. hg log) faster than
CPython, but some of them have a significant amount of runtime spent in
a C extension, where PyPy uses inefficient pure Python implementation.

The proposal is as follows:
* we'll keep C extensions for CPython (as this is the fastest way to
  run anything there)

For PyPy, one of the strategies will be executed:

* C extensions will be split into two parts, pure-C and CPython C API,
  PyPy will use cffi to call the pure C part
* pure python code will be sped up

For the first patch, I split mpatch.c into a couple of files, on pure C
and CPython C API basis. It also reworks error handling to make it not
use the CPython C API everywhere (the rest of the changes coming)

This is not a commit passing tests, just a request for comments on the general
direction

diff -r 41dcd7545266 -r c41ede700e5c mercurial/bitmanipulation.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/bitmanipulation.h	Mon Feb 22 11:06:27 2016 +0100
@@ -0,0 +1,48 @@
+#pragma once
+
+static inline uint32_t getbe32(const char *c)
+{
+	const unsigned char *d = (const unsigned char *)c;
+
+	return ((d[0] << 24) |
+		(d[1] << 16) |
+		(d[2] << 8) |
+		(d[3]));
+}
+
+static inline int16_t getbeint16(const char *c)
+{
+	const unsigned char *d = (const unsigned char *)c;
+
+	return ((d[0] << 8) |
+		(d[1]));
+}
+
+static inline uint16_t getbeuint16(const char *c)
+{
+	const unsigned char *d = (const unsigned char *)c;
+
+	return ((d[0] << 8) |
+		(d[1]));
+}
+
+static inline void putbe32(uint32_t x, char *c)
+{
+	c[0] = (x >> 24) & 0xff;
+	c[1] = (x >> 16) & 0xff;
+	c[2] = (x >> 8) & 0xff;
+	c[3] = (x) & 0xff;
+}
+
+static inline double getbefloat64(const char *c)
+{
+	const unsigned char *d = (const unsigned char *)c;
+	double ret;
+	int i;
+	uint64_t t = 0;
+	for (i = 0; i < 8; i++) {
+		t = (t<<8) + d[i];
+	}
+	memcpy(&ret, &t, sizeof(t));
+	return ret;
+}
diff -r 41dcd7545266 -r c41ede700e5c mercurial/mpatch.c
--- a/mercurial/mpatch.c	Wed Feb 24 15:55:44 2016 -0600
+++ b/mercurial/mpatch.c	Mon Feb 22 11:06:27 2016 +0100
@@ -20,26 +20,15 @@
  of the GNU General Public License, incorporated herein by reference.
 */
 
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "util.h"
+#include "bitmanipulation.h"
+#include "mpatch.h"
 
-static char mpatch_doc[] = "Efficient binary patching.";
-static PyObject *mpatch_Error;
+char *mpatch_error = NULL;
 
-struct frag {
-	int start, end, len;
-	const char *data;
-};
-
-struct flist {
-	struct frag *base, *head, *tail;
-};
-
-static struct flist *lalloc(Py_ssize_t size)
+static struct flist *lalloc(ssize_t size)
 {
 	struct flist *a = NULL;
 
@@ -56,12 +45,10 @@
 		free(a);
 		a = NULL;
 	}
-	if (!PyErr_Occurred())
-		PyErr_NoMemory();
 	return NULL;
 }
 
-static void lfree(struct flist *a)
+void lfree(struct flist *a)
 {
 	if (a) {
 		free(a->base);
@@ -69,7 +56,7 @@
 	}
 }
 
-static Py_ssize_t lsize(struct flist *a)
+ssize_t lsize(struct flist *a)
 {
 	return a->tail - a->head;
 }
@@ -159,7 +146,7 @@
 
 /* combine hunk lists a and b, while adjusting b for offset changes in a/
    this deletes a and b and returns the resultant list. */
-static struct flist *combine(struct flist *a, struct flist *b)
+struct flist *combine(struct flist *a, struct flist *b)
 {
 	struct flist *c = NULL;
 	struct frag *bh, *ct;
@@ -198,7 +185,7 @@
 }
 
 /* decode a binary patch into a hunk list */
-static struct flist *decode(const char *bin, Py_ssize_t len)
+struct flist *decode(const char *bin, ssize_t len)
 {
 	struct flist *l;
 	struct frag *lt;
@@ -206,8 +193,10 @@
 
 	/* assume worst case size, we won't have many of these lists */
 	l = lalloc(len / 12);
-	if (!l)
-		return NULL;
+	if (!l) {
+            mpatch_error = "no memory";
+            return NULL;
+        }
 
 	lt = l->tail;
 
@@ -223,8 +212,7 @@
 	}
 
 	if (pos != len) {
-		if (!PyErr_Occurred())
-			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+                mpatch_error = "patch cannot be decoded";
 		lfree(l);
 		return NULL;
 	}
@@ -234,16 +222,14 @@
 }
 
 /* calculate the size of resultant text */
-static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
+ssize_t calcsize(ssize_t len, struct flist *l)
 {
-	Py_ssize_t outlen = 0, last = 0;
+	ssize_t outlen = 0, last = 0;
 	struct frag *f = l->head;
 
 	while (f != l->tail) {
 		if (f->start < last || f->end > len) {
-			if (!PyErr_Occurred())
-				PyErr_SetString(mpatch_Error,
-				                "invalid patch");
+                        mpatch_error = "invalid_patch";
 			return -1;
 		}
 		outlen += f->start - last;
@@ -256,7 +242,7 @@
 	return outlen;
 }
 
-static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
+int apply(char *buf, const char *orig, ssize_t len, struct flist *l)
 {
 	struct frag *f = l->head;
 	int last = 0;
@@ -264,9 +250,7 @@
 
 	while (f != l->tail) {
 		if (f->start < last || f->end > len) {
-			if (!PyErr_Occurred())
-				PyErr_SetString(mpatch_Error,
-				                "invalid patch");
+                        mpatch_error = "invalid patch";
 			return 0;
 		}
 		memcpy(p, orig + last, f->start - last);
@@ -280,84 +264,9 @@
 	return 1;
 }
 
-/* recursively generate a patch of all bins between start and end */
-static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
+ssize_t _patchedsize(long orig, char* bin, ssize_t patchlen)
 {
-	Py_ssize_t len, blen;
-	const char *buffer;
-
-	if (start + 1 == end) {
-		/* trivial case, output a decoded list */
-		PyObject *tmp = PyList_GetItem(bins, start);
-		if (!tmp)
-			return NULL;
-		if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
-			return NULL;
-		return decode(buffer, blen);
-	}
-
-	/* divide and conquer, memory management is elsewhere */
-	len = (end - start) / 2;
-	return combine(fold(bins, start, start + len),
-		       fold(bins, start + len, end));
-}
-
-static PyObject *
-patches(PyObject *self, PyObject *args)
-{
-	PyObject *text, *bins, *result;
-	struct flist *patch;
-	const char *in;
-	char *out;
-	Py_ssize_t len, outlen, inlen;
-
-	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
-		return NULL;
-
-	len = PyList_Size(bins);
-	if (!len) {
-		/* nothing to do */
-		Py_INCREF(text);
-		return text;
-	}
-
-	if (PyObject_AsCharBuffer(text, &in, &inlen))
-		return NULL;
-
-	patch = fold(bins, 0, len);
-	if (!patch)
-		return NULL;
-
-	outlen = calcsize(inlen, patch);
-	if (outlen < 0) {
-		result = NULL;
-		goto cleanup;
-	}
-	result = PyBytes_FromStringAndSize(NULL, outlen);
-	if (!result) {
-		result = NULL;
-		goto cleanup;
-	}
-	out = PyBytes_AsString(result);
-	if (!apply(out, in, inlen, patch)) {
-		Py_DECREF(result);
-		result = NULL;
-	}
-cleanup:
-	lfree(patch);
-	return result;
-}
-
-/* calculate size of a patched file directly */
-static PyObject *
-patchedsize(PyObject *self, PyObject *args)
-{
-	long orig, start, end, len, outlen = 0, last = 0, pos = 0;
-	Py_ssize_t patchlen;
-	char *bin;
-
-	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
-		return NULL;
+	long start, end, len, outlen = 0, last = 0, pos = 0;
 
 	while (pos >= 0 && pos < patchlen) {
 		start = getbe32(bin + pos);
@@ -372,49 +281,9 @@
 	}
 
 	if (pos != patchlen) {
-		if (!PyErr_Occurred())
-			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
-		return NULL;
+            return -1;
 	}
 
 	outlen += orig - last;
-	return Py_BuildValue("l", outlen);
+        return outlen;
 }
-
-static PyMethodDef methods[] = {
-	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
-	{"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
-	{NULL, NULL}
-};
-
-#ifdef IS_PY3K
-static struct PyModuleDef mpatch_module = {
-	PyModuleDef_HEAD_INIT,
-	"mpatch",
-	mpatch_doc,
-	-1,
-	methods
-};
-
-PyMODINIT_FUNC PyInit_mpatch(void)
-{
-	PyObject *m;
-
-	m = PyModule_Create(&mpatch_module);
-	if (m == NULL)
-		return NULL;
-
-	mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
-	Py_INCREF(mpatch_Error);
-	PyModule_AddObject(m, "mpatchError", mpatch_Error);
-
-	return m;
-}
-#else
-PyMODINIT_FUNC
-initmpatch(void)
-{
-	Py_InitModule3("mpatch", methods, mpatch_doc);
-	mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
-}
-#endif
diff -r 41dcd7545266 -r c41ede700e5c mercurial/mpatch.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/mpatch.h	Mon Feb 22 11:06:27 2016 +0100
@@ -0,0 +1,19 @@
+#pragma once
+
+struct frag {
+	int start, end, len;
+	const char *data;
+};
+
+struct flist {
+	struct frag *base, *head, *tail;
+};
+
+extern char* mpatch_error;
+
+struct flist *decode(const char *bin, ssize_t len);
+struct flist *combine(struct flist *a, struct flist *b);
+ssize_t calcsize(ssize_t len, struct flist *l);
+void lfree(struct flist *a);
+int apply(char *buf, const char *orig, ssize_t len, struct flist *l);
+ssize_t _patchedsize(long orig, char* bin, ssize_t patchlen);
diff -r 41dcd7545266 -r c41ede700e5c mercurial/mpatch_cpy.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/mpatch_cpy.c	Mon Feb 22 11:06:27 2016 +0100
@@ -0,0 +1,159 @@
+/* CPython C API interface to mpatch.c, an effiecient patch
+   manipulation algorithm */
+
+#define PY_SSIZE_T_CLEAN
+
+#include <Python.h>
+#include "util.h"
+
+static char mpatch_doc[] = "Efficient binary patching.";
+static PyObject *mpatch_Error;
+
+#include "mpatch.h"
+
+/* recursively generate a patch of all bins between start and end */
+static struct flist *fold(PyObject *bins, ssize_t start, ssize_t end)
+{
+	ssize_t len, blen;
+	const char *buffer;
+        struct flist *res;
+
+	if (start + 1 == end) {
+		/* trivial case, output a decoded list */
+		PyObject *tmp = PyList_GetItem(bins, start);
+		if (!tmp)
+			return NULL;
+		if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
+			return NULL;
+		res = decode(buffer, blen);
+                goto finish;
+	}
+
+	/* divide and conquer, memory management is elsewhere */
+	len = (end - start) / 2;
+	res = combine(fold(bins, start, start + len),
+		       fold(bins, start + len, end));
+ finish:
+        if (!res && !PyErr_Occurred()) {
+            if (strncmp(mpatch_error, "no memory", 9) == 0) {
+                mpatch_error = NULL;
+                PyErr_NoMemory();
+            } else {
+                PyErr_SetString(mpatch_Error, mpatch_error);
+                mpatch_error = NULL;
+            }
+        }
+        return res;
+}
+
+static PyObject *
+patches(PyObject *self, PyObject *args)
+{
+	PyObject *text, *bins, *result;
+	struct flist *patch;
+	const char *in;
+	char *out;
+	ssize_t len, outlen, inlen;
+
+	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
+		return NULL;
+
+	len = PyList_Size(bins);
+	if (!len) {
+		/* nothing to do */
+		Py_INCREF(text);
+		return text;
+	}
+
+	if (PyObject_AsCharBuffer(text, &in, &inlen))
+		return NULL;
+
+	patch = fold(bins, 0, len);
+	if (!patch)
+		return NULL;
+
+	outlen = calcsize(inlen, patch);
+	if (outlen < 0) {
+                if (!PyErr_Occurred())
+                    PyErr_SetString(mpatch_Error, mpatch_error);
+                mpatch_error = NULL;
+                result = NULL;
+                goto cleanup;
+	}
+	result = PyBytes_FromStringAndSize(NULL, outlen);
+	if (!result) {
+		result = NULL;
+		goto cleanup;
+	}
+	out = PyBytes_AsString(result);
+	if (!apply(out, in, inlen, patch)) {
+            if (!PyErr_Occurred())
+                PyErr_SetString(mpatch_Error, mpatch_error);
+            mpatch_error = NULL;
+            Py_DECREF(result);
+            result = NULL;
+	}
+cleanup:
+	lfree(patch);
+	return result;
+}
+
+/* calculate size of a patched file directly */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+        long orig, outlen = 0;
+	ssize_t patchlen;
+	char *bin;
+
+	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+		return NULL;
+
+        outlen = _patchedsize(orig, bin, patchlen);
+
+	if (outlen == -1) {
+            if (!PyErr_Occurred())
+                PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+            return NULL;
+	}
+
+	return Py_BuildValue("l", outlen);
+}
+
+static PyMethodDef methods[] = {
+    {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+    {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
+    {NULL, NULL}
+    };
+
+#ifdef IS_PY3K
+static struct PyModuleDef mpatch_module = {
+    PyModuleDef_HEAD_INIT,
+    "mpatch",
+    mpatch_doc,
+    -1,
+    methods
+    };
+
+PyMODINIT_FUNC PyInit_mpatch(void)
+{
+    PyObject *m;
+
+    m = PyModule_Create(&mpatch_module);
+    if (m == NULL)
+        return NULL;
+
+    mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
+    Py_INCREF(mpatch_Error);
+    PyModule_AddObject(m, "mpatchError", mpatch_Error);
+
+    return m;
+}
+#else
+PyMODINIT_FUNC
+initmpatch(void)
+{
+    Py_InitModule3("mpatch", methods, mpatch_doc);
+    mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
+}
+#endif
diff -r 41dcd7545266 -r c41ede700e5c mercurial/util.h
--- a/mercurial/util.h	Wed Feb 24 15:55:44 2016 -0600
+++ b/mercurial/util.h	Mon Feb 22 11:06:27 2016 +0100
@@ -102,52 +102,7 @@
 extern PyTypeObject dirstateTupleType;
 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateTupleType)
 
-static inline uint32_t getbe32(const char *c)
-{
-	const unsigned char *d = (const unsigned char *)c;
-
-	return ((d[0] << 24) |
-		(d[1] << 16) |
-		(d[2] << 8) |
-		(d[3]));
-}
-
-static inline int16_t getbeint16(const char *c)
-{
-	const unsigned char *d = (const unsigned char *)c;
-
-	return ((d[0] << 8) |
-		(d[1]));
-}
-
-static inline uint16_t getbeuint16(const char *c)
-{
-	const unsigned char *d = (const unsigned char *)c;
-
-	return ((d[0] << 8) |
-		(d[1]));
-}
-
-static inline void putbe32(uint32_t x, char *c)
-{
-	c[0] = (x >> 24) & 0xff;
-	c[1] = (x >> 16) & 0xff;
-	c[2] = (x >> 8) & 0xff;
-	c[3] = (x) & 0xff;
-}
-
-static inline double getbefloat64(const char *c)
-{
-	const unsigned char *d = (const unsigned char *)c;
-	double ret;
-	int i;
-	uint64_t t = 0;
-	for (i = 0; i < 8; i++) {
-		t = (t<<8) + d[i];
-	}
-	memcpy(&ret, &t, sizeof(t));
-	return ret;
-}
+#include "bitmanipulation.h"
 
 /* This should be kept in sync with normcasespecs in encoding.py. */
 enum normcase_spec {
diff -r 41dcd7545266 -r c41ede700e5c setup.py
--- a/setup.py	Wed Feb 24 15:55:44 2016 -0600
+++ b/setup.py	Mon Feb 22 11:06:27 2016 +0100
@@ -554,7 +554,7 @@
               depends=common_depends),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c'],
               depends=common_depends),
-    Extension('mercurial.mpatch', ['mercurial/mpatch.c'],
+    Extension('mercurial.mpatch', ['mercurial/mpatch.c', 'mercurial/mpatch_cpy.c'],
               depends=common_depends),
     Extension('mercurial.parsers', ['mercurial/dirs.c',
                                     'mercurial/manifest.c',


More information about the Mercurial-devel mailing list