[PATCH 1 of 2] pathencode: new C module with fast encodedir() function

Adrian Buehlmann adrian at cadifra.com
Tue Sep 18 05:02:17 CDT 2012


# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1347961410 -7200
# Node ID b62927686fa75898548e1c6e80e70dd825db5853
# Parent  663098a577fdfc30833dbaa9dca28f15854ec265
pathencode: new C module with fast encodedir() function

Not yet used (will be enabled in a later patch).

This patch is a stripped down version of patches originally created by
Bryan O'Sullivan <bryano at fb.com>

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1506,11 +1506,14 @@
 
 static char parsers_doc[] = "Efficient content parsing.";
 
+PyObject *encodedir(PyObject *self, PyObject *args);
+
 static PyMethodDef methods[] = {
 	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
 	{NULL, NULL}
 };
 
diff --git a/mercurial/pathencode.c b/mercurial/pathencode.c
new file mode 100644
--- /dev/null
+++ b/mercurial/pathencode.c
@@ -0,0 +1,125 @@
+/*
+ pathencode.c - efficient path name encoding
+
+ Copyright 2012 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#include <Python.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+/* state machine for dir-encoding */
+enum dir_state {
+	DDOT,
+	DH,
+	DHGDI,
+	DDEFAULT,
+};
+
+static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+                            char c)
+{
+	if (dest) {
+		assert(*destlen < destsize);
+		dest[*destlen] = c;
+	}
+	(*destlen)++;
+}
+
+static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+                           const void *src, Py_ssize_t len)
+{
+	if (dest) {
+		assert(*destlen + len < destsize);
+		memcpy((void *)&dest[*destlen], src, len);
+	}
+	*destlen += len;
+}
+
+static Py_ssize_t _encodedir(char *dest, size_t destsize,
+                             const char *src, Py_ssize_t len)
+{
+	enum dir_state state = DDEFAULT;
+	Py_ssize_t i = 0, destlen = 0;
+
+	while (i < len) {
+		switch (state) {
+		case DDOT:
+			switch (src[i]) {
+			case 'd':
+			case 'i':
+				state = DHGDI;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			case 'h':
+				state = DH;
+				charcopy(dest, &destlen, destsize, src[i++]);
+				break;
+			default:
+				state = DDEFAULT;
+				break;
+			}
+			break;
+		case DH:
+			if (src[i] == 'g') {
+				state = DHGDI;
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			else state = DDEFAULT;
+			break;
+		case DHGDI:
+			if (src[i] == '/') {
+				memcopy(dest, &destlen, destsize, ".hg", 3);
+				charcopy(dest, &destlen, destsize, src[i++]);
+			}
+			state = DDEFAULT;
+			break;
+		case DDEFAULT:
+			if (src[i] == '.')
+				state = DDOT;
+			charcopy(dest, &destlen, destsize, src[i++]);
+			break;
+		}
+	}
+
+	return destlen;
+}
+
+PyObject *encodedir(PyObject *self, PyObject *args)
+{
+	Py_ssize_t len, newlen;
+	PyObject *pathobj, *newobj;
+	char *path;
+
+	if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
+		return NULL;
+
+	if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
+		PyErr_SetString(PyExc_TypeError, "expected a string");
+		return NULL;
+	}
+
+	newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
+
+	if (newlen == len + 1) {
+		Py_INCREF(pathobj);
+		return pathobj;
+	}
+
+	newobj = PyString_FromStringAndSize(NULL, newlen);
+
+	if (newobj) {
+		PyString_GET_SIZE(newobj)--;
+		_encodedir(PyString_AS_STRING(newobj), newlen, path,
+			   len + 1);
+	}
+
+	return newobj;
+}
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -421,7 +421,8 @@
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c']),
+    Extension('mercurial.parsers', ['mercurial/parsers.c',
+                                    'mercurial/pathencode.c']),
     ]
 
 osutil_ldflags = []


More information about the Mercurial-devel mailing list