[patch 6/7] Implement nlbuf for optimized manifest operations

Chris Mason mason at suse.com
Mon Aug 22 12:44:40 CDT 2005


# HG changeset patch
# User mason at suse.com
Implement nlbuf for optimized manifest operations

Testing shows that manifest.add is spending a significant percentage of
its time running calcoffsets and doing text = "".join(addlist).  This
patch removes the need for both of these by creating a new type optimized
for the kind of operations manifest.add does.  

The new nlbuf type stores a buffer of text along with an index for the 
start of each line.  Calculating the offset of any given line is done
via simple O(1) pointer math and a zero copy buffer is used to provide
the manifest text in string form.

Time to apply 2751 patches (without psyco, ext3 noatime,data=writeback):

Stock hg: 4m45s real 3m32s user 55s sys
patched:  2m42s real 1m46s user 44s sys
quilt:    2m30s real   45s user 50s sys

(quilt does much more io...)

Some changes to the compression code were required because it was trying
to concatenate a string with buffer returned by nlbuf.text().  The fix also
reduces string duplication in general.

Index: mine/setup.py
===================================================================
--- mine.orig/setup.py	2005-08-22 12:06:22.000000000 -0400
+++ mine/setup.py	2005-08-22 12:11:47.000000000 -0400
@@ -31,6 +31,7 @@ try:
           license='GNU GPL',
           packages=['mercurial'],
           ext_modules=[Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
+                       Extension('mercurial.nlbuf', ['mercurial/nlbuf.c']),
                        Extension('mercurial.bdiff', ['mercurial/bdiff.c'])],
           data_files=[('mercurial/templates',
                        ['templates/map'] +
Index: mine/mercurial/nlbuf.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ mine/mercurial/nlbuf.c	2005-08-22 12:11:47.000000000 -0400
@@ -0,0 +1,568 @@
+#include <Python.h>
+#include "structmember.h"
+
+#define ROUND_UP(x) ((x) + 4096 - ((x) & (4095)))
+typedef struct {
+	PyObject_HEAD char *buf;
+	char **lines;
+	int buf_used;
+	/* size of the buf */
+	int buf_size;
+	/* number of lines actually in the buffer */
+	int lines_used;
+	/* number of slots in the lines array */
+	int lines_size;
+} nlbuf;
+
+static PyObject *__nlbuf_new(char *p, int len);
+
+static void nlbuf_dealloc(nlbuf * self)
+{
+	free(self->lines);
+	free(self->buf);
+	self->ob_type->tp_free((PyObject *) self);
+}
+
+/*
+ * compare our buffer against an arbitrary string
+ */
+PyObject *nlbuf_compare(PyObject * a, PyObject * w)
+{
+	char *wbuf;
+	int wlen;
+	int mlen;
+	int ret;
+	nlbuf *self = (nlbuf *) a;
+	if (PyObject_AsReadBuffer(w, (const void **)&wbuf, &wlen))
+		return PyInt_FromLong(-1);
+	mlen = wlen;
+	if (self->buf_used < wlen)
+		mlen = self->buf_used;
+	ret = memcmp(self->buf, wbuf, mlen);
+	if (ret == 0) {
+		if (wlen < self->buf_used)
+			return PyInt_FromLong(1);
+		if (wlen > self->buf_used)
+			return PyInt_FromLong(-1);
+		return PyInt_FromLong(0);
+	}
+	return PyInt_FromLong(ret);
+}
+
+static PyObject *nlbuf_new(PyTypeObject * type, PyObject * args,
+			   PyObject * kwds)
+{
+	nlbuf *self;
+	self = (nlbuf *) type->tp_alloc(type, 0);
+	if (!self) {
+		abort();
+		return NULL;
+	}
+	self->lines = NULL;
+	self->buf = NULL;
+	self->buf_size = self->buf_used = self->lines_used =
+	    self->lines_size = 0;
+	return (PyObject *) self;
+}
+
+/*
+ * fill in the lines array
+ */
+static void reindex(nlbuf * self)
+{
+	char *p;
+	self->lines[0] = self->buf;
+	self->lines_used = 0;
+	for (p = self->buf; p < self->buf + self->buf_used; p++) {
+		if (*p == '\n') {
+			self->lines_used++;
+			if (self->lines_used >= self->lines_size) {
+				int len = (self->lines_size + 
+					   self->lines_size / 4 + 16) * sizeof(char **);
+				len = ROUND_UP(len);
+				self->lines = realloc(self->lines, len);
+				self->lines_size = len / sizeof(char **);
+			}
+			self->lines[self->lines_used] = p + 1;
+		} else if (p == self->buf + self->buf_used - 1) {
+			self->lines_used++;
+		}
+	}
+	self->lines[self->lines_used] = p;
+}
+
+static int __nlbuf_init(nlbuf * self, char *p, int len)
+{
+	self->buf_used = len;
+	self->buf_size = ROUND_UP(self->buf_used);
+	self->buf = malloc(self->buf_size);
+	if (!self->buf)
+		return -1;
+	memcpy(self->buf, p, self->buf_used);
+	self->lines_size = self->buf_used / 64 + 16;
+	self->lines = malloc((self->lines_size + 1) * sizeof(char **));
+	if (!self->lines) {
+		free(self->buf);
+		self->buf = NULL;
+		return -1;
+	}
+	reindex(self);
+	return 0;
+}
+
+static int nlbuf_init(nlbuf * self, PyObject * args, PyObject * kwds)
+{
+	static char *kwlist[] = { "text" };
+	PyObject *b;
+	char *p;
+	int len;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|S", kwlist, &b))
+		return -1;
+
+	free(self->lines);
+	free(self->buf);
+	self->lines = NULL;
+	self->buf = NULL;
+	self->buf_used = self->buf_size = self->lines_used =
+	    self->lines_size = 0;
+
+	PyString_AsStringAndSize(b, &p, &len);
+	return __nlbuf_init(self, p, len);
+}
+
+/*
+ * returns the number of lines in the buffer
+ */
+static int nlbuf_seq_len(PyObject * a)
+{
+	nlbuf *self = (nlbuf *) a;
+	return self->lines_used;
+}
+
+static PyObject *indexerr = NULL;
+static PyObject *nlbuf_get_item(PyObject * a, int i)
+{
+	nlbuf *self = (nlbuf *) a;
+	char *p;
+	int len;
+	if (i < 0 || i >= self->lines_used) {
+		if (indexerr == NULL)
+			indexerr =
+			    PyString_FromString("list index out of range");
+		PyErr_SetObject(PyExc_IndexError, indexerr);
+		return NULL;
+	}
+	p = self->lines[i];
+	len = self->lines[i + 1] - p;
+	return PyString_FromStringAndSize(p, len);
+}
+
+/*
+ * gets the string for a given line, ignoring everything after
+ * the first null.
+ */
+static PyObject *nlbuf_get_str(PyObject * a, PyObject * args)
+{
+	long offset;
+	nlbuf *self = (nlbuf *) a;
+	char *p;
+
+	if (!PyArg_ParseTuple(args, "l", &offset)) {
+		return NULL;
+	}
+	if (offset < 0 || offset >= self->lines_used) {
+		if (indexerr == NULL)
+			indexerr =
+			    PyString_FromString("offset index out of range");
+		PyErr_SetObject(PyExc_IndexError, indexerr);
+		return NULL;
+	}
+	p = self->lines[offset];
+	return PyString_FromString(p);
+}
+
+static PyObject *nlbuf_slice(PyObject * a, int ilow, int ihigh)
+{
+	nlbuf *c;
+	nlbuf *self = (nlbuf *) a;
+	int len;
+	char *p;
+	if (ilow < 0)
+		ilow = 0;
+	else if (ilow > self->lines_used)
+		ilow = self->lines_used;
+	if (ihigh < ilow)
+		ihigh = ilow;
+	else if (ihigh > self->lines_used)
+		ihigh = self->lines_used;
+
+	if (ilow >= self->lines_used) {
+		p = "";
+		len = 0;
+	} else {
+		p = self->lines[ilow];
+		len = self->lines[ihigh] - p;
+	}
+	c = (nlbuf *) __nlbuf_new(p, len);
+	if (!c)
+		abort();
+
+	return (PyObject *) c;
+}
+/*
+ * here we have all the dirty work for assigning to a slice of the buffer.
+ * This resizes both the text buffer and lines array based on the changes being
+ * made.  Consider this python statement:
+ * 
+ * nlbuf[ilow:ihigh] = [ some array ]
+ *
+ * ilow and ihigh -- line indexes for the start and end in the current nlbuf
+ * size_diff -- difference in text size between our current buffer and 'some array'
+ * d -- difference in the number of lines between our current buffer and 'some array'
+ *
+ * Other than resizing things and shifting memory around, no changes are made
+ * to the nlbuf.  The caller must insert his new values himself
+ */
+static int resize_buffer(nlbuf * a, int ilow, int ihigh, int size_diff, int d)
+{
+	char *src;
+	char *dest;
+	char *old_low;
+	int i;
+
+	/* do we need to extend the buffer? */
+	if (size_diff > 0 && a->buf_used + size_diff + 1 > a->buf_size) {
+		int new_size = ROUND_UP(a->buf_size + size_diff + 1);
+		a->buf = realloc(a->buf, new_size);
+		if (!a->buf)
+			return -1;
+		a->buf_size = new_size;
+		reindex(a);
+	}
+	/* do we need to extend the lines array? */
+	if (d > 0 && a->lines_used + d > a->lines_size) {
+		int len = (a->lines_size + d + 1) * sizeof(char **);
+		len = ROUND_UP(len);
+		a->lines = realloc(a->lines, len);
+		a->lines_size = len / sizeof(char **);
+	}
+	if (ihigh < a->lines_used) {
+		/* when ihigh and ilow are the same, we need to preserve ilow */
+		old_low = a->lines[ilow];
+		if (size_diff) {
+			dest = a->lines[ihigh] + size_diff;
+			src = a->lines[ihigh];
+			memmove(dest, src, a->buf + a->buf_used - src);
+			for (i = ihigh; i <= a->lines_used; i++) {
+				a->lines[i] += size_diff;
+			}
+		}
+		if (d) {
+			dest = (char *)(a->lines + ihigh + d);
+			src = (char *)(a->lines + ihigh);
+			memmove(dest, src,
+				(a->lines_used - ihigh + 1) * sizeof(char **));
+		}
+		a->lines[ilow] = old_low;
+	} else if (size_diff > 0) {
+		/* we're adding onto the end, make sure to put a new line there */
+		if (a->buf_used && a->buf[a->buf_used - 1] != '\n') {
+			a->buf[a->buf_used] = '\n';
+			a->buf_used++;
+		}
+		a->lines[ihigh] = a->buf + a->buf_used;
+	}
+	a->buf_used += size_diff;
+	a->lines_used += d;
+	return 0;
+}
+
+/* a[ilow:ihigh] = v if v != NULL.
+ * del a[ilow:ihigh] if v == NULL.
+ */
+static int nlbuf_ass_slice(PyObject * aa, int ilow, int ihigh, PyObject * v)
+{
+	nlbuf *a = (nlbuf *) aa;
+	nlbuf *b = (nlbuf *) v;
+	PyObject **vitem = NULL;
+	PyObject *v_as_SF = NULL;	/* PySequence_Fast(v) */
+	int n;			/* # of elements in replacement list */
+	int norig;		/* # of elements in list getting replaced */
+	int d;			/* Change in number of lines */
+	int k;
+	int result = -1;	/* guilty until proved innocent */
+	int total_vsize;        /* total size of data in v */
+	char *dest;
+	int size_diff = 0;	/* change in buffer size */
+
+	if (v == NULL)
+		n = 0;
+	else {
+		if (a == b) {
+			/* Special case "a[i:j] = a" -- copy b first */
+			v = nlbuf_slice((PyObject *) b, 0, b->lines_used);
+			if (v == NULL) {
+				abort();
+				return result;
+			}
+			result = nlbuf_ass_slice((PyObject *) a, ilow,
+						 ihigh, v);
+			Py_DECREF(v);
+			if (!result)
+				abort();
+			return result;
+		}
+		v_as_SF = PySequence_Fast(v, "can only assign an iterable");
+		if (v_as_SF == NULL)
+			goto Error;
+		n = PySequence_Fast_GET_SIZE(v_as_SF);
+		vitem = PySequence_Fast_ITEMS(v_as_SF);
+	}
+	if (ilow < 0)
+		ilow = 0;
+	else if (ilow > a->lines_used)
+		ilow = a->lines_used;
+
+	if (ihigh < ilow)
+		ihigh = ilow;
+	else if (ihigh > a->lines_used)
+		ihigh = a->lines_used;
+
+	norig = ihigh - ilow;
+	assert(norig >= 0);
+	d = n - norig;
+	if (a->lines_used + d == 0) {
+		Py_XDECREF(v_as_SF);
+		a->lines_used = 0;
+		a->buf_used = 0;
+		return 0;
+	}
+	total_vsize = 0;
+	for (k = 0; k < n; k++) {
+		int ret;
+		PyObject *w = vitem[k];
+		ret = PyString_Size(w);
+		if (ret < 0)
+			return result;
+		total_vsize += ret;
+	}
+	if (ihigh > ilow)
+		size_diff = total_vsize - (a->lines[ihigh] - a->lines[ilow]);
+	else
+		size_diff = total_vsize;
+
+	if (resize_buffer(a, ilow, ihigh, size_diff, d))
+		return result;
+
+	dest = a->lines[ilow];
+	for (k = 0; k < n; k++, ilow++) {
+		int len;
+		char *p;
+		int ret;
+		PyObject *w = vitem[k];
+		ret = PyString_AsStringAndSize(w, &p, &len);
+		if (ret < 0)
+			return -1;
+		memcpy(dest, p, len);
+		a->lines[ilow] = dest;
+		dest += len;
+	}
+	if (ilow == a->lines_used) {
+		a->lines[ilow] = dest;
+	}
+
+	result = 0;
+Error:
+	Py_XDECREF(v_as_SF);
+	return result;
+}
+
+static int nlbuf_ass_item(PyObject * aa, int i, PyObject * v)
+{
+	nlbuf *a = (nlbuf *) aa;
+	int len;
+	char *p;
+	int ret;
+	int cur_len;
+	char *curp;
+	if (i < 0 || i >= a->lines_used) {
+		PyErr_SetString(PyExc_IndexError,
+				"list assignment index out of range");
+		return -1;
+	}
+	if (v == NULL)
+		return nlbuf_ass_slice(aa, i, i + 1, v);
+
+	ret = PyString_AsStringAndSize(v, &p, &len);
+	if (ret) {
+		return -1;
+	}
+	curp = a->lines[i];
+	cur_len = a->lines[i + 1] - curp;
+	if (cur_len != len && resize_buffer(a, i, i + 1, len - cur_len, 0))
+		return -1;
+	memcpy(curp, p, len);
+	return 0;
+}
+
+static PySequenceMethods nlbuf_as_sequence = {
+	nlbuf_seq_len,		// inquiry sq_length;
+	NULL,			// binaryfunc sq_concat;
+	NULL,			// intargfunc sq_repeat;
+	nlbuf_get_item,		// intargfunc sq_item;
+	nlbuf_slice,		// intintargfunc sq_slice;
+	nlbuf_ass_item,		// intobjargproc sq_ass_item;
+	nlbuf_ass_slice,	// intintobjargproc sq_ass_slice;
+	// objobjproc sq_contains;
+	/* Added in release 2.0 */
+	// binaryfunc sq_inplace_concat;
+	// intargfunc sq_inplace_repeat;
+};
+
+static PyObject *nlbuf_text(nlbuf * self)
+{
+	return PyBuffer_FromMemory(self->buf, self->buf_used);
+}
+
+/*
+ * search for a given string, returning the same as the python
+ * bisect_right call
+ */
+static PyObject *nlbuf_bisect(nlbuf * self, PyObject * args)
+{
+	char *p;
+	long hi = self->lines_used, lo = 0;
+	long mid;
+	int ret;
+
+	if (!PyArg_ParseTuple(args, "s|ii", &p, &lo, &hi)) {
+		if (indexerr == NULL)
+			indexerr = PyString_FromString("offset index out of range");
+		PyErr_SetObject(PyExc_IndexError, indexerr);
+		return NULL;
+	}
+	while (lo < hi) {
+		mid = (lo + hi) / 2;
+		ret = strcmp(p, self->lines[mid]);
+		if (ret < 0)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+	return PyInt_FromLong(lo);
+}
+
+/*
+ * return the byte offset of a given line
+ */
+static PyObject *nlbuf_offset(nlbuf * self, PyObject * args)
+{
+	long offset;
+
+	if (!PyArg_ParseTuple(args, "l", &offset)) {
+		return NULL;
+	}
+	if (offset < 0 || offset > self->lines_used) {
+		if (indexerr == NULL)
+			indexerr =
+			    PyString_FromString("offset index out of range");
+		PyErr_SetObject(PyExc_IndexError, indexerr);
+		return NULL;
+	}
+	offset = self->lines[offset] - self->buf;
+	return PyInt_FromLong(offset);
+}
+
+static PyMethodDef nlbuf_methods[] = {
+	{"text", (PyCFunction) nlbuf_text, METH_NOARGS,
+	 "Return the text"},
+	{"offset", (PyCFunction) nlbuf_offset, METH_VARARGS,
+	 "Return the offset"},
+	{"bisect", (PyCFunction) nlbuf_bisect, METH_VARARGS,
+	 "Return the bisect"},
+	{"get_str", (PyCFunction) nlbuf_get_str, METH_VARARGS,
+	 "Return str"},
+	{"compare_text", (PyCFunction) nlbuf_compare, METH_O,
+	 "compare our text against a buffer"},
+	{NULL}			/* Sentinel */
+};
+
+static PyTypeObject nlbufType = {
+	PyObject_HEAD_INIT(NULL) 0,	/*ob_size */
+	"mercurial.nlbuf",	/*tp_name */
+	sizeof(nlbuf),		/*tp_basicsize */
+	0,			/*tp_itemsize */
+	(destructor) nlbuf_dealloc,	/*tp_dealloc */
+	0,			/*tp_print */
+	0,			/*tp_getattr */
+	0,			/*tp_setattr */
+	0,			/*tp_compare */
+	0,			/*tp_repr */
+	0,			/*tp_as_number */
+	&nlbuf_as_sequence,	/*tp_as_sequence */
+	0,			/*tp_as_mapping */
+	0,			/*tp_hash */
+	0,			/*tp_call */
+	0,			/*tp_str */
+	0,			/*tp_getattro */
+	0,			/*tp_setattro */
+	0,			/*tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,	/*tp_flags */
+	"nlbuf objects",	/* tp_doc */
+	0,			/* tp_traverse */
+	0,			/* tp_clear */
+	0,			/* tp_richcompare */
+	0,			/* tp_weaklistoffset */
+	0,			/* tp_iter */
+	0,			/* tp_iternext */
+	nlbuf_methods,		/* tp_methods */
+	0,			/* tp_members */
+	0,			/* tp_getset */
+	0,			/* tp_base */
+	0,			/* tp_dict */
+	0,			/* tp_descr_get */
+	0,			/* tp_descr_set */
+	0,			/* tp_dictoffset */
+	(initproc) nlbuf_init,	/* tp_init */
+	0,			/* tp_alloc */
+	nlbuf_new,		/* tp_new */
+};
+
+static PyMethodDef module_methods[] = {
+	{NULL}			/* Sentinel */
+};
+
+#ifndef PyMODINIT_FUNC		/* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+PyMODINIT_FUNC initnlbuf(void)
+{
+	PyObject *m;
+
+	if (PyType_Ready(&nlbufType) < 0)
+		return;
+
+	m = Py_InitModule3("nlbuf", module_methods, "New line indexed buffer");
+
+	if (m == NULL)
+		return;
+
+	Py_INCREF(&nlbufType);
+	PyModule_AddObject(m, "nlbuf", (PyObject *) & nlbufType);
+}
+
+static PyObject *__nlbuf_new(char *p, int len)
+{
+	nlbuf *nl = (nlbuf *) nlbuf_new(&nlbufType, NULL, NULL);
+	int ret;
+	if (nl) {
+		ret = __nlbuf_init(nl, p, len);
+		if (ret != 0) {
+			Py_XDECREF(nl);
+			abort();
+			return NULL;
+		}
+	}
+	return (PyObject *) nl;
+}
Index: mine/mercurial/hg.py
===================================================================
--- mine.orig/mercurial/hg.py	2005-08-22 12:06:22.000000000 -0400
+++ mine/mercurial/hg.py	2005-08-22 13:30:05.000000000 -0400
@@ -10,7 +10,7 @@ import util
 from revlog import *
 from demandload import *
 demandload(globals(), "re lock urllib urllib2 transaction time socket")
-demandload(globals(), "tempfile httprangereader bdiff urlparse")
+demandload(globals(), "tempfile httprangereader bdiff urlparse nlbuf")
 demandload(globals(), "bisect errno select stat weakref")
 
 class filelog(revlog):
@@ -114,9 +114,10 @@ class manifest(revlog):
         text = self.revision(node)
         map = {}
         flag = {}
-        self.listcache = (text, text.splitlines(1))
-        for l in self.listcache[1]:
-            (f, n) = l.split('\0')
+        self.listcache = nlbuf.nlbuf(text)
+        for l in self.listcache:
+            s = str(l)
+            (f, n) = s.split('\0')
             map[f] = bin(n[:40])
             flag[f] = (n[40:-1] == "x")
         self.mapcache = (node, map, flag)
@@ -130,8 +131,9 @@ class manifest(revlog):
 
     def diff(self, a, b):
         # this is sneaky, as we're not actually using a and b
-        if self.listcache and self.addlist and self.listcache[0] == a:
-            d = mdiff.diff(self.listcache[1], self.addlist, 1)
+        if self.listcache and self.addlist and \
+           self.listcache.compare_text(a) == 0:
+            d = mdiff.diff(self.listcache, self.addlist, 1)
             if mdiff.patch(a, d) != b:
                 sys.stderr.write("*** sortdiff failed, falling back ***\n")
                 return mdiff.textdiff(a, b)
@@ -178,19 +180,6 @@ class manifest(revlog):
                     del addlist[start:end]
             return addlist
 
-        # calculate the byte offset of the start of each line in the
-        # manifest
-        def calcoffsets(addlist):
-            offsets = [0] * (len(addlist) + 1)
-            offset = 0
-            i = 0
-            while i < len(addlist):
-                offsets[i] = offset
-                offset += len(addlist[i])
-                i += 1
-            offsets[i] = offset
-            return offsets
-
         # if we're using the listcache, make sure it is valid and
         # parented by the same node we're diffing against
         if not changed or not self.listcache or not p1 or \
@@ -198,15 +187,14 @@ class manifest(revlog):
             files = map.keys()
             files.sort()
 
-            self.addlist = ["%s\000%s%s\n" %
+            addlist = ["%s\000%s%s\n" %
                             (f, hex(map[f]), flags[f] and "x" or '')
                             for f in files]
             cachedelta = None
+            self.listcache = nlbuf.nlbuf("".join(addlist))
+            self.addlist = self.listcache
         else:
-            addlist = self.listcache[1]
-
-            # find the starting offset for each line in the add list
-            offsets = calcoffsets(addlist)
+            addlist = self.listcache
 
             # combine the changed lists into one list for sorting
             work = [[x, 0] for x in changed[0]]
@@ -218,10 +206,10 @@ class manifest(revlog):
 
             for w in work:
                 f = w[0]
-                # bs will either be the index of the item or the insert point
-                bs = bisect.bisect(addlist, f, bs)
-                if bs < len(addlist):
-                    fn = addlist[bs][:addlist[bs].index('\0')]
+                # bs will either be the index of the item + 1 or the insert point
+                bs = addlist.bisect(f, bs)
+                if bs <= len(addlist) and bs > 0:
+                    fn = addlist.get_str(bs-1)
                 else:
                     fn = None
                 if w[1] == 0:
@@ -229,9 +217,9 @@ class manifest(revlog):
                                           flags[f] and "x" or '')
                 else:
                     l = None
-                start = bs
                 if fn != f:
                     # item not found, insert a new one
+                    start = bs
                     end = bs
                     if w[1] == 1:
                         sys.stderr.write("failed to remove %s from manifest\n"
@@ -239,8 +227,9 @@ class manifest(revlog):
                         sys.exit(1)
                 else:
                     # item is found, replace/delete the existing line
-                    end = bs + 1
-                delta.append([start, end, offsets[start], offsets[end], l])
+                    start = bs - 1
+                    end = bs
+                delta.append((start, end, addlist.offset(start), addlist.offset(end), l))
 
             self.addlist = addlistdelta(addlist, delta)
             if self.mapcache[0] == self.tip():
@@ -248,13 +237,9 @@ class manifest(revlog):
             else:
                 cachedelta = None
 
-        text = "".join(self.addlist)
-        if cachedelta and mdiff.patch(self.listcache[0], cachedelta) != text:
-            sys.stderr.write("manifest delta failure\n")
-            sys.exit(1)
+        text = self.addlist.text()
         n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
         self.mapcache = (n, map, flags)
-        self.listcache = (text, self.addlist)
         self.addlist = None
 
         return n
@@ -452,10 +437,11 @@ class dirstate:
         for x in files:
             if x is '.':
                 return self.map.copy()
-            if x not in self.map:
+            s = self.map.get(x, False)
+            if not s:
                 unknown.append(x)
             else:
-                ret[x] = self.map[x]
+                ret[x] = s
 
         if not unknown:
             return ret
Index: mine/mercurial/revlog.py
===================================================================
--- mine.orig/mercurial/revlog.py	2005-08-22 12:06:22.000000000 -0400
+++ mine/mercurial/revlog.py	2005-08-22 12:11:47.000000000 -0400
@@ -16,15 +16,15 @@ def bin(node): return binascii.unhexlify
 def short(node): return hex(node[:6])
 
 def compress(text):
-    if not text: return text
+    if not text: return ("", text)
     if len(text) < 44:
-        if text[0] == '\0': return text
-        return 'u' + text
+        if text[0] == '\0': return ("", text)
+        return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
-        if text[0] == '\0': return text
-        return 'u' + text
-    return bin
+        if text[0] == '\0': return ("", text)
+        return ('u', text)
+    return ("", bin)
 
 def decompress(bin):
     if not bin: return bin
@@ -324,14 +324,16 @@ class revlog:
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
-                d = self.diff(prev, text)
+                d = self.diff(prev, str(text))
             data = compress(d)
-            dist = end - start + len(data)
+            l = len(data[1]) + len(data[0])
+            dist = end - start + l
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
+            l = len(data[1]) + len(data[0])
             base = n
         else:
             base = self.base(t)
@@ -340,14 +342,17 @@ class revlog:
         if t >= 0:
             offset = self.end(t)
 
-        e = (offset, len(data), base, link, p1, p2, node)
+        e = (offset, l, base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
-        self.opener(self.datafile, "a").write(data)
+        f = self.opener(self.datafile, "a")
+        if data[0]:
+            f.write(data[0])
+        f.write(data[1])
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
@@ -570,7 +575,8 @@ class revlog:
             # current size.
 
             if chain == prev:
-                cdelta = compress(delta)
+                tempd = compress(delta)
+                cdelta = tempd[0] + tempd[1]
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision

--


More information about the Mercurial mailing list