[PATCH 1 of 4 lazy-manifest] manifest.c: new extension code to lazily parse manifests
Augie Fackler
raf at durin42.com
Thu Jan 8 20:34:58 UTC 2015
# HG changeset patch
# User Augie Fackler <augie at google.com>
# Date 1420748959 18000
# Thu Jan 08 15:29:19 2015 -0500
# Node ID 788bb2dacad048f161dfed583d3a95f708e91d1e
# Parent 7ad155e13f0f51df8e986a0ec4e58ac9a0ccedbb
manifest.c: new extension code to lazily parse manifests
This lets us iterate manifests in order, but do a _lot_ less work in
the common case when we only care about a few manifest entries.
Many thanks to Mike Edgar for reviewing this in advance of it going
out to the list, which caught many things I missed.
diff --git a/mercurial/manifest.c b/mercurial/manifest.c
new file mode 100644
--- /dev/null
+++ b/mercurial/manifest.c
@@ -0,0 +1,846 @@
+/*
+ * manifest.c - manifest type that does on-demand parsing.
+ */
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <Python.h>
+
+/* yay c89 */
+#define true 1
+#define false 0
+typedef unsigned char bool;
+
+const int kDefaultLinesSize = 100000;
+
+typedef struct {
+ char *start;
+ Py_ssize_t len; /* length of line not including terminal newline */
+ char hash_suffix;
+ bool hash_none;
+ bool from_malloc;
+ bool deleted;
+} line;
+
+typedef struct {
+ PyObject_HEAD PyObject *pydata;
+ line *lines;
+ int numlines;
+ int livelines;
+ int maxlines;
+ bool dirty;
+} lazymanifest;
+
+#define MANIFEST_OOM -1
+#define MANIFEST_NOT_SORTED -2
+#define MANIFEST_MALFORMED -3
+
+static int realloc_if_needed(lazymanifest * self)
+{
+ if (self->numlines == self->maxlines) {
+ self->maxlines *= 2;
+ self->lines = realloc(
+ self->lines, self->maxlines * sizeof(line));
+ }
+ return self->lines != NULL;
+}
+
+static int find_lines(lazymanifest * self, char *data, Py_ssize_t len)
+{
+ char *prev = NULL;
+ while (data) {
+ char *point = memchr(data, '\n', len);
+ if (point == NULL) {
+ if (prev == NULL) {
+ return 0;
+ } else {
+ return MANIFEST_MALFORMED;
+ }
+ }
+ if (point) {
+ point++; /* advance past newline */
+ if (!realloc_if_needed(self)) {
+ return MANIFEST_OOM; /* no memory */
+ }
+ if (prev != NULL && strcmp(prev, data) > -1) {
+ /* This data isn't sorted, so we have to abort. */
+ return MANIFEST_NOT_SORTED;
+ }
+ prev = data;
+ line *l = self->lines + ((self->numlines)++);
+ l->start = data;
+ l->len = point - data;
+ l->hash_suffix = '\0';
+ l->hash_none = false;
+ l->from_malloc = false;
+ l->deleted = false;
+ if (*point == '\0') {
+ break;
+ } else {
+ len = len - l->len;
+ data = point;
+ }
+ }
+ }
+ self->livelines = self->numlines;
+ return 0;
+}
+
+static int lzm_init_real(lazymanifest * self, PyObject * pydata) {
+ self->dirty = false;
+ char *data;
+ Py_ssize_t len;
+ int err = PyString_AsStringAndSize(pydata, &data, &len);
+ if (err == -1)
+ return -1;
+ self->pydata = pydata;
+ Py_INCREF(self->pydata);
+ int ret;
+ Py_BEGIN_ALLOW_THREADS
+ self->lines = (line *)malloc(kDefaultLinesSize * sizeof(line));
+ self->maxlines = kDefaultLinesSize;
+ self->numlines = 0;
+ if (self->lines == NULL)
+ ret = -2;
+ else
+ ret = find_lines(self, data, len);
+ Py_END_ALLOW_THREADS switch (ret) {
+ case 0:
+ break;
+ case MANIFEST_OOM:
+ PyErr_NoMemory();
+ break;
+ case MANIFEST_NOT_SORTED:
+ PyErr_Format(PyExc_ValueError,
+ "Manifest lines not in sorted order.");
+ break;
+ case MANIFEST_MALFORMED:
+ PyErr_Format(PyExc_ValueError,
+ "Manifest did not end in a newline.");
+ break;
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "Unknown problem parsing manifest.");
+ }
+ return ret == 0 ? 0 : -1;
+}
+
+static int lzm_init(lazymanifest * self, PyObject * args)
+{
+ PyObject *pydata;
+ if (!PyArg_ParseTuple(args, "S", &pydata)) {
+ return -1;
+ }
+ return lzm_init_real(self, pydata);
+}
+
+static void lzm_dealloc(lazymanifest * self)
+{
+ /* free any extra lines we had to allocate */
+ int i;
+ for (i = 0; i < self->numlines; i++) {
+ if (self->lines[i].from_malloc) {
+ free(self->lines[i].start);
+ }
+ }
+ if (self->lines != NULL) {
+ free(self->lines);
+ self->lines = NULL;
+ }
+ if (self->pydata) {
+ Py_DECREF(self->pydata);
+ self->pydata = NULL;
+ }
+ PyObject_Del(self);
+}
+
+/* iteration support */
+
+typedef struct {
+ PyObject_HEAD lazymanifest *m;
+ Py_ssize_t pos;
+} lmIter;
+
+static void lmIter_dealloc(PyObject * o)
+{
+ lmIter *self = (lmIter *)o;
+ Py_DECREF(self->m);
+ PyObject_Del(self);
+}
+
+/* defined in parsers.c */
+PyObject *unhexlify(const char *str, int len);
+
+static size_t pathlen(line *l) {
+ return strnlen(l->start, l->len);
+}
+
+static PyObject *nodeof(line *l) {
+ if (l->hash_none) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ char *s = l->start;
+ ssize_t llen = pathlen(l);
+ PyObject *hash = unhexlify(s + llen + 1, 40);
+ if (hash == NULL) {
+ return NULL;
+ }
+ if (l->hash_suffix != '\0') {
+ char newhash[21];
+ memcpy(newhash, PyString_AsString(hash), 20);
+ Py_DECREF(hash);
+ newhash[20] = l->hash_suffix;
+ hash = PyString_FromStringAndSize(newhash, 21);
+ }
+ return hash;
+}
+
+static PyObject *hashflags(line * l)
+{
+ char *s = l->start;
+ size_t plen = pathlen(l);
+ PyObject *hash = nodeof(l);
+ if (hash == NULL)
+ return NULL;
+ /* 40 for hash, 1 for null byte, 1 for newline */
+ size_t hplen = plen + 42;
+ Py_ssize_t flen = l->len - hplen;
+ PyObject *flags;
+ if (flen > 0) {
+ flags = PyString_FromStringAndSize(s + hplen - 1, flen);
+ } else {
+ flags = PyString_FromString("");
+ }
+ if (flags == NULL) {
+ Py_DECREF(hash);
+ return NULL;
+ }
+ PyObject *tup = PyTuple_Pack(2, hash, flags);
+ Py_DECREF(flags);
+ Py_DECREF(hash);
+ return tup;
+}
+
+PyObject *lmIter_iternext(PyObject * o)
+{
+ PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
+ lmIter *self = (lmIter *)o;
+ Py_ssize_t index = self->pos++;
+ /* skip over deleted manifest entries */
+ while (index < self->m->numlines &&
+ self->m->lines[index].deleted == true) {
+ index = ++(self->pos);
+ }
+ if (self->m->numlines <= index) {
+ goto bail;
+ }
+ line *l = &(self->m->lines[index]);
+ size_t pl = pathlen(l);
+ path = PyString_FromStringAndSize(l->start, pl);
+ hash = nodeof(l);
+ Py_ssize_t consumed = pl + 41;
+ if (l->len > (consumed + 1)) {
+ flags = PyString_FromStringAndSize(l->start + consumed,
+ l->len - consumed - 1);
+ } else {
+ flags = PyString_FromString("");
+
+ }
+ if (flags == NULL) {
+ goto bail;
+ }
+ ret = PyTuple_Pack(3, path, hash, flags);
+bail:
+ Py_XDECREF(path);
+ Py_XDECREF(hash);
+ Py_XDECREF(flags);
+ return ret;
+}
+
+static PyTypeObject lazymanifestIterator = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size */
+ "parsers.lazymanifest.iterator", /*tp_name */
+ sizeof(lmIter), /*tp_basicsize */
+ 0, /*tp_itemsize */
+ lmIter_dealloc, /*tp_dealloc */
+ 0, /*tp_print */
+ 0, /*tp_getattr */
+ 0, /*tp_setattr */
+ 0, /*tp_compare */
+ 0, /*tp_repr */
+ 0, /*tp_as_number */
+ 0, /*tp_as_sequence */
+ 0, /*tp_as_mapping */
+ 0, /*tp_hash */
+ 0, /*tp_call */
+ 0, /*tp_str */
+ 0, /*tp_getattro */
+ 0, /*tp_setattro */
+ 0, /*tp_as_buffer */
+ /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
+ use tp_iter and tp_iternext fields. */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
+ "Iterator for a lazymanifest.", /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter: __iter__() method */
+ lmIter_iternext, /* tp_iternext: next() method */
+};
+
+static lazymanifest *lzm_copy(lazymanifest *self, PyObject *unused_args);
+
+static PyObject *lzm_GetIter(lazymanifest *self)
+{
+ lazymanifest *t = lzm_copy(self, NULL);
+ if (t == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ lmIter *i = PyObject_New(lmIter, &lazymanifestIterator);
+ if (i != NULL) {
+ i->m = t;
+ i->pos = 0;
+ } else {
+ Py_DECREF(t);
+ PyErr_NoMemory();
+ }
+ return (PyObject *)i;
+}
+
+/* __getitem__ and __setitem__ support */
+
+static Py_ssize_t lzm_size(lazymanifest * self)
+{
+ return self->livelines;
+}
+
+static int linecmp(const void *left, const void *right)
+{
+ return strcmp(((const line *)left)->start,
+ ((const line *)right)->start);
+}
+
+static PyObject *lzm_getitem(lazymanifest * self, PyObject * key)
+{
+ if (!PyString_Check(key)) {
+ PyErr_Format(PyExc_TypeError,
+ "getitem: manifest keys must be a string.");
+ return NULL;
+ }
+ line needle = { PyString_AsString(key), 0 };
+ line *hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+ &linecmp);
+ if (hit == NULL || hit->deleted == true) {
+ PyErr_Format(PyExc_KeyError, "No such manifest entry.");
+ return NULL;
+ }
+ return hashflags(hit);
+}
+
+static int lzm_delitem(lazymanifest * self, PyObject * key)
+{
+ if (!PyString_Check(key)) {
+ PyErr_Format(PyExc_TypeError,
+ "delitem: manifest keys must be a string.");
+ return -1;
+ }
+ line needle = { PyString_AsString(key), 0 };
+ line *hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+ &linecmp);
+ if (hit == NULL || hit->deleted == true) {
+ PyErr_Format(PyExc_KeyError,
+ "Tried to delete nonexistent manifest entry.");
+ return -1;
+ }
+ self->dirty = true;
+ hit->deleted = true;
+ self->livelines--;
+ return 0;
+}
+
+static int lzm_setitem(lazymanifest * self, PyObject * key, PyObject * value)
+{
+ if (!PyString_Check(key)) {
+ PyErr_Format(PyExc_TypeError,
+ "setitem: manifest keys must be a string.");
+ return -1;
+ }
+ if (value == NULL) {
+ return lzm_delitem(self, key);
+ }
+ if (!PyObject_IsInstance(value, (PyObject *) & PyTuple_Type)
+ || PyTuple_Size(value) != 2) {
+ PyErr_Format(PyExc_TypeError,
+ "Manifest values must be a tuple of (node, flags).");
+ return -1;
+ }
+ char *path;
+ Py_ssize_t plen;
+ if (PyString_AsStringAndSize(key, &path, &plen) == -1) {
+ return -1;
+ }
+
+ PyObject *pyhash = PyTuple_GetItem(value, 0);
+ char *hash = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+ char hashextra = '\0';
+ bool hashnone = false;
+ if (pyhash == Py_None) {
+ hashnone = true;
+ } else {
+ if (!PyString_Check(pyhash)) {
+ PyErr_Format(PyExc_TypeError,
+ "node must be a 20-byte string");
+ return -1;
+ }
+ Py_ssize_t hlen = PyString_Size(pyhash);
+ /* Some parts of the codebase try and set 21 or 22
+ * byte "hash" values in order to perturb things for
+ * status. We have to preserve at least the 21st
+ * byte. Sigh.
+ */
+ if (hlen != 20 && hlen != 21 && hlen != 22) {
+ PyErr_Format(PyExc_TypeError,
+ "node must be a 20-byte string");
+ return -1;
+ }
+ hash = PyString_AsString(pyhash);
+ if (hlen > 20) {
+ hashextra = hash[20];
+ }
+ }
+
+ PyObject *pyflags = PyTuple_GetItem(value, 1);
+ if (!PyString_Check(pyflags) || PyString_Size(pyflags) > 1) {
+ PyErr_Format(PyExc_TypeError,
+ "flags must a 0 or 1 byte string");
+ return -1;
+ }
+ char *flags;
+ Py_ssize_t flen;
+ if (PyString_AsStringAndSize(pyflags, &flags, &flen) == -1) {
+ return -1;
+ }
+ /* 3 -> two null bytes and one newline */
+ size_t dlen = plen + 40 + flen + 3;
+ char *dest = malloc(dlen);
+ if (dest == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ memcpy(dest, path, plen + 1);
+ int i;
+ for (i = 0; i < 20; i++) {
+ sprintf(dest + plen + 1 + (i * 2), "%02hhx", hash[i]);
+ }
+ sprintf(dest + plen + 41, "%s\n", flags);
+ line new = {
+ dest, /* start */
+ dlen - 1, /* total line length */
+ hashextra, /* extra byte, if any, on the hash */
+ hashnone, /* true if the hash is set to None */
+ true, /* is `start` a pointer we allocated? */
+ false, /* is this entry deleted? */
+ };
+ line *hit = bsearch(&new, self->lines, self->numlines,
+ sizeof(line), &linecmp);
+ self->dirty = true;
+ if (hit != NULL) {
+ /* updating a line we already had */
+ if (hit->from_malloc) {
+ free(hit->start);
+ }
+ if (hit->deleted) {
+ self->livelines++;
+ }
+ memcpy(hit, &new, sizeof(line));
+ } else {
+ /* new line */
+ if (!realloc_if_needed(self)) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ memcpy(self->lines + self->numlines++, &new, sizeof(line));
+ self->livelines++;
+ /* TODO: do a binary search and insert rather than
+ * append and qsort. Also offer a batch-insert
+ * interface, which should be a nice little
+ * performance win.
+ */
+ qsort(self->lines, self->numlines, sizeof(line), &linecmp);
+ }
+ return 0;
+}
+
+static PyMappingMethods lzm_mapping_methods = {
+ (lenfunc)lzm_size, /* mp_length */
+ (binaryfunc)lzm_getitem, /* mp_subscript */
+ (objobjargproc)lzm_setitem, /* mp_ass_subscript */
+};
+
+/* sequence methods (important or __contains__ builds an iterator */
+
+static int lzm_contains(lazymanifest * self, PyObject * key)
+{
+ if (!PyString_Check(key)) {
+ PyErr_Format(PyExc_TypeError,
+ "contains: manifest keys must be a string.");
+ return 0;
+ }
+ line needle = { PyString_AsString(key), 0 };
+ line *hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+ &linecmp);
+ if (hit == NULL || hit->deleted == true) {
+ return 0;
+ }
+ return 1;
+}
+
+static PySequenceMethods lzm_seq_meths = {
+ (lenfunc)lzm_size, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ 0, /* sq_item */
+ 0, /* sq_slice */
+ 0, /* sq_ass_item */
+ 0, /* sq_ass_slice */
+ (objobjproc)lzm_contains, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0, /* sq_inplace_repeat */
+};
+
+
+/* Other methods (copy, diff, etc) */
+static PyTypeObject lazymanifestType;
+
+static int compact(lazymanifest *self) {
+ if (!self->dirty)
+ return 0;
+ int i;
+ ssize_t need = 0;
+ for (i = 0; i < self->numlines; i++) {
+ if (self->lines[i].deleted == false) {
+ need += self->lines[i].len;
+ }
+ }
+ char *dest = malloc(need);
+ if (dest == NULL) {
+ return -1;
+ }
+ ssize_t copied = 0;
+ int real = 0;
+ for (i = 0; i < self->numlines; i++) {
+ line *src = self->lines + i;
+ char *tofree = NULL;
+ if (src->from_malloc) {
+ tofree = src->start;
+ }
+ if (src->deleted == false) {
+ ssize_t c = src->len;
+ memcpy(dest + copied, src->start, c);
+ line new = {
+ dest + copied, /* start */
+ c, /* total line length */
+ src->hash_suffix, /* extra byte, if any, on the hash */
+ src->hash_none, /* true if the hash is set to None */
+ false, /* is `start` a pointer we allocated? */
+ false, /* is this entry deleted? */
+ };
+ line *l = (self->lines) + real;
+ memcpy(l, &new, sizeof(line));
+ real++;
+ copied += c;
+ }
+ if (tofree)
+ free(tofree);
+ }
+ PyObject *pydata = PyString_FromStringAndSize(dest, copied);
+ if (pydata == NULL)
+ return -1;
+ char *realdest;
+ int err = PyString_AsStringAndSize(pydata, &realdest, &copied);
+ if (err == -1)
+ return -1;
+ /* Right now, lines contain pointers into a block we just had
+ Python copy. Fix up the pointers so that they point into
+ the Python string we just built.
+ */
+ for (i = 0; i < real; i++) {
+ int offset = (self->lines[i].start) - dest;
+ self->lines[i].start = realdest + offset;
+ }
+ Py_DECREF(self->pydata);
+ free(dest);
+ self->pydata = pydata;
+ Py_INCREF(self->pydata);
+ self->numlines = real;
+ self->livelines = real;
+ self->dirty = false;
+ return 0;
+}
+
+static PyObject *lzm_text(lazymanifest * self, PyObject * unused_args)
+{
+ if (compact(self) != 0) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ Py_INCREF(self->pydata);
+ return self->pydata;
+}
+
+static lazymanifest *lzm_copy(lazymanifest * self, PyObject * unused_args)
+{
+ lazymanifest *copy = NULL;
+ if (compact(self) != 0) {
+ goto nomem;
+ }
+ copy = PyObject_New(lazymanifest, &lazymanifestType);
+ if (copy == NULL) {
+ goto nomem;
+ }
+ copy->numlines = self->numlines;
+ copy->livelines = self->livelines;
+ copy->dirty = false;
+ copy->lines = malloc(self->maxlines * sizeof(line));
+ if (copy->lines == NULL) {
+ goto nomem;
+ }
+ memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
+ copy->maxlines = self->maxlines;
+ copy->pydata = self->pydata;
+ Py_INCREF(copy->pydata);
+ return copy;
+nomem:
+ PyErr_NoMemory();
+ Py_XDECREF(copy);
+ return NULL;
+}
+
+static lazymanifest *lzm_filtercopy(lazymanifest * self, PyObject *matchfn)
+{
+ if (!PyCallable_Check(matchfn)) {
+ PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
+ return NULL;
+ }
+ /* compact ourselves first to avoid double-frees later when we
+ * compact tmp so that it doesn't have random pointers to our
+ * underlying data */
+ compact(self);
+ lazymanifest *tmp = PyObject_New(lazymanifest, &lazymanifestType);
+ tmp->lines = (line *)malloc(self->maxlines * sizeof(line));
+ tmp->maxlines = self->maxlines;
+ tmp->livelines = 0;
+ tmp->numlines = 0;
+ tmp->pydata = self->pydata;
+ Py_INCREF(self->pydata);
+ Py_INCREF(matchfn);
+ int i;
+ for (i = 0; i < self->numlines; i++) {
+ PyObject *arg = PyString_FromString(self->lines[i].start);
+ PyObject *arglist = PyTuple_Pack(1, arg);
+ PyObject *result = PyObject_CallObject(matchfn, arglist);
+ /* if the callback raised an exception, just let it
+ * through and give up */
+ if (result == NULL) {
+ free(tmp->lines);
+ tmp->lines = NULL;
+ Py_DECREF(matchfn);
+ Py_DECREF(self->pydata);
+ return NULL;
+ }
+ if (PyObject_IsTrue(result)) {
+ assert(!(self->lines[i].from_malloc));
+ tmp->lines[tmp->numlines++] = self->lines[i];
+ tmp->livelines++;
+ }
+ Py_DECREF(arglist);
+ Py_DECREF(arg);
+ Py_DECREF(result);
+ }
+ Py_DECREF(matchfn);
+ compact(tmp);
+ return tmp;
+}
+
+static PyObject *lzm_diff(lazymanifest * self, lazymanifest * other)
+{
+ if (!PyObject_IsInstance((PyObject *)other,
+ (PyObject *)&lazymanifestType)) {
+ PyErr_Format(PyExc_TypeError, "other must be a lazymanifest");
+ return NULL;
+ }
+ PyObject *emptyTup = NULL, *ret = NULL;
+ PyObject *es = PyString_FromString("");
+ if (es == NULL) {
+ goto nomem;
+ }
+ emptyTup = PyTuple_Pack(2, Py_None, es);
+ Py_DECREF(es);
+ if (emptyTup == NULL) {
+ goto nomem;
+ }
+ ret = PyDict_New();
+ if (ret == NULL) {
+ goto nomem;
+ }
+ int sneedle = 0, oneedle = 0;
+ while (sneedle != self->numlines || oneedle != other->numlines) {
+ line *left = self->lines + sneedle;
+ line *right = other->lines + oneedle;
+ int result;
+ /* If we're looking at a deleted entry and it's not
+ * the end of the manifest, just skip it. */
+ if (left->deleted && sneedle < self->numlines) {
+ sneedle++;
+ continue;
+ }
+ if (right->deleted && oneedle < other->numlines) {
+ oneedle++;
+ continue;
+ }
+ /* if we're at the end of either manifest, then we
+ * know the remaining items are adds so we can skip
+ * the strcmp. */
+ if (sneedle == self->numlines) {
+ result = 1;
+ } else if (oneedle == other->numlines) {
+ result = -1;
+ } else {
+ result = linecmp(left, right);
+ }
+ PyObject *key = result <= 0 ?
+ PyString_FromString(left->start) : PyString_FromString(right->start);
+ if (key == NULL)
+ goto nomem;
+ if (result < 0) {
+ PyObject *tup = hashflags(left);
+ if (tup == NULL) {
+ goto nomem;
+ }
+ PyObject *outer = PyTuple_Pack(2, tup, emptyTup);
+ Py_DECREF(tup);
+ if (outer == NULL) {
+ goto nomem;
+ }
+ PyDict_SetItem(ret, key, outer);
+ Py_DECREF(outer);
+ sneedle++;
+ } else if (result > 0) {
+ PyObject *tup = hashflags(right);
+ if (tup == NULL) {
+ goto nomem;
+ }
+ PyObject *outer = PyTuple_Pack(2, emptyTup, tup);
+ Py_DECREF(tup);
+ if (outer == NULL) {
+ goto nomem;
+ }
+ PyDict_SetItem(ret, key, outer);
+ Py_DECREF(outer);
+ oneedle++;
+ } else {
+ /* file exists in both manifests */
+ if (left->len != right->len
+ || memcmp(left->start, right->start, left->len)
+ || left->hash_suffix != right->hash_suffix
+ || left->hash_none != right->hash_none) {
+ PyObject *l = hashflags(left);
+ if (l == NULL) {
+ goto nomem;
+ }
+ PyObject *r = hashflags(right);
+ if (r == NULL) {
+ Py_DECREF(l);
+ goto nomem;
+ }
+ PyObject *outer = PyTuple_Pack(2, l, r);
+ Py_DECREF(l);
+ Py_DECREF(r);
+ if (outer == NULL) {
+ goto nomem;
+ }
+ PyDict_SetItem(ret, key, outer);
+ Py_DECREF(outer);
+ }
+ sneedle++;
+ oneedle++;
+ }
+ Py_DECREF(key);
+ }
+ Py_DECREF(emptyTup);
+ return ret;
+nomem:
+ PyErr_NoMemory();
+ Py_XDECREF(ret);
+ Py_XDECREF(emptyTup);
+ return NULL;
+}
+
+static PyMethodDef lzm_methods[] = {
+ {"copy", (PyCFunction)lzm_copy, METH_NOARGS,
+ "Make a copy of this lazymanifest."},
+ {"filtercopy", (PyCFunction)lzm_filtercopy, METH_O,
+ "Make a copy of this manifest filtered by matchfn."},
+ {"diff", (PyCFunction)lzm_diff, METH_O,
+ "Compare this lazymanifest to another one."},
+ {"text", (PyCFunction)lzm_text, METH_NOARGS,
+ "Encode this manifest to text."},
+ {NULL},
+};
+
+static PyTypeObject lazymanifestType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "parsers.lazymanifest", /* tp_name */
+ sizeof(lazymanifest), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)lzm_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &lzm_seq_meths, /* tp_as_sequence */
+ &lzm_mapping_methods, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
+ "TODO(augie)", /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ (getiterfunc)lzm_GetIter, /* tp_iter */
+ 0, /* tp_iternext */
+ lzm_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)lzm_init, /* tp_init */
+ 0, /* tp_alloc */
+};
+
+void manifest_module_init(PyObject * mod)
+{
+ lazymanifestType.tp_new = PyType_GenericNew;
+ if (PyType_Ready(&lazymanifestType) < 0)
+ return;
+ Py_INCREF(&lazymanifestType);
+
+ PyModule_AddObject(mod, "lazymanifest",
+ (PyObject *)&lazymanifestType);
+}
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -71,7 +71,7 @@ static inline int hexdigit(const char *p
/*
* Turn a hex-encoded string into binary.
*/
-static PyObject *unhexlify(const char *str, int len)
+PyObject *unhexlify(const char *str, int len)
{
PyObject *ret;
char *d;
@@ -2233,6 +2233,7 @@ static PyMethodDef methods[] = {
};
void dirs_module_init(PyObject *mod);
+void manifest_module_init(PyObject *mod);
static void module_init(PyObject *mod)
{
@@ -2247,6 +2248,7 @@ static void module_init(PyObject *mod)
PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
dirs_module_init(mod);
+ manifest_module_init(mod);
indexType.tp_new = PyType_GenericNew;
if (PyType_Ready(&indexType) < 0 ||
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -491,6 +491,7 @@ extmodules = [
Extension('mercurial.mpatch', ['mercurial/mpatch.c'],
depends=common_depends),
Extension('mercurial.parsers', ['mercurial/dirs.c',
+ 'mercurial/manifest.c',
'mercurial/parsers.c',
'mercurial/pathencode.c'],
depends=common_depends),
diff --git a/tests/test-manifest.py b/tests/test-manifest.py
new file mode 100644
--- /dev/null
+++ b/tests/test-manifest.py
@@ -0,0 +1,215 @@
+import binascii
+import unittest
+import itertools
+
+import silenttestrunner
+
+from mercurial import parsers
+
+HASH_1 = '1' * 40
+HASH_2 = 'f' * 40
+HASH_3 = '1234567890abcdef0987654321deadbeef0fcafe'
+A_SHORT_MANIFEST = (
+ 'bar/baz/qux.py\0%(hash2)s%(flag2)s\n'
+ 'foo\0%(hash1)s%(flag1)s\n'
+ ) % {'hash1': HASH_1,
+ 'flag1': '',
+ 'hash2': HASH_2,
+ 'flag2': 'l',
+ }
+
+HUGE_MANIFEST_ENTRIES = 200001
+
+A_HUGE_MANIFEST = ''.join(sorted(
+ 'file%d\0%s%s\n' % (i, h, f) for i, h, f in
+ itertools.izip(xrange(200001),
+ itertools.cycle((HASH_1, HASH_2)),
+ itertools.cycle(('', 'x', 'l')))))
+
+class testmanifest(unittest.TestCase):
+ def testEmptyManifest(self):
+ m = parsers.lazymanifest('')
+ self.assertEqual(0, len(m))
+ self.assertEqual([], list(m))
+
+ def testManifest(self):
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ want = [
+ ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
+ ('foo', binascii.unhexlify(HASH_1), ''),
+ ]
+ self.assertEqual(len(want), len(m))
+ self.assertEqual(want, list(m))
+ self.assertEqual((binascii.unhexlify(HASH_1), ''), m['foo'])
+ self.assertRaises(KeyError, lambda : m['wat'])
+ self.assertEqual((binascii.unhexlify(HASH_2), 'l'),
+ m['bar/baz/qux.py'])
+
+ def testSetItem(self):
+ want = binascii.unhexlify(HASH_1), ''
+
+ m = parsers.lazymanifest('')
+ m['a'] = want
+ self.assertIn('a', m)
+ self.assertEqual(want, m['a'])
+ self.assertEqual('a\0' + HASH_1 + '\n', m.text())
+
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m['a'] = want
+ self.assertEqual(want, m['a'])
+ self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST,
+ m.text())
+ m2 = m.copy()
+ del m
+ del m2 # make sure we don't double free() anything
+
+ def testCompaction(self):
+ unhex = binascii.unhexlify
+ h1, h2 = unhex(HASH_1), unhex(HASH_2)
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m['alpha'] = h1, ''
+ m['beta'] = h2, ''
+ del m['foo']
+ want = 'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
+ HASH_1, HASH_2, HASH_2)
+ self.assertEqual(want, m.text())
+ self.assertEqual(3, len(m))
+ self.assertEqual((h1, ''), m['alpha'])
+ self.assertEqual((h2, ''), m['beta'])
+ self.assertRaises(KeyError, lambda : m['foo'])
+ w = [('alpha', h1, ''), ('bar/baz/qux.py', h2, 'l'), ('beta', h2, '')]
+ self.assertEqual(w, list(m))
+
+ def testSetGetNodeSuffix(self):
+ clean = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ h, f = m['foo']
+ want = h + 'a', f
+ # Merge code wants to set 21-byte fake hashes at times
+ m['foo'] = want
+ self.assertEqual(want, m['foo'])
+ self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
+ ('foo', binascii.unhexlify(HASH_1) + 'a', '')],
+ list(m))
+ # Sometimes it even tries a 22-byte fake hash, but we can
+ # return 21 and it'll work out
+ m['foo'] = want[0] + '+', f
+ self.assertEqual(want, m['foo'])
+ # make sure the suffix survives a copy
+ m2 = m.filtercopy(lambda x: x == 'foo')
+ self.assertEqual(want, m2['foo'])
+ m2 = m.copy()
+ self.assertEqual(want, m2['foo'])
+ # suffix with iteration
+ self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
+ ('foo', want[0], '')], list(m))
+ # shows up in diff
+ self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean))
+ self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
+
+ def testFilterCopyException(self):
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ def filt(path):
+ if path == 'foo':
+ assert False
+ return True
+ self.assertRaises(AssertionError, m.filtercopy, filt)
+
+ def testSetGetNodeNone(self):
+ clean = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ h, f = m['foo']
+ want = None, f
+ m['foo'] = want
+ self.assertEqual(want, m['foo'])
+ # none with iteration
+ self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
+ ('foo', None, '')], list(m))
+ # none with copies
+ m2 = m.filtercopy(lambda x: x == 'foo')
+ self.assertEqual(want, m2['foo'])
+ m2 = m.copy()
+ self.assertEqual(want, m2['foo'])
+ # shows up in diff
+ self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean))
+ self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
+
+ def testRemoveItem(self):
+ m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ del m['foo']
+ self.assertRaises(KeyError, lambda : m['foo'])
+ self.assertEqual(1, len(m))
+ self.assertEqual(1, len(list(m)))
+
+ def testManifestDiff(self):
+ MISSING = (None, '')
+ addl = 'z-only-in-left\0' + HASH_1 + '\n'
+ addr = 'z-only-in-right\0' + HASH_2 + 'x\n'
+ left = parsers.lazymanifest(
+ A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl)
+ right = parsers.lazymanifest(A_SHORT_MANIFEST + addr)
+ want = {
+ 'foo': ((binascii.unhexlify(HASH_3), 'x'),
+ (binascii.unhexlify(HASH_1), '')),
+ 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
+ 'z-only-in-right': (MISSING, (binascii.unhexlify(HASH_2), 'x')),
+ }
+ self.assertEqual(want, left.diff(right))
+
+ want = {
+ 'bar/baz/qux.py': (MISSING, (binascii.unhexlify(HASH_2), 'l')),
+ 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')),
+ 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')),
+ }
+ self.assertEqual(want, parsers.lazymanifest('').diff(left))
+
+ want = {
+ 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING),
+ 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING),
+ 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
+ }
+ self.assertEqual(want, left.diff(parsers.lazymanifest('')))
+ copy = right.copy()
+ del copy['z-only-in-right']
+ del right['foo']
+ want = {
+ 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
+ 'z-only-in-right': ((binascii.unhexlify(HASH_2), 'x'), MISSING),
+ }
+ self.assertEqual(want, right.diff(copy))
+
+ short = parsers.lazymanifest(A_SHORT_MANIFEST)
+ pruned = short.copy()
+ del pruned['foo']
+ want = {
+ 'foo': ((binascii.unhexlify(HASH_1), ''), MISSING),
+ }
+ self.assertEqual(want, short.diff(pruned))
+ want = {
+ 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
+ }
+ self.assertEqual(want, pruned.diff(short))
+
+ def testReversedLines(self):
+ backwards = ''.join(
+ l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l)
+ try:
+ parsers.lazymanifest(backwards)
+ self.fail('Should have raised ValueError')
+ except ValueError, v:
+ self.assertEqual('Manifest lines not in sorted order.', v.message)
+
+ def testNoTerminalNewline(self):
+ try:
+ parsers.lazymanifest(A_SHORT_MANIFEST + 'wat')
+ self.fail('Should have raised ValueError')
+ except ValueError, v:
+ self.assertEqual('Manifest did not end in a newline.', v.message)
+
+ def testHugeManifest(self):
+ m = parsers.lazymanifest(A_HUGE_MANIFEST)
+ self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
+ self.assertEqual(len(m), len(list(m)))
+
+if __name__ == '__main__':
+ silenttestrunner.main(__name__)
More information about the Mercurial-devel
mailing list