[PATCH 1 of 4 lazy-manifest v2] manifest.c: new extension code to lazily parse manifests
Martin von Zweigbergk
martinvonz at google.com
Tue Jan 13 18:42:38 CST 2015
As I reported on IRC, the 'setitem' method seems slow. On the Mozilla repo,
running
hg co -C 1813b && hg mv -q intl i18n && time hg ci -qm 'move intl'
takes 6.3s with current hg and 1m21s with these patches applied. It may
very be related to your TODO in setitem.
On Mon Jan 12 2015 at 12:05:17 PM Augie Fackler <raf at durin42.com> wrote:
> # HG changeset patch
> # User Augie Fackler <augie at google.com>
> # Date 1420871231 28800
> # Fri Jan 09 22:27:11 2015 -0800
> # Node ID b9d67bf3ea8196c8530afdcf6b1727682b681259
> # Parent 678f53865c6860a950392691814766957ee89316
> manifest.c: new extension code to lazily parse manifests
>
> This lets us iterate manifests in order, but do a _lot_ less work in
> the common case when we only care about a few manifest entries.
>
> Many thanks to Mike Edgar for reviewing this in advance of it going
> out to the list, which caught many things I missed.
>
> This version of the patch includes C89 fixes from Sean Farley and
> many correctness/efficiency cleanups from Martin von
> Zweigbergk. Thanks to both!
>
> diff --git a/mercurial/manifest.c b/mercurial/manifest.c
> new file mode 100644
> --- /dev/null
> +++ b/mercurial/manifest.c
> @@ -0,0 +1,865 @@
> +/*
> + * manifest.c - manifest type that does on-demand parsing.
> + *
> + * Copyright 2015, Google Inc.
> + *
> + * This software may be used and distributed according to the terms of
> + * the GNU General Public License, incorporated herein by reference.
> + */
> +#include <assert.h>
> +#include <string.h>
> +#include <stdlib.h>
> +
> +#include <Python.h>
> +
> +/* VC9 doesn't include bool and lacks stdbool.h based on my searching */
> +#ifdef _MSC_VER
> +#define true 1
> +#define false 0
> +typedef unsigned char bool;
> +#else
> +#include <stdbool.h>
> +#endif
> +
> +#define DEFAULT_LINES 100000
> +
> +typedef struct {
> + char *start;
> + Py_ssize_t len; /* length of line not including terminal newline */
> + char hash_suffix;
> + bool from_malloc;
> + bool deleted;
> +} line;
> +
> +typedef struct {
> + PyObject_HEAD
> + PyObject *pydata;
> + line *lines;
> + int numlines; /* number of line entries */
> + int livelines; /* number of non-deleted lines */
> + int maxlines; /* allocated number of lines */
> + bool dirty;
> +} lazymanifest;
> +
> +#define MANIFEST_OOM -1
> +#define MANIFEST_NOT_SORTED -2
> +#define MANIFEST_MALFORMED -3
> +
> +/* defined in parsers.c */
> +PyObject *unhexlify(const char *str, int len);
> +
> +/* get the length of the path for a line */
> +static size_t pathlen(line *l) {
> + return strnlen(l->start, l->len);
> +}
> +
> +/* get the node value of a single line */
> +static PyObject *nodeof(line *l) {
> + char *s = l->start;
> + ssize_t llen = pathlen(l);
> + PyObject *hash = unhexlify(s + llen + 1, 40);
> + if (!hash) {
> + return NULL;
> + }
> + if (l->hash_suffix != '\0') {
> + char newhash[21];
> + memcpy(newhash, PyString_AsString(hash), 20);
> + Py_DECREF(hash);
> + newhash[20] = l->hash_suffix;
> + hash = PyString_FromStringAndSize(newhash, 21);
> + }
> + return hash;
> +}
> +
> +/* get the node hash and flags of a line as a tuple */
> +static PyObject *hashflags(line *l)
> +{
> + char *s = l->start;
> + size_t plen = pathlen(l);
> + PyObject *hash = nodeof(l);
> +
> + /* 40 for hash, 1 for null byte, 1 for newline */
> + size_t hplen = plen + 42;
> + Py_ssize_t flen = l->len - hplen;
> + PyObject *flags;
> + PyObject *tup;
> +
> + if (!hash)
> + return NULL;
> + if (flen > 0) {
> + flags = PyString_FromStringAndSize(s + hplen - 1, flen);
> + } else {
> + flags = PyString_FromString("");
> + }
> + if (!flags) {
> + Py_DECREF(hash);
> + return NULL;
> + }
> + tup = PyTuple_Pack(2, hash, flags);
> + Py_DECREF(flags);
> + Py_DECREF(hash);
> + return tup;
> +}
> +
> +/* if we're about to run out of space in the line index, add more */
> +static bool realloc_if_full(lazymanifest *self)
> +{
> + if (self->numlines == self->maxlines) {
> + self->maxlines *= 2;
> + self->lines = realloc(self->lines, self->maxlines *
> sizeof(line));
> + }
> + return self->lines;
> +}
> +
> +/*
> + * Find the line boundaries in the manifest that 'data' points to and
> store
> + * information about each line in 'self'.
> + */
> +static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
> +{
> + char *prev = NULL;
> + line *l = NULL;
> + while (len > 0) {
> + char *next = memchr(data, '\n', len);
> + if (!next) {
> + return MANIFEST_MALFORMED;
> + }
> + next++; /* advance past newline */
> + if (!realloc_if_full(self)) {
> + return MANIFEST_OOM; /* no memory */
> + }
> + if (prev && strcmp(prev, data) > -1) {
> + /* This data isn't sorted, so we have to abort. */
> + return MANIFEST_NOT_SORTED;
> + }
> + l = self->lines + ((self->numlines)++);
> + l->start = data;
> + l->len = next - data;
> + l->hash_suffix = '\0';
> + l->from_malloc = false;
> + l->deleted = false;
> + len = len - l->len;
> + prev = data;
> + data = next;
> + }
> + self->livelines = self->numlines;
> + return 0;
> +}
> +
> +static int lazymanifest_init(lazymanifest *self, PyObject *args)
> +{
> + char *data;
> + Py_ssize_t len;
> + int err, ret;
> + PyObject *pydata;
> + if (!PyArg_ParseTuple(args, "S", &pydata)) {
> + return -1;
> + }
> + err = PyString_AsStringAndSize(pydata, &data, &len);
> +
> + self->dirty = false;
> + if (err == -1)
> + return -1;
> + self->pydata = pydata;
> + Py_INCREF(self->pydata);
> + Py_BEGIN_ALLOW_THREADS
> + self->lines = malloc(DEFAULT_LINES * sizeof(line));
> + self->maxlines = DEFAULT_LINES;
> + self->numlines = 0;
> + if (!self->lines)
> + ret = -2;
> + else
> + ret = find_lines(self, data, len);
> + Py_END_ALLOW_THREADS
> + switch (ret) {
> + case 0:
> + break;
> + case MANIFEST_OOM:
> + PyErr_NoMemory();
> + break;
> + case MANIFEST_NOT_SORTED:
> + PyErr_Format(PyExc_ValueError,
> + "Manifest lines not in sorted order.");
> + break;
> + case MANIFEST_MALFORMED:
> + PyErr_Format(PyExc_ValueError,
> + "Manifest did not end in a newline.");
> + break;
> + default:
> + PyErr_Format(PyExc_ValueError,
> + "Unknown problem parsing manifest.");
> + }
> + return ret == 0 ? 0 : -1;
> +}
> +
> +static void lazymanifest_dealloc(lazymanifest *self)
> +{
> + /* free any extra lines we had to allocate */
> + int i;
> + for (i = 0; i < self->numlines; i++) {
> + if (self->lines[i].from_malloc) {
> + free(self->lines[i].start);
> + }
> + }
> + if (self->lines) {
> + free(self->lines);
> + self->lines = NULL;
> + }
> + if (self->pydata) {
> + Py_DECREF(self->pydata);
> + self->pydata = NULL;
> + }
> + PyObject_Del(self);
> +}
> +
> +/* iteration support */
> +
> +typedef struct {
> + PyObject_HEAD lazymanifest *m;
> + Py_ssize_t pos;
> +} lmIter;
> +
> +static void lmiter_dealloc(PyObject *o)
> +{
> + lmIter *self = (lmIter *)o;
> + Py_DECREF(self->m);
> + PyObject_Del(self);
> +}
> +
> +static PyObject *lmiter_iternext(PyObject *o)
> +{
> + size_t pl;
> + line *l;
> + Py_ssize_t consumed;
> + PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
> + lmIter *self = (lmIter *)o;
> + Py_ssize_t index = self->pos++;
> + /* skip over deleted manifest entries */
> + while (index < self->m->numlines &&
> + self->m->lines[index].deleted) {
> + index = ++(self->pos);
> + }
> + if (self->m->numlines <= index) {
> + goto bail;
> + }
> + l = &(self->m->lines[index]);
> + pl = pathlen(l);
> + path = PyString_FromStringAndSize(l->start, pl);
> + hash = nodeof(l);
> + consumed = pl + 41;
> + if (l->len > (consumed + 1)) {
> + flags = PyString_FromStringAndSize(l->start + consumed,
> + l->len - consumed - 1);
> + } else {
> + flags = PyString_FromString("");
> +
> + }
> + if (!flags) {
> + goto bail;
> + }
> + ret = PyTuple_Pack(3, path, hash, flags);
> + bail:
> + Py_XDECREF(path);
> + Py_XDECREF(hash);
> + Py_XDECREF(flags);
> + return ret;
> +}
> +
> +static PyTypeObject lazymanifestIterator = {
> + PyObject_HEAD_INIT(NULL)
> + 0, /*ob_size */
> + "parsers.lazymanifest.iterator", /*tp_name */
> + sizeof(lmIter), /*tp_basicsize */
> + 0, /*tp_itemsize */
> + lmiter_dealloc, /*tp_dealloc */
> + 0, /*tp_print */
> + 0, /*tp_getattr */
> + 0, /*tp_setattr */
> + 0, /*tp_compare */
> + 0, /*tp_repr */
> + 0, /*tp_as_number */
> + 0, /*tp_as_sequence */
> + 0, /*tp_as_mapping */
> + 0, /*tp_hash */
> + 0, /*tp_call */
> + 0, /*tp_str */
> + 0, /*tp_getattro */
> + 0, /*tp_setattro */
> + 0, /*tp_as_buffer */
> + /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
> + use tp_iter and tp_iternext fields. */
> + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
> + "Iterator for a lazymanifest.", /* tp_doc */
> + 0, /* tp_traverse */
> + 0, /* tp_clear */
> + 0, /* tp_richcompare */
> + 0, /* tp_weaklistoffset */
> + PyObject_SelfIter, /* tp_iter: __iter__() method */
> + lmiter_iternext, /* tp_iternext: next() method */
> +};
> +
> +static lazymanifest *lazymanifest_copy(
> + lazymanifest *self, PyObject *unused_args);
> +
> +static PyObject *lazymanifest_getiter(lazymanifest *self)
> +{
> + lmIter *i = NULL;
> + lazymanifest *t = lazymanifest_copy(self, NULL);
> + if (!t) {
> + PyErr_NoMemory();
> + return NULL;
> + }
> + i = PyObject_New(lmIter, &lazymanifestIterator);
> + if (i) {
> + i->m = t;
> + i->pos = 0;
> + } else {
> + Py_DECREF(t);
> + PyErr_NoMemory();
> + }
> + return (PyObject *)i;
> +}
> +
> +/* __getitem__ and __setitem__ support */
> +
> +static Py_ssize_t lazymanifest_size(lazymanifest *self)
> +{
> + return self->livelines;
> +}
> +
> +static int linecmp(const void *left, const void *right)
> +{
> + return strcmp(((const line *)left)->start,
> + ((const line *)right)->start);
> +}
> +
> +static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
> +{
> + line needle;
> + line *hit;
> + if (!PyString_Check(key)) {
> + PyErr_Format(PyExc_TypeError,
> + "getitem: manifest keys must be a string.");
> + return NULL;
> + }
> + needle.start = PyString_AsString(key);
> + needle.len = 0;
> + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
> + &linecmp);
> + if (!hit || hit->deleted) {
> + PyErr_Format(PyExc_KeyError, "No such manifest entry.");
> + return NULL;
> + }
> + return hashflags(hit);
> +}
> +
> +static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
> +{
> + line needle;
> + line *hit;
> + if (!PyString_Check(key)) {
> + PyErr_Format(PyExc_TypeError,
> + "delitem: manifest keys must be a string.");
> + return -1;
> + }
> + needle.start = PyString_AsString(key);
> + needle.len = 0;
> + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
> + &linecmp);
> + if (!hit || hit->deleted) {
> + PyErr_Format(PyExc_KeyError,
> + "Tried to delete nonexistent manifest
> entry.");
> + return -1;
> + }
> + self->dirty = true;
> + hit->deleted = true;
> + self->livelines--;
> + return 0;
> +}
> +
> +static int lazymanifest_setitem(
> + lazymanifest *self, PyObject *key, PyObject *value)
> +{
> + char *path;
> + Py_ssize_t plen;
> + PyObject *pyhash;
> + Py_ssize_t hlen;
> + char *hash;
> + char hashextra;
> + PyObject *pyflags;
> + char *flags;
> + Py_ssize_t flen;
> + size_t dlen;
> + char *dest;
> + int i;
> + line new;
> + line *hit;
> + if (!PyString_Check(key)) {
> + PyErr_Format(PyExc_TypeError,
> + "setitem: manifest keys must be a string.");
> + return -1;
> + }
> + if (!value) {
> + return lazymanifest_delitem(self, key);
> + }
> + if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
> + PyErr_Format(PyExc_TypeError,
> + "Manifest values must be a tuple of (node,
> flags).");
> + return -1;
> + }
> + if (PyString_AsStringAndSize(key, &path, &plen) == -1) {
> + return -1;
> + }
> +
> + pyhash = PyTuple_GetItem(value, 0);
> + if (!PyString_Check(pyhash)) {
> + PyErr_Format(PyExc_TypeError,
> + "node must be a 20-byte string");
> + return -1;
> + }
> + hlen = PyString_Size(pyhash);
> + /* Some parts of the codebase try and set 21 or 22
> + * byte "hash" values in order to perturb things for
> + * status. We have to preserve at least the 21st
> + * byte. Sigh. If there's a 22nd byte, we drop it on
> + * the floor, which works fine.
> + */
> + if (hlen != 20 && hlen != 21 && hlen != 22) {
> + PyErr_Format(PyExc_TypeError,
> + "node must be a 20-byte string");
> + return -1;
> + }
> + hash = PyString_AsString(pyhash);
> + hashextra = '\0';
> + if (hlen > 20) {
> + hashextra = hash[20];
> + }
> +
> + pyflags = PyTuple_GetItem(value, 1);
> + if (!PyString_Check(pyflags) || PyString_Size(pyflags) > 1) {
> + PyErr_Format(PyExc_TypeError,
> + "flags must a 0 or 1 byte string");
> + return -1;
> + }
> + if (PyString_AsStringAndSize(pyflags, &flags, &flen) == -1) {
> + return -1;
> + }
> + /* 3 -> two null bytes and one newline */
> + dlen = plen + 40 + flen + 3;
> + dest = malloc(dlen);
> + if (!dest) {
> + PyErr_NoMemory();
> + return -1;
> + }
> + memcpy(dest, path, plen + 1);
> + for (i = 0; i < 20; i++) {
> + sprintf(dest + plen + 1 + (i * 2), "%02hhx", hash[i]);
> + }
> + sprintf(dest + plen + 41, "%s\n", flags);
> + new.start = dest;
> + new.len = dlen - 1;
> + new.hash_suffix = hashextra;
> + new.from_malloc = true; /* is `start` a pointer we allocated?
> */
> + new.deleted = false; /* is this entry deleted? */
> + hit = bsearch(&new, self->lines, self->numlines,
> + sizeof(line), &linecmp);
> + self->dirty = true;
> + if (hit) {
> + /* updating a line we already had */
> + if (hit->from_malloc) {
> + free(hit->start);
> + }
> + if (hit->deleted) {
> + self->livelines++;
> + }
> + *hit = new;
> + } else {
> + /* new line */
> + if (!realloc_if_full(self)) {
> + PyErr_NoMemory();
> + return -1;
> + }
> + self->lines[self->numlines++] = new;
> + self->livelines++;
> + /* TODO: do a binary search and insert rather than
> + * append and qsort. Also offer a batch-insert
> + * interface, which should be a nice little
> + * performance win.
> + */
> + qsort(self->lines, self->numlines, sizeof(line), &linecmp);
> + }
> + return 0;
> +}
> +
> +static PyMappingMethods lazymanifest_mapping_methods = {
> + (lenfunc)lazymanifest_size, /* mp_length */
> + (binaryfunc)lazymanifest_getitem, /* mp_subscript */
> + (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
> +};
> +
> +/* sequence methods (important or __contains__ builds an iterator */
> +
> +static int lazymanifest_contains(lazymanifest *self, PyObject *key)
> +{
> + line needle;
> + line *hit;
> + if (!PyString_Check(key)) {
> + PyErr_Format(PyExc_TypeError,
> + "contains: manifest keys must be a string.");
> + return 0;
> + }
> + needle.start = PyString_AsString(key);
> + needle.len = 0;
> + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
> + &linecmp);
> + if (!hit || hit->deleted) {
> + return 0;
> + }
> + return 1;
> +}
> +
> +static PySequenceMethods lazymanifest_seq_meths = {
> + (lenfunc)lazymanifest_size, /* sq_length */
> + 0, /* sq_concat */
> + 0, /* sq_repeat */
> + 0, /* sq_item */
> + 0, /* sq_slice */
> + 0, /* sq_ass_item */
> + 0, /* sq_ass_slice */
> + (objobjproc)lazymanifest_contains, /* sq_contains */
> + 0, /* sq_inplace_concat */
> + 0, /* sq_inplace_repeat */
> +};
> +
> +
> +/* Other methods (copy, diff, etc) */
> +static PyTypeObject lazymanifestType;
> +
> +/* If the manifest has changes, build the new manifest text and reindex
> it. */
> +static int compact(lazymanifest *self) {
> + int i;
> + ssize_t need = 0;
> + char *data;
> + ssize_t copied = 0;
> + int real = 0;
> + line *src, *dst;
> + char *tofree;
> + ssize_t c;
> + PyObject *pydata;
> + char *realdest;
> + int err;
> + Py_ssize_t offset;
> + line new;
> + if (!self->dirty)
> + return 0;
> + for (i = 0; i < self->numlines; i++) {
> + if (!self->lines[i].deleted) {
> + need += self->lines[i].len;
> + }
> + }
> + pydata = PyString_FromStringAndSize(NULL, need);
> + if (!pydata)
> + return -1;
> + data = PyString_AsString(pydata);
> + if (!data) {
> + return -1;
> + }
> + src = self->lines;
> + dst = self->lines;
> + for (i = 0; i < self->numlines; i++, src++) {
> + tofree = NULL;
> + if (src->from_malloc) {
> + tofree = src->start;
> + }
> + if (!src->deleted) {
> + memcpy(data, src->start, src->len);
> + *dst = *src;
> + dst->start = data;
> + dst->from_malloc = false;
> + data += dst->len;
> + dst++;
> + }
> + free(tofree);
> + }
> + Py_DECREF(self->pydata);
> + self->pydata = pydata;
> + self->numlines = self->livelines;
> + self->dirty = false;
> + return 0;
> +}
> +
> +static PyObject *lazymanifest_text(lazymanifest *self, PyObject
> *unused_args)
> +{
> + if (compact(self) != 0) {
> + PyErr_NoMemory();
> + return NULL;
> + }
> + Py_INCREF(self->pydata);
> + return self->pydata;
> +}
> +
> +static lazymanifest *lazymanifest_copy(
> + lazymanifest *self, PyObject *unused_args)
> +{
> + lazymanifest *copy = NULL;
> + if (compact(self) != 0) {
> + goto nomem;
> + }
> + copy = PyObject_New(lazymanifest, &lazymanifestType);
> + if (!copy) {
> + goto nomem;
> + }
> + copy->numlines = self->numlines;
> + copy->livelines = self->livelines;
> + copy->dirty = false;
> + copy->lines = malloc(self->maxlines *sizeof(line));
> + if (!copy->lines) {
> + goto nomem;
> + }
> + memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
> + copy->maxlines = self->maxlines;
> + copy->pydata = self->pydata;
> + Py_INCREF(copy->pydata);
> + return copy;
> + nomem:
> + PyErr_NoMemory();
> + Py_XDECREF(copy);
> + return NULL;
> +}
> +
> +static lazymanifest *lazymanifest_filtercopy(
> + lazymanifest *self, PyObject *matchfn)
> +{
> + lazymanifest *tmp;
> + PyObject *arg;
> + PyObject *arglist;
> + PyObject *result;
> + int i;
> + if (!PyCallable_Check(matchfn)) {
> + PyErr_SetString(PyExc_TypeError, "matchfn must be
> callable");
> + return NULL;
> + }
> + /* compact ourselves first to avoid double-frees later when we
> + * compact tmp so that it doesn't have random pointers to our
> + * underlying data */
> + compact(self);
> + tmp = PyObject_New(lazymanifest, &lazymanifestType);
> + tmp->lines = malloc(self->maxlines * sizeof(line));
> + tmp->maxlines = self->maxlines;
> + tmp->livelines = 0;
> + tmp->numlines = 0;
> + tmp->pydata = self->pydata;
> + Py_INCREF(self->pydata);
> + for (i = 0; i < self->numlines; i++) {
> + arg = PyString_FromString(self->lines[i].start);
> + arglist = PyTuple_Pack(1, arg);
> + result = PyObject_CallObject(matchfn, arglist);
> + /* if the callback raised an exception, just let it
> + * through and give up */
> + if (!result) {
> + free(tmp->lines);
> + tmp->lines = NULL;
> + Py_DECREF(self->pydata);
> + return NULL;
> + }
> + if (PyObject_IsTrue(result)) {
> + assert(!(self->lines[i].from_malloc));
> + tmp->lines[tmp->numlines++] = self->lines[i];
> + tmp->livelines++;
> + }
> + Py_DECREF(arglist);
> + Py_DECREF(arg);
> + Py_DECREF(result);
> + }
> + compact(tmp);
> + return tmp;
> +}
> +
> +static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
> +{
> + lazymanifest *other;
> + PyObject *pyclean = NULL;
> + bool clean;
> + PyObject *emptyTup = NULL, *ret = NULL;
> + PyObject *es;
> + int sneedle = 0, oneedle = 0;
> + line *left;
> + line *right;
> + int result;
> + PyObject *key;
> + PyObject *tup;
> + PyObject *outer;
> + PyObject *r;
> + if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other,
> &pyclean)) {
> + return NULL;
> + }
> + clean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
> + es = PyString_FromString("");
> + if (!es) {
> + goto nomem;
> + }
> + emptyTup = PyTuple_Pack(2, Py_None, es);
> + Py_DECREF(es);
> + if (!emptyTup) {
> + goto nomem;
> + }
> + ret = PyDict_New();
> + if (!ret) {
> + goto nomem;
> + }
> + while (sneedle != self->numlines || oneedle != other->numlines) {
> + left = self->lines + sneedle;
> + right = other->lines + oneedle;
> + /* If we're looking at a deleted entry and it's not
> + * the end of the manifest, just skip it. */
> + if (left->deleted && sneedle < self->numlines) {
> + sneedle++;
> + continue;
> + }
> + if (right->deleted && oneedle < other->numlines) {
> + oneedle++;
> + continue;
> + }
> + /* if we're at the end of either manifest, then we
> + * know the remaining items are adds so we can skip
> + * the strcmp. */
> + if (sneedle == self->numlines) {
> + result = 1;
> + } else if (oneedle == other->numlines) {
> + result = -1;
> + } else {
> + result = linecmp(left, right);
> + }
> + key = result <= 0 ?
> + PyString_FromString(left->start) :
> + PyString_FromString(right->start);
> + if (!key)
> + goto nomem;
> + if (result < 0) {
> + tup = hashflags(left);
> + if (!tup) {
> + goto nomem;
> + }
> + outer = PyTuple_Pack(2, tup, emptyTup);
> + Py_DECREF(tup);
> + if (!outer) {
> + goto nomem;
> + }
> + PyDict_SetItem(ret, key, outer);
> + Py_DECREF(outer);
> + sneedle++;
> + } else if (result > 0) {
> + tup = hashflags(right);
> + if (!tup) {
> + goto nomem;
> + }
> + outer = PyTuple_Pack(2, emptyTup, tup);
> + Py_DECREF(tup);
> + if (!outer) {
> + goto nomem;
> + }
> + PyDict_SetItem(ret, key, outer);
> + Py_DECREF(outer);
> + oneedle++;
> + } else {
> + /* file exists in both manifests */
> + if (left->len != right->len
> + || memcmp(left->start, right->start, left->len)
> + || left->hash_suffix != right->hash_suffix) {
> + PyObject *l = hashflags(left);
> + if (!l) {
> + goto nomem;
> + }
> + r = hashflags(right);
> + if (!r) {
> + Py_DECREF(l);
> + goto nomem;
> + }
> + outer = PyTuple_Pack(2, l, r);
> + Py_DECREF(l);
> + Py_DECREF(r);
> + if (!outer) {
> + goto nomem;
> + }
> + PyDict_SetItem(ret, key, outer);
> + Py_DECREF(outer);
> + } else if (clean) {
> + PyDict_SetItem(ret, key, Py_None);
> + }
> + sneedle++;
> + oneedle++;
> + }
> + Py_DECREF(key);
> + }
> + Py_DECREF(emptyTup);
> + return ret;
> + nomem:
> + PyErr_NoMemory();
> + Py_XDECREF(ret);
> + Py_XDECREF(emptyTup);
> + return NULL;
> +}
> +
> +static PyMethodDef lazymanifest_methods[] = {
> + {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
> + "Make a copy of this lazymanifest."},
> + {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
> + "Make a copy of this manifest filtered by matchfn."},
> + {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
> + "Compare this lazymanifest to another one."},
> + {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
> + "Encode this manifest to text."},
> + {NULL},
> +};
> +
> +static PyTypeObject lazymanifestType = {
> + PyObject_HEAD_INIT(NULL)
> + 0, /* ob_size */
> + "parsers.lazymanifest", /* tp_name */
> + sizeof(lazymanifest), /* tp_basicsize
> */
> + 0, /* tp_itemsize */
> + (destructor)lazymanifest_dealloc, /* tp_dealloc */
> + 0, /* tp_print */
> + 0, /* tp_getattr */
> + 0, /* tp_setattr */
> + 0, /* tp_compare */
> + 0, /* tp_repr */
> + 0, /* tp_as_number
> */
> + &lazymanifest_seq_meths, /*
> tp_as_sequence */
> + &lazymanifest_mapping_methods, /* tp_as_mapping
> */
> + 0, /* tp_hash */
> + 0, /* tp_call */
> + 0, /* tp_str */
> + 0, /* tp_getattro */
> + 0, /* tp_setattro */
> + 0, /* tp_as_buffer
> */
> + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
> + "TODO(augie)", /* tp_doc */
> + 0, /* tp_traverse */
> + 0, /* tp_clear */
> + 0, /*
> tp_richcompare */
> + 0, /*
> tp_weaklistoffset */
> + (getiterfunc)lazymanifest_getiter, /* tp_iter */
> + 0, /* tp_iternext */
> + lazymanifest_methods, /* tp_methods */
> + 0, /* tp_members */
> + 0, /* tp_getset */
> + 0, /* tp_base */
> + 0, /* tp_dict */
> + 0, /* tp_descr_get
> */
> + 0, /* tp_descr_set
> */
> + 0, /* tp_dictoffset
> */
> + (initproc)lazymanifest_init, /* tp_init */
> + 0, /* tp_alloc */
> +};
> +
> +void manifest_module_init(PyObject * mod)
> +{
> + lazymanifestType.tp_new = PyType_GenericNew;
> + if (PyType_Ready(&lazymanifestType) < 0)
> + return;
> + Py_INCREF(&lazymanifestType);
> +
> + PyModule_AddObject(mod, "lazymanifest",
> + (PyObject *)&lazymanifestType);
> +}
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -71,7 +71,7 @@ static inline int hexdigit(const char *p
> /*
> * Turn a hex-encoded string into binary.
> */
> -static PyObject *unhexlify(const char *str, int len)
> +PyObject *unhexlify(const char *str, int len)
> {
> PyObject *ret;
> char *d;
> @@ -2233,6 +2233,7 @@ static PyMethodDef methods[] = {
> };
>
> void dirs_module_init(PyObject *mod);
> +void manifest_module_init(PyObject *mod);
>
> static void module_init(PyObject *mod)
> {
> @@ -2247,6 +2248,7 @@ static void module_init(PyObject *mod)
> PyModule_AddStringConstant(mod, "versionerrortext",
> versionerrortext);
>
> dirs_module_init(mod);
> + manifest_module_init(mod);
>
> indexType.tp_new = PyType_GenericNew;
> if (PyType_Ready(&indexType) < 0 ||
> diff --git a/setup.py b/setup.py
> --- a/setup.py
> +++ b/setup.py
> @@ -491,6 +491,7 @@ extmodules = [
> Extension('mercurial.mpatch', ['mercurial/mpatch.c'],
> depends=common_depends),
> Extension('mercurial.parsers', ['mercurial/dirs.c',
> + 'mercurial/manifest.c',
> 'mercurial/parsers.c',
> 'mercurial/pathencode.c'],
> depends=common_depends),
> diff --git a/tests/test-manifest.py b/tests/test-manifest.py
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-manifest.py
> @@ -0,0 +1,209 @@
> +import binascii
> +import unittest
> +import itertools
> +
> +import silenttestrunner
> +
> +from mercurial import parsers
> +
> +HASH_1 = '1' * 40
> +HASH_2 = 'f' * 40
> +HASH_3 = '1234567890abcdef0987654321deadbeef0fcafe'
> +A_SHORT_MANIFEST = (
> + 'bar/baz/qux.py\0%(hash2)s%(flag2)s\n'
> + 'foo\0%(hash1)s%(flag1)s\n'
> + ) % {'hash1': HASH_1,
> + 'flag1': '',
> + 'hash2': HASH_2,
> + 'flag2': 'l',
> + }
> +
> +HUGE_MANIFEST_ENTRIES = 200001
> +
> +A_HUGE_MANIFEST = ''.join(sorted(
> + 'file%d\0%s%s\n' % (i, h, f) for i, h, f in
> + itertools.izip(xrange(200001),
> + itertools.cycle((HASH_1, HASH_2)),
> + itertools.cycle(('', 'x', 'l')))))
> +
> +class testmanifest(unittest.TestCase):
> + def testEmptyManifest(self):
> + m = parsers.lazymanifest('')
> + self.assertEqual(0, len(m))
> + self.assertEqual([], list(m))
> +
> + def testManifest(self):
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + want = [
> + ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
> + ('foo', binascii.unhexlify(HASH_1), ''),
> + ]
> + self.assertEqual(len(want), len(m))
> + self.assertEqual(want, list(m))
> + self.assertEqual((binascii.unhexlify(HASH_1), ''), m['foo'])
> + self.assertRaises(KeyError, lambda : m['wat'])
> + self.assertEqual((binascii.unhexlify(HASH_2), 'l'),
> + m['bar/baz/qux.py'])
> +
> + def testSetItem(self):
> + want = binascii.unhexlify(HASH_1), ''
> +
> + m = parsers.lazymanifest('')
> + m['a'] = want
> + self.assertIn('a', m)
> + self.assertEqual(want, m['a'])
> + self.assertEqual('a\0' + HASH_1 + '\n', m.text())
> +
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + m['a'] = want
> + self.assertEqual(want, m['a'])
> + self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST,
> + m.text())
> + m2 = m.copy()
> + del m
> + del m2 # make sure we don't double free() anything
> +
> + def testCompaction(self):
> + unhex = binascii.unhexlify
> + h1, h2 = unhex(HASH_1), unhex(HASH_2)
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + m['alpha'] = h1, ''
> + m['beta'] = h2, ''
> + del m['foo']
> + want = 'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
> + HASH_1, HASH_2, HASH_2)
> + self.assertEqual(want, m.text())
> + self.assertEqual(3, len(m))
> + self.assertEqual((h1, ''), m['alpha'])
> + self.assertEqual((h2, ''), m['beta'])
> + self.assertRaises(KeyError, lambda : m['foo'])
> + w = [('alpha', h1, ''), ('bar/baz/qux.py', h2, 'l'), ('beta', h2,
> '')]
> + self.assertEqual(w, list(m))
> +
> + def testSetGetNodeSuffix(self):
> + clean = parsers.lazymanifest(A_SHORT_MANIFEST)
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + h, f = m['foo']
> + want = h + 'a', f
> + # Merge code wants to set 21-byte fake hashes at times
> + m['foo'] = want
> + self.assertEqual(want, m['foo'])
> + self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2),
> 'l'),
> + ('foo', binascii.unhexlify(HASH_1) + 'a', '')],
> + list(m))
> + # Sometimes it even tries a 22-byte fake hash, but we can
> + # return 21 and it'll work out
> + m['foo'] = want[0] + '+', f
> + self.assertEqual(want, m['foo'])
> + # make sure the suffix survives a copy
> + m2 = m.filtercopy(lambda x: x == 'foo')
> + self.assertEqual(want, m2['foo'])
> + m2 = m.copy()
> + self.assertEqual(want, m2['foo'])
> + # suffix with iteration
> + self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2),
> 'l'),
> + ('foo', want[0], '')], list(m))
> + # shows up in diff
> + self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean))
> + self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
> +
> + def testFilterCopyException(self):
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + def filt(path):
> + if path == 'foo':
> + assert False
> + return True
> + self.assertRaises(AssertionError, m.filtercopy, filt)
> +
> + def testRemoveItem(self):
> + m = parsers.lazymanifest(A_SHORT_MANIFEST)
> + del m['foo']
> + self.assertRaises(KeyError, lambda : m['foo'])
> + self.assertEqual(1, len(m))
> + self.assertEqual(1, len(list(m)))
> +
> + def testManifestDiff(self):
> + MISSING = (None, '')
> + addl = 'z-only-in-left\0' + HASH_1 + '\n'
> + addr = 'z-only-in-right\0' + HASH_2 + 'x\n'
> + left = parsers.lazymanifest(
> + A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl)
> + right = parsers.lazymanifest(A_SHORT_MANIFEST + addr)
> + want = {
> + 'foo': ((binascii.unhexlify(HASH_3), 'x'),
> + (binascii.unhexlify(HASH_1), '')),
> + 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
> + 'z-only-in-right': (MISSING, (binascii.unhexlify(HASH_2),
> 'x')),
> + }
> + self.assertEqual(want, left.diff(right))
> +
> + want = {
> + 'bar/baz/qux.py': (MISSING, (binascii.unhexlify(HASH_2),
> 'l')),
> + 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')),
> + 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')),
> + }
> + self.assertEqual(want, parsers.lazymanifest('').diff(left))
> +
> + want = {
> + 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'),
> MISSING),
> + 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING),
> + 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
> + }
> + self.assertEqual(want, left.diff(parsers.lazymanifest('')))
> + copy = right.copy()
> + del copy['z-only-in-right']
> + del right['foo']
> + want = {
> + 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
> + 'z-only-in-right': ((binascii.unhexlify(HASH_2), 'x'),
> MISSING),
> + }
> + self.assertEqual(want, right.diff(copy))
> +
> + short = parsers.lazymanifest(A_SHORT_MANIFEST)
> + pruned = short.copy()
> + del pruned['foo']
> + want = {
> + 'foo': ((binascii.unhexlify(HASH_1), ''), MISSING),
> + }
> + self.assertEqual(want, short.diff(pruned))
> + want = {
> + 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
> + }
> + self.assertEqual(want, pruned.diff(short))
> + want = {
> + 'bar/baz/qux.py': None,
> + 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
> + }
> + self.assertEqual(want, pruned.diff(short, True))
> +
> + def testReversedLines(self):
> + backwards = ''.join(
> + l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if
> l)
> + try:
> + parsers.lazymanifest(backwards)
> + self.fail('Should have raised ValueError')
> + except ValueError, v:
> + self.assertEqual('Manifest lines not in sorted order.',
> v.message)
> +
> + def testNoTerminalNewline(self):
> + try:
> + parsers.lazymanifest(A_SHORT_MANIFEST + 'wat')
> + self.fail('Should have raised ValueError')
> + except ValueError, v:
> + self.assertEqual('Manifest did not end in a newline.',
> v.message)
> +
> + def testNoNewLineAtAll(self):
> + try:
> + parsers.lazymanifest('wat')
> + self.fail('Should have raised ValueError')
> + except ValueError, v:
> + self.assertEqual('Manifest did not end in a newline.',
> v.message)
> +
> + def testHugeManifest(self):
> + m = parsers.lazymanifest(A_HUGE_MANIFEST)
> + self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
> + self.assertEqual(len(m), len(list(m)))
> +
> +
> +if __name__ == '__main__':
> + silenttestrunner.main(__name__)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20150114/78e38051/attachment.html>
More information about the Mercurial-devel
mailing list