[PATCH] parse_manifest: rewrite to use memchr

Benoit Boissinot bboissin at gmail.com
Tue Sep 10 14:52:59 CDT 2013


On Sun, Sep 8, 2013 at 5:07 AM, Siddharth Agarwal <sid0 at fb.com> wrote:

> # HG changeset patch
> # User Siddharth Agarwal <sid0 at fb.com>
> # Date 1378536479 25200
> #      Fri Sep 06 23:47:59 2013 -0700
> # Node ID 3219c8821869d631bd6dab602a66019c4c43639c
> # Parent  6638dd23999d66a95ce28816f035a1d051963290
> parse_manifest: rewrite to use memchr
>
> memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc
> 2.12
> on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from
> 0.116
> seconds to 0.095 seconds.
>

Looks good.


>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -77,7 +77,7 @@
>  static PyObject *parse_manifest(PyObject *self, PyObject *args)
>  {
>         PyObject *mfdict, *fdict;
> -       char *str, *cur, *start, *zero;
> +       char *str, *start, *end, *zero, *newline;
>         int len;
>
>         if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
> @@ -86,21 +86,24 @@
>                               &str, &len))
>                 goto quit;
>
> -       for (start = cur = str, zero = NULL; cur < str + len; cur++) {
> +       start = str;
> +       end = str + len;
> +       while (start < end) {
>                 PyObject *file = NULL, *node = NULL;
>                 PyObject *flags = NULL;
>                 ptrdiff_t nlen;
>

you can now move the declaration of zero and newline here (and initialize).

>
> -               if (!*cur) {
> -                       zero = cur;
> -                       continue;
> -               }
> -               else if (*cur != '\n')
> -                       continue;
> -
> +               zero = memchr(start, '\0', end - start);
>                 if (!zero) {
>                         PyErr_SetString(PyExc_ValueError,
> -                                       "manifest entry has no separator");
> +                                       "manifest contains trailing
> garbage");
> +                       goto quit;
> +               }
> +
> +               newline = memchr(zero + 1, '\n', end - (zero + 1));
> +               if (!newline) {
> +                       PyErr_SetString(PyExc_ValueError,
> +                                       "manifest contains trailing
> garbage");
>                         goto quit;
>                 }
>
> @@ -109,7 +112,7 @@
>                 if (!file)
>                         goto bail;
>
> -               nlen = cur - zero - 1;
> +               nlen = newline - zero - 1;
>
>                 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
>                 if (!node)
> @@ -128,8 +131,7 @@
>                 if (PyDict_SetItem(mfdict, file, node) == -1)
>                         goto bail;
>
> -               start = cur + 1;
> -               zero = NULL;
> +               start = newline + 1;
>
>                 Py_XDECREF(flags);
>                 Py_XDECREF(node);
> @@ -142,12 +144,6 @@
>                 goto quit;
>         }
>
> -       if (len > 0 && *(cur - 1) != '\n') {
> -               PyErr_SetString(PyExc_ValueError,
> -                               "manifest contains trailing garbage");
> -               goto quit;
> -       }
> -
>         Py_INCREF(Py_None);
>         return Py_None;
>  quit:
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20130910/4cb38f97/attachment.html>


More information about the Mercurial-devel mailing list