[PATCH] revset: fix iteration over ordered addset composed of non-ordered operant

Augie Fackler raf at durin42.com
Fri May 15 16:18:34 CDT 2015


On Fri, May 15, 2015 at 01:29:25AM -0700, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david at fb.com>
> # Date 1431674743 25200
> #      Fri May 15 00:25:43 2015 -0700
> # Node ID c1b2704abcd0dd81c7f29d1fc21e017665e0941c
> # Parent  d1bd0fd07ee6adf4ab3be2b0a0a7c0df54d55abf
> revset: fix iteration over ordered addset composed of non-ordered operant

Queued this with some commit message copyediting.

>
> Before this change, doing ordered iteration over an 'addset' object composed of
> operant without fastasc or fastdesc method could result into duplicated entry.
> This was the result of applying '_iterordered' on non ordered set.
>
> We fix it by ensuring we iterate over the set in a sorted order. Using the fast
> iterator when it exists on any operand. We kill the '_iterator' method in the
> process because it did not made a lot of sense independently.
>
> Thanks goes to Yuja Nishihara for reporting the issue and analysing the cause.
>
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -2972,38 +2972,38 @@ class addset(abstractsmartset):
>      iterate ascending:
>      >>> rs = addset(xs, ys, ascending=True)
>      >>> [x for x in rs], [x for x in rs.fastasc()]  # without _asclist
>      ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
>      >>> assert not rs._asclist
> -    >>> len(rs)  # BROKEN
> -    6
> -    >>> [x for x in rs], [x for x in rs.fastasc()]  # BROKEN with _asclist
> -    ([0, 2, 2, 3, 4, 5], [0, 2, 2, 3, 4, 5])
> +    >>> len(rs)
> +    5
> +    >>> [x for x in rs], [x for x in rs.fastasc()]
> +    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
>      >>> assert rs._asclist
>
>      iterate descending:
>      >>> rs = addset(xs, ys, ascending=False)
>      >>> [x for x in rs], [x for x in rs.fastdesc()]  # without _asclist
>      ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
>      >>> assert not rs._asclist
> -    >>> len(rs)  # BROKEN
> -    6
> -    >>> [x for x in rs], [x for x in rs.fastdesc()]  # BROKEN with _asclist
> -    ([5, 4, 3, 2, 2, 0], [5, 4, 3, 2, 2, 0])
> +    >>> len(rs)
> +    5
> +    >>> [x for x in rs], [x for x in rs.fastdesc()]
> +    ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
>      >>> assert rs._asclist
>
>      iterate ascending without fastasc:
>      >>> rs = addset(xs, generatorset(ys), ascending=True)
>      >>> assert rs.fastasc is None
> -    >>> [x for x in rs]  # BROKEN
> -    [0, 2, 2, 3, 4, 5]
> +    >>> [x for x in rs]
> +    [0, 2, 3, 4, 5]
>
>      iterate descending without fastdesc:
>      >>> rs = addset(generatorset(xs), ys, ascending=False)
>      >>> assert rs.fastdesc is None
> -    >>> [x for x in rs]  # BROKEN
> -    [5, 4, 3, 2, 2, 0]
> +    >>> [x for x in rs]
> +    [5, 4, 3, 2, 0]
>      """
>      def __init__(self, revs1, revs2, ascending=None):
>          self._r1 = revs1
>          self._r2 = revs2
>          self._iter = None
> @@ -3018,53 +3018,61 @@ class addset(abstractsmartset):
>          return bool(self._r1) or bool(self._r2)
>
>      @util.propertycache
>      def _list(self):
>          if not self._genlist:
> -            self._genlist = baseset(self._iterator())
> +            self._genlist = baseset(iter(self))
>          return self._genlist
>
> -    def _iterator(self):
> +    def __iter__(self):
>          """Iterate over both collections without repeating elements
>
>          If the ascending attribute is not set, iterate over the first one and
>          then over the second one checking for membership on the first one so we
>          dont yield any duplicates.
>
>          If the ascending attribute is set, iterate over both collections at the
>          same time, yielding only one value at a time in the given order.
>          """
>          if self._ascending is None:
> -            def gen():
> +            if self._genlist:
> +                return iter(self._genlist)
> +            def arbitraryordergen():
>                  for r in self._r1:
>                      yield r
>                  inr1 = self._r1.__contains__
>                  for r in self._r2:
>                      if not inr1(r):
>                          yield r
> -            gen = gen()
> -        else:
> -            iter1 = iter(self._r1)
> -            iter2 = iter(self._r2)
> -            gen = self._iterordered(self._ascending, iter1, iter2)
> -        return gen
> -
> -    def __iter__(self):
> -        if self._ascending is None:
> -            if self._genlist:
> -                return iter(self._genlist)
> -            return iter(self._iterator())
> +            return arbitraryordergen()
> +        # try to use our own fast iterator if it exists
>          self._trysetasclist()
>          if self._ascending:
>              it = self.fastasc
>          else:
>              it = self.fastdesc
> -        if it is None:
> -            # consume the gen and try again
> -            self._list
> -            return iter(self)
> -        return it()
> +        if it is not None:
> +            return it()
> +        # maybe half of the component supports fast
> +        attr = 'fastdesc'
> +        if self._ascending:
> +            attr = 'fastasc'
> +        # get iterator for _r1
> +        iter1 = getattr(self._r1, attr)
> +        if iter1 is None:
> +            # let's avoid side effect (not sure it matters)
> +            iter1 = iter(sorted(self._r1, reverse=not self._ascending))
> +        else:
> +            iter1 = iter1()
> +        # get iterator for _r2
> +        iter2 = getattr(self._r2, attr)
> +        if iter2 is None:
> +            # let's avoid side effect (not sure it matters)
> +            iter2 = iter(sorted(self._r2, reverse=not self._ascending))
> +        else:
> +            iter2 = iter2()
> +        return self._iterordered(self._ascending, iter1, iter2)
>
>      def _trysetasclist(self):
>          """populate the _asclist attribute if possible and necessary"""
>          if self._genlist is not None and self._asclist is None:
>              self._asclist = sorted(self._genlist)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list