[PATCH] revset: fix iteration over ordered addset composed of non-ordered operant
Augie Fackler
raf at durin42.com
Fri May 15 16:18:34 CDT 2015
On Fri, May 15, 2015 at 01:29:25AM -0700, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david at fb.com>
> # Date 1431674743 25200
> # Fri May 15 00:25:43 2015 -0700
> # Node ID c1b2704abcd0dd81c7f29d1fc21e017665e0941c
> # Parent d1bd0fd07ee6adf4ab3be2b0a0a7c0df54d55abf
> revset: fix iteration over ordered addset composed of non-ordered operant
Queued this with some commit message copyediting.
>
> Before this change, doing ordered iteration over an 'addset' object composed of
> operant without fastasc or fastdesc method could result into duplicated entry.
> This was the result of applying '_iterordered' on non ordered set.
>
> We fix it by ensuring we iterate over the set in a sorted order. Using the fast
> iterator when it exists on any operand. We kill the '_iterator' method in the
> process because it did not made a lot of sense independently.
>
> Thanks goes to Yuja Nishihara for reporting the issue and analysing the cause.
>
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -2972,38 +2972,38 @@ class addset(abstractsmartset):
> iterate ascending:
> >>> rs = addset(xs, ys, ascending=True)
> >>> [x for x in rs], [x for x in rs.fastasc()] # without _asclist
> ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
> >>> assert not rs._asclist
> - >>> len(rs) # BROKEN
> - 6
> - >>> [x for x in rs], [x for x in rs.fastasc()] # BROKEN with _asclist
> - ([0, 2, 2, 3, 4, 5], [0, 2, 2, 3, 4, 5])
> + >>> len(rs)
> + 5
> + >>> [x for x in rs], [x for x in rs.fastasc()]
> + ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
> >>> assert rs._asclist
>
> iterate descending:
> >>> rs = addset(xs, ys, ascending=False)
> >>> [x for x in rs], [x for x in rs.fastdesc()] # without _asclist
> ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
> >>> assert not rs._asclist
> - >>> len(rs) # BROKEN
> - 6
> - >>> [x for x in rs], [x for x in rs.fastdesc()] # BROKEN with _asclist
> - ([5, 4, 3, 2, 2, 0], [5, 4, 3, 2, 2, 0])
> + >>> len(rs)
> + 5
> + >>> [x for x in rs], [x for x in rs.fastdesc()]
> + ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
> >>> assert rs._asclist
>
> iterate ascending without fastasc:
> >>> rs = addset(xs, generatorset(ys), ascending=True)
> >>> assert rs.fastasc is None
> - >>> [x for x in rs] # BROKEN
> - [0, 2, 2, 3, 4, 5]
> + >>> [x for x in rs]
> + [0, 2, 3, 4, 5]
>
> iterate descending without fastdesc:
> >>> rs = addset(generatorset(xs), ys, ascending=False)
> >>> assert rs.fastdesc is None
> - >>> [x for x in rs] # BROKEN
> - [5, 4, 3, 2, 2, 0]
> + >>> [x for x in rs]
> + [5, 4, 3, 2, 0]
> """
> def __init__(self, revs1, revs2, ascending=None):
> self._r1 = revs1
> self._r2 = revs2
> self._iter = None
> @@ -3018,53 +3018,61 @@ class addset(abstractsmartset):
> return bool(self._r1) or bool(self._r2)
>
> @util.propertycache
> def _list(self):
> if not self._genlist:
> - self._genlist = baseset(self._iterator())
> + self._genlist = baseset(iter(self))
> return self._genlist
>
> - def _iterator(self):
> + def __iter__(self):
> """Iterate over both collections without repeating elements
>
> If the ascending attribute is not set, iterate over the first one and
> then over the second one checking for membership on the first one so we
> dont yield any duplicates.
>
> If the ascending attribute is set, iterate over both collections at the
> same time, yielding only one value at a time in the given order.
> """
> if self._ascending is None:
> - def gen():
> + if self._genlist:
> + return iter(self._genlist)
> + def arbitraryordergen():
> for r in self._r1:
> yield r
> inr1 = self._r1.__contains__
> for r in self._r2:
> if not inr1(r):
> yield r
> - gen = gen()
> - else:
> - iter1 = iter(self._r1)
> - iter2 = iter(self._r2)
> - gen = self._iterordered(self._ascending, iter1, iter2)
> - return gen
> -
> - def __iter__(self):
> - if self._ascending is None:
> - if self._genlist:
> - return iter(self._genlist)
> - return iter(self._iterator())
> + return arbitraryordergen()
> + # try to use our own fast iterator if it exists
> self._trysetasclist()
> if self._ascending:
> it = self.fastasc
> else:
> it = self.fastdesc
> - if it is None:
> - # consume the gen and try again
> - self._list
> - return iter(self)
> - return it()
> + if it is not None:
> + return it()
> + # maybe half of the component supports fast
> + attr = 'fastdesc'
> + if self._ascending:
> + attr = 'fastasc'
> + # get iterator for _r1
> + iter1 = getattr(self._r1, attr)
> + if iter1 is None:
> + # let's avoid side effect (not sure it matters)
> + iter1 = iter(sorted(self._r1, reverse=not self._ascending))
> + else:
> + iter1 = iter1()
> + # get iterator for _r2
> + iter2 = getattr(self._r2, attr)
> + if iter2 is None:
> + # let's avoid side effect (not sure it matters)
> + iter2 = iter(sorted(self._r2, reverse=not self._ascending))
> + else:
> + iter2 = iter2()
> + return self._iterordered(self._ascending, iter1, iter2)
>
> def _trysetasclist(self):
> """populate the _asclist attribute if possible and necessary"""
> if self._genlist is not None and self._asclist is None:
> self._asclist = sorted(self._genlist)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list