[PATCH 3 of 4 V3] revset: move groupbranchiter over from graphmod

Martijn Pieters mj at zopatista.com
Wed Jun 8 12:20:40 EDT 2016


# HG changeset patch
# User Martijn Pieters <mjpieters at fb.com>
# Date 1465398911 -3600
#      Wed Jun 08 16:15:11 2016 +0100
# Node ID b573c3858edcd746dd0868a6b243dd73ddb8105a
# Parent  c1fbbaf06c22c20b04563b0ec09afb2f07e8e666
revset: move groupbranchiter over from graphmod

This move is to prepare the adaptation of this function into a toposort
predicate.

diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py
--- a/mercurial/graphmod.py
+++ b/mercurial/graphmod.py
@@ -19,8 +19,6 @@
 
 from __future__ import absolute_import
 
-import heapq
-
 from .node import nullrev
 from . import (
     revset,
@@ -37,204 +35,6 @@
 # (so making N negative) and all but the first N characters use that style.
 EDGES = {PARENT: '|', GRANDPARENT: ':', MISSINGPARENT: None}
 
-def groupbranchiter(revs, parentsfunc, firstbranch=()):
-    """Yield revisions from heads to roots one (topo) branch at a time.
-
-    This function aims to be used by a graph generator that wishes to minimize
-    the number of parallel branches and their interleaving.
-
-    Example iteration order (numbers show the "true" order in a changelog):
-
-      o  4
-      |
-      o  1
-      |
-      | o  3
-      | |
-      | o  2
-      |/
-      o  0
-
-    Note that the ancestors of merges are understood by the current
-    algorithm to be on the same branch. This means no reordering will
-    occur behind a merge.
-    """
-
-    ### Quick summary of the algorithm
-    #
-    # This function is based around a "retention" principle. We keep revisions
-    # in memory until we are ready to emit a whole branch that immediately
-    # "merges" into an existing one. This reduces the number of parallel
-    # branches with interleaved revisions.
-    #
-    # During iteration revs are split into two groups:
-    # A) revision already emitted
-    # B) revision in "retention". They are stored as different subgroups.
-    #
-    # for each REV, we do the following logic:
-    #
-    #   1) if REV is a parent of (A), we will emit it. If there is a
-    #   retention group ((B) above) that is blocked on REV being
-    #   available, we emit all the revisions out of that retention
-    #   group first.
-    #
-    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
-    #   available, if such subgroup exist, we add REV to it and the subgroup is
-    #   now awaiting for REV.parents() to be available.
-    #
-    #   3) finally if no such group existed in (B), we create a new subgroup.
-    #
-    #
-    # To bootstrap the algorithm, we emit the tipmost revision (which
-    # puts it in group (A) from above).
-
-    revs.sort(reverse=True)
-
-    # Set of parents of revision that have been emitted. They can be considered
-    # unblocked as the graph generator is already aware of them so there is no
-    # need to delay the revisions that reference them.
-    #
-    # If someone wants to prioritize a branch over the others, pre-filling this
-    # set will force all other branches to wait until this branch is ready to be
-    # emitted.
-    unblocked = set(firstbranch)
-
-    # list of groups waiting to be displayed, each group is defined by:
-    #
-    #   (revs:    lists of revs waiting to be displayed,
-    #    blocked: set of that cannot be displayed before those in 'revs')
-    #
-    # The second value ('blocked') correspond to parents of any revision in the
-    # group ('revs') that is not itself contained in the group. The main idea
-    # of this algorithm is to delay as much as possible the emission of any
-    # revision.  This means waiting for the moment we are about to display
-    # these parents to display the revs in a group.
-    #
-    # This first implementation is smart until it encounters a merge: it will
-    # emit revs as soon as any parent is about to be emitted and can grow an
-    # arbitrary number of revs in 'blocked'. In practice this mean we properly
-    # retains new branches but gives up on any special ordering for ancestors
-    # of merges. The implementation can be improved to handle this better.
-    #
-    # The first subgroup is special. It corresponds to all the revision that
-    # were already emitted. The 'revs' lists is expected to be empty and the
-    # 'blocked' set contains the parents revisions of already emitted revision.
-    #
-    # You could pre-seed the <parents> set of groups[0] to a specific
-    # changesets to select what the first emitted branch should be.
-    groups = [([], unblocked)]
-    pendingheap = []
-    pendingset = set()
-
-    heapq.heapify(pendingheap)
-    heappop = heapq.heappop
-    heappush = heapq.heappush
-    for currentrev in revs:
-        # Heap works with smallest element, we want highest so we invert
-        if currentrev not in pendingset:
-            heappush(pendingheap, -currentrev)
-            pendingset.add(currentrev)
-        # iterates on pending rev until after the current rev have been
-        # processed.
-        rev = None
-        while rev != currentrev:
-            rev = -heappop(pendingheap)
-            pendingset.remove(rev)
-
-            # Seek for a subgroup blocked, waiting for the current revision.
-            matching = [i for i, g in enumerate(groups) if rev in g[1]]
-
-            if matching:
-                # The main idea is to gather together all sets that are blocked
-                # on the same revision.
-                #
-                # Groups are merged when a common blocking ancestor is
-                # observed. For example, given two groups:
-                #
-                # revs [5, 4] waiting for 1
-                # revs [3, 2] waiting for 1
-                #
-                # These two groups will be merged when we process
-                # 1. In theory, we could have merged the groups when
-                # we added 2 to the group it is now in (we could have
-                # noticed the groups were both blocked on 1 then), but
-                # the way it works now makes the algorithm simpler.
-                #
-                # We also always keep the oldest subgroup first. We can
-                # probably improve the behavior by having the longest set
-                # first. That way, graph algorithms could minimise the length
-                # of parallel lines their drawing. This is currently not done.
-                targetidx = matching.pop(0)
-                trevs, tparents = groups[targetidx]
-                for i in matching:
-                    gr = groups[i]
-                    trevs.extend(gr[0])
-                    tparents |= gr[1]
-                # delete all merged subgroups (except the one we kept)
-                # (starting from the last subgroup for performance and
-                # sanity reasons)
-                for i in reversed(matching):
-                    del groups[i]
-            else:
-                # This is a new head. We create a new subgroup for it.
-                targetidx = len(groups)
-                groups.append(([], set([rev])))
-
-            gr = groups[targetidx]
-
-            # We now add the current nodes to this subgroups. This is done
-            # after the subgroup merging because all elements from a subgroup
-            # that relied on this rev must precede it.
-            #
-            # we also update the <parents> set to include the parents of the
-            # new nodes.
-            if rev == currentrev: # only display stuff in rev
-                gr[0].append(rev)
-            gr[1].remove(rev)
-            parents = [p for p in parentsfunc(rev) if p > nullrev]
-            gr[1].update(parents)
-            for p in parents:
-                if p not in pendingset:
-                    pendingset.add(p)
-                    heappush(pendingheap, -p)
-
-            # Look for a subgroup to display
-            #
-            # When unblocked is empty (if clause), we were not waiting for any
-            # revisions during the first iteration (if no priority was given) or
-            # if we emitted a whole disconnected set of the graph (reached a
-            # root).  In that case we arbitrarily take the oldest known
-            # subgroup. The heuristic could probably be better.
-            #
-            # Otherwise (elif clause) if the subgroup is blocked on
-            # a revision we just emitted, we can safely emit it as
-            # well.
-            if not unblocked:
-                if len(groups) > 1:  # display other subset
-                    targetidx = 1
-                    gr = groups[1]
-            elif not gr[1] & unblocked:
-                gr = None
-
-            if gr is not None:
-                # update the set of awaited revisions with the one from the
-                # subgroup
-                unblocked |= gr[1]
-                # output all revisions in the subgroup
-                for r in gr[0]:
-                    yield r
-                # delete the subgroup that you just output
-                # unless it is groups[0] in which case you just empty it.
-                if targetidx:
-                    del groups[targetidx]
-                else:
-                    gr[0][:] = []
-    # Check if we have some subgroup waiting for revisions we are not going to
-    # iterate over
-    for g in groups:
-        for r in g[0]:
-            yield r
-
 def dagwalker(repo, revs):
     """cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples
 
@@ -259,7 +59,7 @@
         if firstbranchrevset:
             firstbranch = repo.revs(firstbranchrevset)
         parentrevs = repo.changelog.parentrevs
-        revs = groupbranchiter(revs, parentrevs, firstbranch)
+        revs = revset.groupbranchiter(revs, parentrevs, firstbranch)
         revs = revset.baseset(revs)
 
     for rev in revs:
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -1887,6 +1887,204 @@
             raise error.ParseError(_("unknown sort key %r") % fk)
     return baseset([c.rev() for c in ctxs])
 
+def groupbranchiter(revs, parentsfunc, firstbranch=()):
+    """Yield revisions from heads to roots one (topo) branch at a time.
+
+    This function aims to be used by a graph generator that wishes to minimize
+    the number of parallel branches and their interleaving.
+
+    Example iteration order (numbers show the "true" order in a changelog):
+
+      o  4
+      |
+      o  1
+      |
+      | o  3
+      | |
+      | o  2
+      |/
+      o  0
+
+    Note that the ancestors of merges are understood by the current
+    algorithm to be on the same branch. This means no reordering will
+    occur behind a merge.
+    """
+
+    ### Quick summary of the algorithm
+    #
+    # This function is based around a "retention" principle. We keep revisions
+    # in memory until we are ready to emit a whole branch that immediately
+    # "merges" into an existing one. This reduces the number of parallel
+    # branches with interleaved revisions.
+    #
+    # During iteration revs are split into two groups:
+    # A) revision already emitted
+    # B) revision in "retention". They are stored as different subgroups.
+    #
+    # for each REV, we do the following logic:
+    #
+    #   1) if REV is a parent of (A), we will emit it. If there is a
+    #   retention group ((B) above) that is blocked on REV being
+    #   available, we emit all the revisions out of that retention
+    #   group first.
+    #
+    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
+    #   available, if such subgroup exist, we add REV to it and the subgroup is
+    #   now awaiting for REV.parents() to be available.
+    #
+    #   3) finally if no such group existed in (B), we create a new subgroup.
+    #
+    #
+    # To bootstrap the algorithm, we emit the tipmost revision (which
+    # puts it in group (A) from above).
+
+    revs.sort(reverse=True)
+
+    # Set of parents of revision that have been emitted. They can be considered
+    # unblocked as the graph generator is already aware of them so there is no
+    # need to delay the revisions that reference them.
+    #
+    # If someone wants to prioritize a branch over the others, pre-filling this
+    # set will force all other branches to wait until this branch is ready to be
+    # emitted.
+    unblocked = set(firstbranch)
+
+    # list of groups waiting to be displayed, each group is defined by:
+    #
+    #   (revs:    lists of revs waiting to be displayed,
+    #    blocked: set of that cannot be displayed before those in 'revs')
+    #
+    # The second value ('blocked') correspond to parents of any revision in the
+    # group ('revs') that is not itself contained in the group. The main idea
+    # of this algorithm is to delay as much as possible the emission of any
+    # revision.  This means waiting for the moment we are about to display
+    # these parents to display the revs in a group.
+    #
+    # This first implementation is smart until it encounters a merge: it will
+    # emit revs as soon as any parent is about to be emitted and can grow an
+    # arbitrary number of revs in 'blocked'. In practice this mean we properly
+    # retains new branches but gives up on any special ordering for ancestors
+    # of merges. The implementation can be improved to handle this better.
+    #
+    # The first subgroup is special. It corresponds to all the revision that
+    # were already emitted. The 'revs' lists is expected to be empty and the
+    # 'blocked' set contains the parents revisions of already emitted revision.
+    #
+    # You could pre-seed the <parents> set of groups[0] to a specific
+    # changesets to select what the first emitted branch should be.
+    groups = [([], unblocked)]
+    pendingheap = []
+    pendingset = set()
+
+    heapq.heapify(pendingheap)
+    heappop = heapq.heappop
+    heappush = heapq.heappush
+    for currentrev in revs:
+        # Heap works with smallest element, we want highest so we invert
+        if currentrev not in pendingset:
+            heappush(pendingheap, -currentrev)
+            pendingset.add(currentrev)
+        # iterates on pending rev until after the current rev have been
+        # processed.
+        rev = None
+        while rev != currentrev:
+            rev = -heappop(pendingheap)
+            pendingset.remove(rev)
+
+            # Seek for a subgroup blocked, waiting for the current revision.
+            matching = [i for i, g in enumerate(groups) if rev in g[1]]
+
+            if matching:
+                # The main idea is to gather together all sets that are blocked
+                # on the same revision.
+                #
+                # Groups are merged when a common blocking ancestor is
+                # observed. For example, given two groups:
+                #
+                # revs [5, 4] waiting for 1
+                # revs [3, 2] waiting for 1
+                #
+                # These two groups will be merged when we process
+                # 1. In theory, we could have merged the groups when
+                # we added 2 to the group it is now in (we could have
+                # noticed the groups were both blocked on 1 then), but
+                # the way it works now makes the algorithm simpler.
+                #
+                # We also always keep the oldest subgroup first. We can
+                # probably improve the behavior by having the longest set
+                # first. That way, graph algorithms could minimise the length
+                # of parallel lines their drawing. This is currently not done.
+                targetidx = matching.pop(0)
+                trevs, tparents = groups[targetidx]
+                for i in matching:
+                    gr = groups[i]
+                    trevs.extend(gr[0])
+                    tparents |= gr[1]
+                # delete all merged subgroups (except the one we kept)
+                # (starting from the last subgroup for performance and
+                # sanity reasons)
+                for i in reversed(matching):
+                    del groups[i]
+            else:
+                # This is a new head. We create a new subgroup for it.
+                targetidx = len(groups)
+                groups.append(([], set([rev])))
+
+            gr = groups[targetidx]
+
+            # We now add the current nodes to this subgroups. This is done
+            # after the subgroup merging because all elements from a subgroup
+            # that relied on this rev must precede it.
+            #
+            # we also update the <parents> set to include the parents of the
+            # new nodes.
+            if rev == currentrev: # only display stuff in rev
+                gr[0].append(rev)
+            gr[1].remove(rev)
+            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
+            gr[1].update(parents)
+            for p in parents:
+                if p not in pendingset:
+                    pendingset.add(p)
+                    heappush(pendingheap, -p)
+
+            # Look for a subgroup to display
+            #
+            # When unblocked is empty (if clause), we were not waiting for any
+            # revisions during the first iteration (if no priority was given) or
+            # if we emitted a whole disconnected set of the graph (reached a
+            # root).  In that case we arbitrarily take the oldest known
+            # subgroup. The heuristic could probably be better.
+            #
+            # Otherwise (elif clause) if the subgroup is blocked on
+            # a revision we just emitted, we can safely emit it as
+            # well.
+            if not unblocked:
+                if len(groups) > 1:  # display other subset
+                    targetidx = 1
+                    gr = groups[1]
+            elif not gr[1] & unblocked:
+                gr = None
+
+            if gr is not None:
+                # update the set of awaited revisions with the one from the
+                # subgroup
+                unblocked |= gr[1]
+                # output all revisions in the subgroup
+                for r in gr[0]:
+                    yield r
+                # delete the subgroup that you just output
+                # unless it is groups[0] in which case you just empty it.
+                if targetidx:
+                    del groups[targetidx]
+                else:
+                    gr[0][:] = []
+    # Check if we have some subgroup waiting for revisions we are not going to
+    # iterate over
+    for g in groups:
+        for r in g[0]:
+            yield r
+
 @predicate('subrepo([pattern])')
 def subrepo(repo, subset, x):
     """Changesets that add, modify or remove the given subrepo.  If no subrepo


More information about the Mercurial-devel mailing list