[PATCH 3 of 3 V2] revsets: add new toposort predicate

Wed May 18 08:16:14 EDT 2016

# HG changeset patch
# User Martijn Pieters <mjpieters at fb.com>
# Date 1463568973 -3600
#      Wed May 18 11:56:13 2016 +0100
# Node ID f0b544ff97aa39bfc7ac75bcf602280b549fbb0d
# Parent  54de7925d82a18ee1048ee09fab7448c937b7a7f
revsets: add new toposort predicate

Sort revisions in reverse revision order but grouped by topographical branches.
Visualised as a graph, instead of:

  o  4
  |
  | o  3
  | |
  | o  2
  | |
  o |  1
  |/
  o  0

revisions on a 'main' branch are emitted before 'side' branches:

  o  4
  |
  o  1
  |
  | o  3
  | |
  | o  2
  |/
  o  0

where what constitutes a 'main' branch is configurable, so the sort could also
result in:

  o  3
  |
  o  2
  |
  | o  4
  | |
  | o  1
  |/
  o  0

This sort was already available as an experimental option in the graphmod
module, from which it is now removed.

This sort is best used with hg log -G:

  $ hg log -G "toposort(all())"

diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -2147,7 +2147,7 @@
     if opts.get('rev'):
         # User-specified revs might be unsorted, but don't sort before
         # _makelogrevset because it might depend on the order of revs
-        if revs.sortorder != ('-rev',):
+        if revs.sortorder not in (('-rev',), ('topo',)):
             revs.sort(reverse=True)
     if expr:
         # Revset matchers often operate faster on revisions in changelog
diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py
--- a/mercurial/graphmod.py
+++ b/mercurial/graphmod.py
@@ -19,8 +19,6 @@
 
 from __future__ import absolute_import
 
-import heapq
-
 from .node import nullrev
 from . import (
     revset,
@@ -37,204 +35,6 @@
 # (so making N negative) and all but the first N characters use that style.
 EDGES = {PARENT: '|', GRANDPARENT: ':', MISSINGPARENT: None}
 
-def groupbranchiter(revs, parentsfunc, firstbranch=()):
-    """Yield revisions from heads to roots one (topo) branch at a time.
-
-    This function aims to be used by a graph generator that wishes to minimize
-    the number of parallel branches and their interleaving.
-
-    Example iteration order (numbers show the "true" order in a changelog):
-
-      o  4
-      |
-      o  1
-      |
-      | o  3
-      | |
-      | o  2
-      |/
-      o  0
-
-    Note that the ancestors of merges are understood by the current
-    algorithm to be on the same branch. This means no reordering will
-    occur behind a merge.
-    """
-
-    ### Quick summary of the algorithm
-    #
-    # This function is based around a "retention" principle. We keep revisions
-    # in memory until we are ready to emit a whole branch that immediately
-    # "merges" into an existing one. This reduces the number of parallel
-    # branches with interleaved revisions.
-    #
-    # During iteration revs are split into two groups:
-    # A) revision already emitted
-    # B) revision in "retention". They are stored as different subgroups.
-    #
-    # for each REV, we do the following logic:
-    #
-    #   1) if REV is a parent of (A), we will emit it. If there is a
-    #   retention group ((B) above) that is blocked on REV being
-    #   available, we emit all the revisions out of that retention
-    #   group first.
-    #
-    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
-    #   available, if such subgroup exist, we add REV to it and the subgroup is
-    #   now awaiting for REV.parents() to be available.
-    #
-    #   3) finally if no such group existed in (B), we create a new subgroup.
-    #
-    #
-    # To bootstrap the algorithm, we emit the tipmost revision (which
-    # puts it in group (A) from above).
-
-    revs.sort(reverse=True)
-
-    # Set of parents of revision that have been emitted. They can be considered
-    # unblocked as the graph generator is already aware of them so there is no
-    # need to delay the revisions that reference them.
-    #
-    # If someone wants to prioritize a branch over the others, pre-filling this
-    # set will force all other branches to wait until this branch is ready to be
-    # emitted.
-    unblocked = set(firstbranch)
-
-    # list of groups waiting to be displayed, each group is defined by:
-    #
-    #   (revs:    lists of revs waiting to be displayed,
-    #    blocked: set of that cannot be displayed before those in 'revs')
-    #
-    # The second value ('blocked') correspond to parents of any revision in the
-    # group ('revs') that is not itself contained in the group. The main idea
-    # of this algorithm is to delay as much as possible the emission of any
-    # revision.  This means waiting for the moment we are about to display
-    # these parents to display the revs in a group.
-    #
-    # This first implementation is smart until it encounters a merge: it will
-    # emit revs as soon as any parent is about to be emitted and can grow an
-    # arbitrary number of revs in 'blocked'. In practice this mean we properly
-    # retains new branches but gives up on any special ordering for ancestors
-    # of merges. The implementation can be improved to handle this better.
-    #
-    # The first subgroup is special. It corresponds to all the revision that
-    # were already emitted. The 'revs' lists is expected to be empty and the
-    # 'blocked' set contains the parents revisions of already emitted revision.
-    #
-    # You could pre-seed the <parents> set of groups[0] to a specific
-    # changesets to select what the first emitted branch should be.
-    groups = [([], unblocked)]
-    pendingheap = []
-    pendingset = set()
-
-    heapq.heapify(pendingheap)
-    heappop = heapq.heappop
-    heappush = heapq.heappush
-    for currentrev in revs:
-        # Heap works with smallest element, we want highest so we invert
-        if currentrev not in pendingset:
-            heappush(pendingheap, -currentrev)
-            pendingset.add(currentrev)
-        # iterates on pending rev until after the current rev have been
-        # processed.
-        rev = None
-        while rev != currentrev:
-            rev = -heappop(pendingheap)
-            pendingset.remove(rev)
-
-            # Seek for a subgroup blocked, waiting for the current revision.
-            matching = [i for i, g in enumerate(groups) if rev in g[1]]
-
-            if matching:
-                # The main idea is to gather together all sets that are blocked
-                # on the same revision.
-                #
-                # Groups are merged when a common blocking ancestor is
-                # observed. For example, given two groups:
-                #
-                # revs [5, 4] waiting for 1
-                # revs [3, 2] waiting for 1
-                #
-                # These two groups will be merged when we process
-                # 1. In theory, we could have merged the groups when
-                # we added 2 to the group it is now in (we could have
-                # noticed the groups were both blocked on 1 then), but
-                # the way it works now makes the algorithm simpler.
-                #
-                # We also always keep the oldest subgroup first. We can
-                # probably improve the behavior by having the longest set
-                # first. That way, graph algorithms could minimise the length
-                # of parallel lines their drawing. This is currently not done.
-                targetidx = matching.pop(0)
-                trevs, tparents = groups[targetidx]
-                for i in matching:
-                    gr = groups[i]
-                    trevs.extend(gr[0])
-                    tparents |= gr[1]
-                # delete all merged subgroups (except the one we kept)
-                # (starting from the last subgroup for performance and
-                # sanity reasons)
-                for i in reversed(matching):
-                    del groups[i]
-            else:
-                # This is a new head. We create a new subgroup for it.
-                targetidx = len(groups)
-                groups.append(([], set([rev])))
-
-            gr = groups[targetidx]
-
-            # We now add the current nodes to this subgroups. This is done
-            # after the subgroup merging because all elements from a subgroup
-            # that relied on this rev must precede it.
-            #
-            # we also update the <parents> set to include the parents of the
-            # new nodes.
-            if rev == currentrev: # only display stuff in rev
-                gr[0].append(rev)
-            gr[1].remove(rev)
-            parents = [p for p in parentsfunc(rev) if p > nullrev]
-            gr[1].update(parents)
-            for p in parents:
-                if p not in pendingset:
-                    pendingset.add(p)
-                    heappush(pendingheap, -p)
-
-            # Look for a subgroup to display
-            #
-            # When unblocked is empty (if clause), we were not waiting for any
-            # revisions during the first iteration (if no priority was given) or
-            # if we emitted a whole disconnected set of the graph (reached a
-            # root).  In that case we arbitrarily take the oldest known
-            # subgroup. The heuristic could probably be better.
-            #
-            # Otherwise (elif clause) if the subgroup is blocked on
-            # a revision we just emitted, we can safely emit it as
-            # well.
-            if not unblocked:
-                if len(groups) > 1:  # display other subset
-                    targetidx = 1
-                    gr = groups[1]
-            elif not gr[1] & unblocked:
-                gr = None
-
-            if gr is not None:
-                # update the set of awaited revisions with the one from the
-                # subgroup
-                unblocked |= gr[1]
-                # output all revisions in the subgroup
-                for r in gr[0]:
-                    yield r
-                # delete the subgroup that you just output
-                # unless it is groups[0] in which case you just empty it.
-                if targetidx:
-                    del groups[targetidx]
-                else:
-                    gr[0][:] = []
-    # Check if we have some subgroup waiting for revisions we are not going to
-    # iterate over
-    for g in groups:
-        for r in g[0]:
-            yield r
-
 def dagwalker(repo, revs):
     """cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples
 
@@ -252,16 +52,6 @@
 
     gpcache = {}
 
-    if repo.ui.configbool('experimental', 'graph-group-branches', False):
-        firstbranch = ()
-        firstbranchrevset = repo.ui.config(
-            'experimental', 'graph-group-branches.firstbranch', '')
-        if firstbranchrevset:
-            firstbranch = repo.revs(firstbranchrevset)
-        parentrevs = repo.changelog.parentrevs
-        revs = groupbranchiter(revs, parentrevs, firstbranch)
-        revs = revset.baseset(revs)
-
     for rev in revs:
         ctx = repo[rev]
         # partition into parents in the rev set and missing parents, then
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -1973,6 +1973,225 @@
 def tagged(repo, subset, x):
     return tag(repo, subset, x)
 
+ at predicate('toposort(set[, firstbranch...])', safe=True)
+def toposort(repo, subset, x):
+    # revs, parentsfunc, firstbranch=()):
+    """Sort set in topological order.
+
+    Revisions are ordered from heads to roots, one topographical branch at a
+    time. Any branches named are displayed first.
+
+    Example iteration order (numbers show the "true" order in a changelog):
+
+    |  o  4
+    |  |
+    |  o  1
+    |  |
+    |  | o  3
+    |  | |
+    |  | o  2
+    |  |/
+    |  o  0
+
+    Ancestors of merges are understood to be on the same branch. This means no
+    reordering will occur behind a merge.
+
+    """
+    # i18n: "toposort" is a keyword
+    l = getargs(x, 1, 2, _("toposort requires one or two arguments"))
+    firstbranch = ''
+    if len(l) == 2:
+        # i18n: "firstbranch" is a keyword
+        firstbranch = getstring(l[1], _("firstbranch spec must be a string"))
+
+    s = l[0]
+    firstbranch = [repo[b].rev() for b in firstbranch.split() if b in repo]
+    revs = getset(repo, subset, s)
+    parentsfunc = repo.changelog.parentrevs
+
+    def groupbranchiter():
+        # Quick summary of the algorithm
+        #
+        # This function is based around a "retention" principle. We keep
+        # revisions in memory until we are ready to emit a whole branch that
+        # immediately "merges" into an existing one. This reduces the number
+        # of parallel branches with interleaved revisions.
+        #
+        # During iteration revs are split into two groups:
+        # A) revision already emitted
+        # B) revision in "retention". They are stored as different subgroups.
+        #
+        # for each REV, we do the following logic:
+        #
+        #   1) if REV is a parent of (A), we will emit it. If there is a
+        #   retention group ((B) above) that is blocked on REV being
+        #   available, we emit all the revisions out of that retention
+        #   group first.
+        #
+        #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
+        #   available, if such subgroup exist, we add REV to it and the
+        #   subgroup is now awaiting for REV.parents() to be available.
+        #
+        #   3) finally if no such group existed in (B), we create a new
+        #   subgroup.
+        #
+        # To bootstrap the algorithm, we emit the tipmost revision (which
+        # puts it in group (A) from above).
+
+        revs.sort(reverse=True)
+
+        # Set of parents of revision that have been emitted. They can be
+        # considered unblocked as the graph generator is already aware of them
+        # so there is no need to delay the revisions that reference them.
+        #
+        # If someone wants to prioritize a branch over the others, pre-filling
+        # this set will force all other branches to wait until this branch is
+        # ready to be emitted.
+        unblocked = set(firstbranch)
+
+        # list of groups waiting to be displayed, each group is defined by:
+        #
+        #   (revs:    lists of revs waiting to be displayed,
+        #    blocked: set of that cannot be displayed before those in 'revs')
+        #
+        # The second value ('blocked') correspond to parents of any revision
+        # in the group ('revs') that is not itself contained in the group. The
+        # main idea of this algorithm is to delay as much as possible the
+        # emission of any revision.  This means waiting for the moment we are
+        # about to display these parents to display the revs in a group.
+        #
+        # This first implementation is smart until it encounters a merge: it
+        # will emit revs as soon as any parent is about to be emitted and can
+        # grow an arbitrary number of revs in 'blocked'. In practice this mean
+        # we properly retains new branches but gives up on any special
+        # ordering for ancestors of merges. The implementation can be improved
+        # to handle this better.
+        #
+        # The first subgroup is special. It corresponds to all the revision
+        # that were already emitted. The 'revs' lists is expected to be empty
+        # and the 'blocked' set contains the parents revisions of already
+        # emitted revision.
+        #
+        # You could pre-seed the <parents> set of groups[0] to a specific
+        # changesets to select what the first emitted branch should be.
+        groups = [([], unblocked)]
+        pendingheap = []
+        pendingset = set()
+
+        heapq.heapify(pendingheap)
+        heappop = heapq.heappop
+        heappush = heapq.heappush
+        for currentrev in revs:
+            # Heap works with smallest element, we want highest so we invert
+            if currentrev not in pendingset:
+                heappush(pendingheap, -currentrev)
+                pendingset.add(currentrev)
+            # iterates on pending rev until after the current rev have been
+            # processed.
+            rev = None
+            while rev != currentrev:
+                rev = -heappop(pendingheap)
+                pendingset.remove(rev)
+
+                # Seek for a subgroup blocked, waiting for the current
+                # revision.
+                matching = [i for i, g in enumerate(groups) if rev in g[1]]
+
+                if matching:
+                    # The main idea is to gather together all sets that are
+                    # blocked on the same revision.
+                    #
+                    # Groups are merged when a common blocking ancestor is
+                    # observed. For example, given two groups:
+                    #
+                    # revs [5, 4] waiting for 1
+                    # revs [3, 2] waiting for 1
+                    #
+                    # These two groups will be merged when we process
+                    # 1. In theory, we could have merged the groups when
+                    # we added 2 to the group it is now in (we could have
+                    # noticed the groups were both blocked on 1 then), but
+                    # the way it works now makes the algorithm simpler.
+                    #
+                    # We also always keep the oldest subgroup first. We can
+                    # probably improve the behavior by having the longest set
+                    # first. That way, graph algorithms could minimise the
+                    # length of parallel lines their drawing. This is
+                    # currently not done.
+                    targetidx = matching.pop(0)
+                    trevs, tparents = groups[targetidx]
+                    for i in matching:
+                        gr = groups[i]
+                        trevs.extend(gr[0])
+                        tparents |= gr[1]
+                    # delete all merged subgroups (except the one we kept)
+                    # (starting from the last subgroup for performance and
+                    # sanity reasons)
+                    for i in reversed(matching):
+                        del groups[i]
+                else:
+                    # This is a new head. We create a new subgroup for it.
+                    targetidx = len(groups)
+                    groups.append(([], set([rev])))
+
+                gr = groups[targetidx]
+
+                # We now add the current nodes to this subgroups. This is done
+                # after the subgroup merging because all elements from a
+                # subgroup that relied on this rev must precede it.
+                #
+                # we also update the <parents> set to include the parents of
+                # the new nodes.
+                if rev == currentrev:  # only display stuff in rev
+                    gr[0].append(rev)
+                gr[1].remove(rev)
+                parents = [p for p in parentsfunc(rev) if p > node.nullrev]
+                gr[1].update(parents)
+                for p in parents:
+                    if p not in pendingset:
+                        pendingset.add(p)
+                        heappush(pendingheap, -p)
+
+                # Look for a subgroup to display
+                #
+                # When unblocked is empty (if clause), we were not waiting for
+                # any revisions during the first iteration (if no priority was
+                # given) or if we emitted a whole disconnected set of the
+                # graph (reached a root).  In that case we arbitrarily take
+                # the oldest known subgroup. The heuristic could probably be
+                # better.
+                #
+                # Otherwise (elif clause) if the subgroup is blocked on
+                # a revision we just emitted, we can safely emit it as
+                # well.
+                if not unblocked:
+                    if len(groups) > 1:  # display other subset
+                        targetidx = 1
+                        gr = groups[1]
+                elif not gr[1] & unblocked:
+                    gr = None
+
+                if gr is not None:
+                    # update the set of awaited revisions with the one from the
+                    # subgroup
+                    unblocked |= gr[1]
+                    # output all revisions in the subgroup
+                    for r in gr[0]:
+                        yield r
+                    # delete the subgroup that you just output
+                    # unless it is groups[0] in which case you just empty it.
+                    if targetidx:
+                        del groups[targetidx]
+                    else:
+                        gr[0][:] = []
+        # Check if we have some subgroup waiting for revisions we are not
+        # going to iterate over
+        for g in groups:
+            for r in g[0]:
+                yield r
+
+    return baseset(groupbranchiter(), sortorder=('topo',))
+
 @predicate('unstable()', safe=True)
 def unstable(repo, subset, x):
     """Non-obsolete changesets with obsolete ancestors.
diff --git a/tests/test-glog-topological.t b/tests/test-glog-topological.t
--- a/tests/test-glog-topological.t
+++ b/tests/test-glog-topological.t
@@ -40,7 +40,7 @@
 
 (display all nodes)
 
-  $ hg --config experimental.graph-group-branches=1 log -G
+  $ hg log -G -r 'toposort(all())'
   o  8
   |
   o  3
@@ -62,7 +62,7 @@
 
 (revset skipping nodes)
 
-  $ hg --config experimental.graph-group-branches=1 log -G --rev 'not (2+6)'
+  $ hg log -G --rev 'toposort(not (2+6))'
   o  8
   |
   o  3
@@ -80,7 +80,7 @@
 
 (begin) from the other branch
 
-  $ hg --config experimental.graph-group-branches=1 --config experimental.graph-group-branches.firstbranch=5 log -G
+  $ hg log -G -r 'toposort(all(), 5)'
   o  7
   |
   o  6
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -1100,6 +1100,46 @@
   0 b12  m111 u112 111 10800
   2 b111 m11  u12  111 3600
 
+ toposort prioritises graph branches
+
+  $ hg up 2
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ touch a
+  $ hg addremove
+  adding a
+  $ hg ci -m 't1' -u 'tu' -d '130 0'
+  created new head
+  $ echo 'a' >> a
+  $ hg ci -m 't2' -u 'tu' -d '130 0'
+  $ hg book book1
+  $ hg up 4
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  (leaving bookmark book1)
+  $ touch a
+  $ hg addremove
+  adding a
+  $ hg ci -m 't3' -u 'tu' -d '130 0'
+
+  $ hg log -r 'toposort(all())'
+  7 b111 t3   tu   130 0
+  4 b111 m112 u111 110 14400
+  3 b112 m111 u11  120 0
+  6 b111 t2   tu   130 0
+  5 b111 t1   tu   130 0
+  2 b111 m11  u12  111 3600
+  1 b11  m12  u111 112 7200
+  0 b12  m111 u112 111 10800
+
+  $ hg log -r 'toposort(all(), book1)'
+  6 b111 t2   tu   130 0
+  5 b111 t1   tu   130 0
+  7 b111 t3   tu   130 0
+  4 b111 m112 u111 110 14400
+  3 b112 m111 u11  120 0
+  2 b111 m11  u12  111 3600
+  1 b11  m12  u111 112 7200
+  0 b12  m111 u112 111 10800
+
 verify that revsets retain sort order keys
 
   $ cat <<EOF > $TESTTMP/echosortorder.py
@@ -1133,6 +1173,9 @@
   $ hg echosortorder 'reverse(sort(all(), "user -branch date rev"))'
   Sort order: ('-user', 'branch', '-date', '-rev')
 
+  $ hg echosortorder 'toposort(all())'
+  Sort order: ('topo',)
+
   $ cd ..
   $ cd repo