[PATCH 3 of 3 V2] revsets: add new toposort predicate
Martijn Pieters
mj at zopatista.com
Wed May 18 08:16:14 EDT 2016
# HG changeset patch
# User Martijn Pieters <mjpieters at fb.com>
# Date 1463568973 3600
# Wed May 18 11:56:13 2016 +0100
# Node ID f0b544ff97aa39bfc7ac75bcf602280b549fbb0d
# Parent 54de7925d82a18ee1048ee09fab7448c937b7a7f
revsets: add new toposort predicate
Sort revisions in reverse revision order but grouped by topographical branches.
Visualised as a graph, instead of:
o 4

 o 3
 
 o 2
 
o  1
/
o 0
revisions on a 'main' branch are emitted before 'side' branches:
o 4

o 1

 o 3
 
 o 2
/
o 0
where what constitutes a 'main' branch is configurable, so the sort could also
result in:
o 3

o 2

 o 4
 
 o 1
/
o 0
This sort was already available as an experimental option in the graphmod
module, from which it is now removed.
This sort is best used with hg log G:
$ hg log G "toposort(all())"
diff git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
 a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ 2147,7 +2147,7 @@
if opts.get('rev'):
# Userspecified revs might be unsorted, but don't sort before
# _makelogrevset because it might depend on the order of revs
 if revs.sortorder != ('rev',):
+ if revs.sortorder not in (('rev',), ('topo',)):
revs.sort(reverse=True)
if expr:
# Revset matchers often operate faster on revisions in changelog
diff git a/mercurial/graphmod.py b/mercurial/graphmod.py
 a/mercurial/graphmod.py
+++ b/mercurial/graphmod.py
@@ 19,8 +19,6 @@
from __future__ import absolute_import
import heapq

from .node import nullrev
from . import (
revset,
@@ 37,204 +35,6 @@
# (so making N negative) and all but the first N characters use that style.
EDGES = {PARENT: '', GRANDPARENT: ':', MISSINGPARENT: None}
def groupbranchiter(revs, parentsfunc, firstbranch=()):
 """Yield revisions from heads to roots one (topo) branch at a time.

 This function aims to be used by a graph generator that wishes to minimize
 the number of parallel branches and their interleaving.

 Example iteration order (numbers show the "true" order in a changelog):

 o 4
 
 o 1
 
  o 3
  
  o 2
 /
 o 0

 Note that the ancestors of merges are understood by the current
 algorithm to be on the same branch. This means no reordering will
 occur behind a merge.
 """

 ### Quick summary of the algorithm
 #
 # This function is based around a "retention" principle. We keep revisions
 # in memory until we are ready to emit a whole branch that immediately
 # "merges" into an existing one. This reduces the number of parallel
 # branches with interleaved revisions.
 #
 # During iteration revs are split into two groups:
 # A) revision already emitted
 # B) revision in "retention". They are stored as different subgroups.
 #
 # for each REV, we do the following logic:
 #
 # 1) if REV is a parent of (A), we will emit it. If there is a
 # retention group ((B) above) that is blocked on REV being
 # available, we emit all the revisions out of that retention
 # group first.
 #
 # 2) else, we'll search for a subgroup in (B) awaiting for REV to be
 # available, if such subgroup exist, we add REV to it and the subgroup is
 # now awaiting for REV.parents() to be available.
 #
 # 3) finally if no such group existed in (B), we create a new subgroup.
 #
 #
 # To bootstrap the algorithm, we emit the tipmost revision (which
 # puts it in group (A) from above).

 revs.sort(reverse=True)

 # Set of parents of revision that have been emitted. They can be considered
 # unblocked as the graph generator is already aware of them so there is no
 # need to delay the revisions that reference them.
 #
 # If someone wants to prioritize a branch over the others, prefilling this
 # set will force all other branches to wait until this branch is ready to be
 # emitted.
 unblocked = set(firstbranch)

 # list of groups waiting to be displayed, each group is defined by:
 #
 # (revs: lists of revs waiting to be displayed,
 # blocked: set of that cannot be displayed before those in 'revs')
 #
 # The second value ('blocked') correspond to parents of any revision in the
 # group ('revs') that is not itself contained in the group. The main idea
 # of this algorithm is to delay as much as possible the emission of any
 # revision. This means waiting for the moment we are about to display
 # these parents to display the revs in a group.
 #
 # This first implementation is smart until it encounters a merge: it will
 # emit revs as soon as any parent is about to be emitted and can grow an
 # arbitrary number of revs in 'blocked'. In practice this mean we properly
 # retains new branches but gives up on any special ordering for ancestors
 # of merges. The implementation can be improved to handle this better.
 #
 # The first subgroup is special. It corresponds to all the revision that
 # were already emitted. The 'revs' lists is expected to be empty and the
 # 'blocked' set contains the parents revisions of already emitted revision.
 #
 # You could preseed the <parents> set of groups[0] to a specific
 # changesets to select what the first emitted branch should be.
 groups = [([], unblocked)]
 pendingheap = []
 pendingset = set()

 heapq.heapify(pendingheap)
 heappop = heapq.heappop
 heappush = heapq.heappush
 for currentrev in revs:
 # Heap works with smallest element, we want highest so we invert
 if currentrev not in pendingset:
 heappush(pendingheap, currentrev)
 pendingset.add(currentrev)
 # iterates on pending rev until after the current rev have been
 # processed.
 rev = None
 while rev != currentrev:
 rev = heappop(pendingheap)
 pendingset.remove(rev)

 # Seek for a subgroup blocked, waiting for the current revision.
 matching = [i for i, g in enumerate(groups) if rev in g[1]]

 if matching:
 # The main idea is to gather together all sets that are blocked
 # on the same revision.
 #
 # Groups are merged when a common blocking ancestor is
 # observed. For example, given two groups:
 #
 # revs [5, 4] waiting for 1
 # revs [3, 2] waiting for 1
 #
 # These two groups will be merged when we process
 # 1. In theory, we could have merged the groups when
 # we added 2 to the group it is now in (we could have
 # noticed the groups were both blocked on 1 then), but
 # the way it works now makes the algorithm simpler.
 #
 # We also always keep the oldest subgroup first. We can
 # probably improve the behavior by having the longest set
 # first. That way, graph algorithms could minimise the length
 # of parallel lines their drawing. This is currently not done.
 targetidx = matching.pop(0)
 trevs, tparents = groups[targetidx]
 for i in matching:
 gr = groups[i]
 trevs.extend(gr[0])
 tparents = gr[1]
 # delete all merged subgroups (except the one we kept)
 # (starting from the last subgroup for performance and
 # sanity reasons)
 for i in reversed(matching):
 del groups[i]
 else:
 # This is a new head. We create a new subgroup for it.
 targetidx = len(groups)
 groups.append(([], set([rev])))

 gr = groups[targetidx]

 # We now add the current nodes to this subgroups. This is done
 # after the subgroup merging because all elements from a subgroup
 # that relied on this rev must precede it.
 #
 # we also update the <parents> set to include the parents of the
 # new nodes.
 if rev == currentrev: # only display stuff in rev
 gr[0].append(rev)
 gr[1].remove(rev)
 parents = [p for p in parentsfunc(rev) if p > nullrev]
 gr[1].update(parents)
 for p in parents:
 if p not in pendingset:
 pendingset.add(p)
 heappush(pendingheap, p)

 # Look for a subgroup to display
 #
 # When unblocked is empty (if clause), we were not waiting for any
 # revisions during the first iteration (if no priority was given) or
 # if we emitted a whole disconnected set of the graph (reached a
 # root). In that case we arbitrarily take the oldest known
 # subgroup. The heuristic could probably be better.
 #
 # Otherwise (elif clause) if the subgroup is blocked on
 # a revision we just emitted, we can safely emit it as
 # well.
 if not unblocked:
 if len(groups) > 1: # display other subset
 targetidx = 1
 gr = groups[1]
 elif not gr[1] & unblocked:
 gr = None

 if gr is not None:
 # update the set of awaited revisions with the one from the
 # subgroup
 unblocked = gr[1]
 # output all revisions in the subgroup
 for r in gr[0]:
 yield r
 # delete the subgroup that you just output
 # unless it is groups[0] in which case you just empty it.
 if targetidx:
 del groups[targetidx]
 else:
 gr[0][:] = []
 # Check if we have some subgroup waiting for revisions we are not going to
 # iterate over
 for g in groups:
 for r in g[0]:
 yield r

def dagwalker(repo, revs):
"""cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples
@@ 252,16 +52,6 @@
gpcache = {}
 if repo.ui.configbool('experimental', 'graphgroupbranches', False):
 firstbranch = ()
 firstbranchrevset = repo.ui.config(
 'experimental', 'graphgroupbranches.firstbranch', '')
 if firstbranchrevset:
 firstbranch = repo.revs(firstbranchrevset)
 parentrevs = repo.changelog.parentrevs
 revs = groupbranchiter(revs, parentrevs, firstbranch)
 revs = revset.baseset(revs)

for rev in revs:
ctx = repo[rev]
# partition into parents in the rev set and missing parents, then
diff git a/mercurial/revset.py b/mercurial/revset.py
 a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ 1973,6 +1973,225 @@
def tagged(repo, subset, x):
return tag(repo, subset, x)
+ at predicate('toposort(set[, firstbranch...])', safe=True)
+def toposort(repo, subset, x):
+ # revs, parentsfunc, firstbranch=()):
+ """Sort set in topological order.
+
+ Revisions are ordered from heads to roots, one topographical branch at a
+ time. Any branches named are displayed first.
+
+ Example iteration order (numbers show the "true" order in a changelog):
+
+  o 4
+  
+  o 1
+  
+   o 3
+   
+   o 2
+  /
+  o 0
+
+ Ancestors of merges are understood to be on the same branch. This means no
+ reordering will occur behind a merge.
+
+ """
+ # i18n: "toposort" is a keyword
+ l = getargs(x, 1, 2, _("toposort requires one or two arguments"))
+ firstbranch = ''
+ if len(l) == 2:
+ # i18n: "firstbranch" is a keyword
+ firstbranch = getstring(l[1], _("firstbranch spec must be a string"))
+
+ s = l[0]
+ firstbranch = [repo[b].rev() for b in firstbranch.split() if b in repo]
+ revs = getset(repo, subset, s)
+ parentsfunc = repo.changelog.parentrevs
+
+ def groupbranchiter():
+ # Quick summary of the algorithm
+ #
+ # This function is based around a "retention" principle. We keep
+ # revisions in memory until we are ready to emit a whole branch that
+ # immediately "merges" into an existing one. This reduces the number
+ # of parallel branches with interleaved revisions.
+ #
+ # During iteration revs are split into two groups:
+ # A) revision already emitted
+ # B) revision in "retention". They are stored as different subgroups.
+ #
+ # for each REV, we do the following logic:
+ #
+ # 1) if REV is a parent of (A), we will emit it. If there is a
+ # retention group ((B) above) that is blocked on REV being
+ # available, we emit all the revisions out of that retention
+ # group first.
+ #
+ # 2) else, we'll search for a subgroup in (B) awaiting for REV to be
+ # available, if such subgroup exist, we add REV to it and the
+ # subgroup is now awaiting for REV.parents() to be available.
+ #
+ # 3) finally if no such group existed in (B), we create a new
+ # subgroup.
+ #
+ # To bootstrap the algorithm, we emit the tipmost revision (which
+ # puts it in group (A) from above).
+
+ revs.sort(reverse=True)
+
+ # Set of parents of revision that have been emitted. They can be
+ # considered unblocked as the graph generator is already aware of them
+ # so there is no need to delay the revisions that reference them.
+ #
+ # If someone wants to prioritize a branch over the others, prefilling
+ # this set will force all other branches to wait until this branch is
+ # ready to be emitted.
+ unblocked = set(firstbranch)
+
+ # list of groups waiting to be displayed, each group is defined by:
+ #
+ # (revs: lists of revs waiting to be displayed,
+ # blocked: set of that cannot be displayed before those in 'revs')
+ #
+ # The second value ('blocked') correspond to parents of any revision
+ # in the group ('revs') that is not itself contained in the group. The
+ # main idea of this algorithm is to delay as much as possible the
+ # emission of any revision. This means waiting for the moment we are
+ # about to display these parents to display the revs in a group.
+ #
+ # This first implementation is smart until it encounters a merge: it
+ # will emit revs as soon as any parent is about to be emitted and can
+ # grow an arbitrary number of revs in 'blocked'. In practice this mean
+ # we properly retains new branches but gives up on any special
+ # ordering for ancestors of merges. The implementation can be improved
+ # to handle this better.
+ #
+ # The first subgroup is special. It corresponds to all the revision
+ # that were already emitted. The 'revs' lists is expected to be empty
+ # and the 'blocked' set contains the parents revisions of already
+ # emitted revision.
+ #
+ # You could preseed the <parents> set of groups[0] to a specific
+ # changesets to select what the first emitted branch should be.
+ groups = [([], unblocked)]
+ pendingheap = []
+ pendingset = set()
+
+ heapq.heapify(pendingheap)
+ heappop = heapq.heappop
+ heappush = heapq.heappush
+ for currentrev in revs:
+ # Heap works with smallest element, we want highest so we invert
+ if currentrev not in pendingset:
+ heappush(pendingheap, currentrev)
+ pendingset.add(currentrev)
+ # iterates on pending rev until after the current rev have been
+ # processed.
+ rev = None
+ while rev != currentrev:
+ rev = heappop(pendingheap)
+ pendingset.remove(rev)
+
+ # Seek for a subgroup blocked, waiting for the current
+ # revision.
+ matching = [i for i, g in enumerate(groups) if rev in g[1]]
+
+ if matching:
+ # The main idea is to gather together all sets that are
+ # blocked on the same revision.
+ #
+ # Groups are merged when a common blocking ancestor is
+ # observed. For example, given two groups:
+ #
+ # revs [5, 4] waiting for 1
+ # revs [3, 2] waiting for 1
+ #
+ # These two groups will be merged when we process
+ # 1. In theory, we could have merged the groups when
+ # we added 2 to the group it is now in (we could have
+ # noticed the groups were both blocked on 1 then), but
+ # the way it works now makes the algorithm simpler.
+ #
+ # We also always keep the oldest subgroup first. We can
+ # probably improve the behavior by having the longest set
+ # first. That way, graph algorithms could minimise the
+ # length of parallel lines their drawing. This is
+ # currently not done.
+ targetidx = matching.pop(0)
+ trevs, tparents = groups[targetidx]
+ for i in matching:
+ gr = groups[i]
+ trevs.extend(gr[0])
+ tparents = gr[1]
+ # delete all merged subgroups (except the one we kept)
+ # (starting from the last subgroup for performance and
+ # sanity reasons)
+ for i in reversed(matching):
+ del groups[i]
+ else:
+ # This is a new head. We create a new subgroup for it.
+ targetidx = len(groups)
+ groups.append(([], set([rev])))
+
+ gr = groups[targetidx]
+
+ # We now add the current nodes to this subgroups. This is done
+ # after the subgroup merging because all elements from a
+ # subgroup that relied on this rev must precede it.
+ #
+ # we also update the <parents> set to include the parents of
+ # the new nodes.
+ if rev == currentrev: # only display stuff in rev
+ gr[0].append(rev)
+ gr[1].remove(rev)
+ parents = [p for p in parentsfunc(rev) if p > node.nullrev]
+ gr[1].update(parents)
+ for p in parents:
+ if p not in pendingset:
+ pendingset.add(p)
+ heappush(pendingheap, p)
+
+ # Look for a subgroup to display
+ #
+ # When unblocked is empty (if clause), we were not waiting for
+ # any revisions during the first iteration (if no priority was
+ # given) or if we emitted a whole disconnected set of the
+ # graph (reached a root). In that case we arbitrarily take
+ # the oldest known subgroup. The heuristic could probably be
+ # better.
+ #
+ # Otherwise (elif clause) if the subgroup is blocked on
+ # a revision we just emitted, we can safely emit it as
+ # well.
+ if not unblocked:
+ if len(groups) > 1: # display other subset
+ targetidx = 1
+ gr = groups[1]
+ elif not gr[1] & unblocked:
+ gr = None
+
+ if gr is not None:
+ # update the set of awaited revisions with the one from the
+ # subgroup
+ unblocked = gr[1]
+ # output all revisions in the subgroup
+ for r in gr[0]:
+ yield r
+ # delete the subgroup that you just output
+ # unless it is groups[0] in which case you just empty it.
+ if targetidx:
+ del groups[targetidx]
+ else:
+ gr[0][:] = []
+ # Check if we have some subgroup waiting for revisions we are not
+ # going to iterate over
+ for g in groups:
+ for r in g[0]:
+ yield r
+
+ return baseset(groupbranchiter(), sortorder=('topo',))
+
@predicate('unstable()', safe=True)
def unstable(repo, subset, x):
"""Nonobsolete changesets with obsolete ancestors.
diff git a/tests/testglogtopological.t b/tests/testglogtopological.t
 a/tests/testglogtopological.t
+++ b/tests/testglogtopological.t
@@ 40,7 +40,7 @@
(display all nodes)
 $ hg config experimental.graphgroupbranches=1 log G
+ $ hg log G r 'toposort(all())'
o 8

o 3
@@ 62,7 +62,7 @@
(revset skipping nodes)
 $ hg config experimental.graphgroupbranches=1 log G rev 'not (2+6)'
+ $ hg log G rev 'toposort(not (2+6))'
o 8

o 3
@@ 80,7 +80,7 @@
(begin) from the other branch
 $ hg config experimental.graphgroupbranches=1 config experimental.graphgroupbranches.firstbranch=5 log G
+ $ hg log G r 'toposort(all(), 5)'
o 7

o 6
diff git a/tests/testrevset.t b/tests/testrevset.t
 a/tests/testrevset.t
+++ b/tests/testrevset.t
@@ 1100,6 +1100,46 @@
0 b12 m111 u112 111 10800
2 b111 m11 u12 111 3600
+ toposort prioritises graph branches
+
+ $ hg up 2
+ 0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+ $ touch a
+ $ hg addremove
+ adding a
+ $ hg ci m 't1' u 'tu' d '130 0'
+ created new head
+ $ echo 'a' >> a
+ $ hg ci m 't2' u 'tu' d '130 0'
+ $ hg book book1
+ $ hg up 4
+ 0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+ (leaving bookmark book1)
+ $ touch a
+ $ hg addremove
+ adding a
+ $ hg ci m 't3' u 'tu' d '130 0'
+
+ $ hg log r 'toposort(all())'
+ 7 b111 t3 tu 130 0
+ 4 b111 m112 u111 110 14400
+ 3 b112 m111 u11 120 0
+ 6 b111 t2 tu 130 0
+ 5 b111 t1 tu 130 0
+ 2 b111 m11 u12 111 3600
+ 1 b11 m12 u111 112 7200
+ 0 b12 m111 u112 111 10800
+
+ $ hg log r 'toposort(all(), book1)'
+ 6 b111 t2 tu 130 0
+ 5 b111 t1 tu 130 0
+ 7 b111 t3 tu 130 0
+ 4 b111 m112 u111 110 14400
+ 3 b112 m111 u11 120 0
+ 2 b111 m11 u12 111 3600
+ 1 b11 m12 u111 112 7200
+ 0 b12 m111 u112 111 10800
+
verify that revsets retain sort order keys
$ cat <<EOF > $TESTTMP/echosortorder.py
@@ 1133,6 +1173,9 @@
$ hg echosortorder 'reverse(sort(all(), "user branch date rev"))'
Sort order: ('user', 'branch', 'date', 'rev')
+ $ hg echosortorder 'toposort(all())'
+ Sort order: ('topo',)
+
$ cd ..
$ cd repo
More information about the Mercurialdevel
mailing list