[PATCH 1 of 6 V2] obsolete: compute successors set

pierre-yves.david at logilab.fr pierre-yves.david at logilab.fr
Thu Dec 13 08:51:48 CST 2012


# HG changeset patch
# User Pierre-Yves David <pierre-yves.david at logilab.fr>
# Date 1355409523 -3600
# Node ID 4eb351a0771c5cdf48b288d37fe0dc80b4ac38cb
# Parent  e440a2c0d944de45f4f1939bdf0e5fecaee7d5c7
obsolete: compute successors set

Successors set are an important part of obsolescence. It is necessary to detect
and solve divergence situation. This changeset add a core function to compute
them, a debug command to audit them and solid test on the concept.

Check function docstring for details about the concept.

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -2457,10 +2457,68 @@ def debugsub(ui, repo, rev=None):
     for k, v in sorted(ctx.substate.items()):
         ui.write(('path %s\n') % k)
         ui.write((' source   %s\n') % v[0])
         ui.write((' revision %s\n') % v[1])
 
+ at command('debugsuccessorssets',
+    [],
+    _('[REV]'))
+def debugsuccessorssets(ui, repo, *revs):
+    """show set of successors for revision
+
+    A successors set of changeset A is a consistent group of revisions that
+    succeed A. It contains non-obsolete changesets only.
+
+    In most cases a changeset A has a single successors set containing a single
+    successors (changeset A replaced by A').
+
+    A changeset that is made obsolete with no successors are called "pruned".
+    Such changesets have no successors sets at all.
+
+    A changeset that has been "split" will have a successors set containing
+    more than one successors.
+
+    A changeset that has been rewritten in multiple different ways is called
+    "divergent". Such changesets have multiple successor sets (each of which
+    may also be split, i.e. have multiple successors).
+
+
+    result is displayed as follows::
+
+        <rev1>
+            <successors-1A>
+        <rev2>
+            <successors-2A>
+            <successors-2B1> <successors-2B2> <successors-2B3>
+
+    Here rev2 has two possible (i.e. divergent) successors sets. The first
+    holds one element, whereas the second holds three (i.e. the changeset has
+    been split).
+
+    add --debug if you want full size node id.
+    """
+    # passed to successorssets caching computation from one call to another
+    cache = {}
+    ctx2str = str
+    node2str = short
+    if ui.debug():
+        def ctx2str(ctx):
+            return ctx.hex()
+        node2str = hex
+    for rev in scmutil.revrange(repo, revs):
+        ctx = repo[rev]
+        ui.write('%s\n'% ctx2str(ctx))
+        for succsset in obsolete.successorssets(repo, ctx.node(), cache):
+            if succsset:
+                ui.write('    ')
+                ui.write(node2str(succsset[0]))
+                for node in succsset[1:]:
+                    ui.write(' ')
+                    ui.write(node2str(node))
+            ui.write('\n')
+    pass
+
 @command('debugwalk', walkopts, _('[OPTION]... [FILE]...'))
 def debugwalk(ui, repo, *pats, **opts):
     """show how files match on given patterns"""
     m = scmutil.match(repo[None], pats, opts)
     items = list(repo.walk(m))
diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -400,10 +400,188 @@ def allsuccessors(obsstore, nodes, ignor
             for suc in mark[1]:
                 if suc not in seen:
                     seen.add(suc)
                     remaining.add(suc)
 
+def successorssets(repo, initialnode, cache=None):
+    """Return all set of successors of initial nodes
+
+    Successors set of changeset A are a group of revision that succeed A. It
+    succeed A as a consistent whole, each revision being only partial
+    replacement.  Successors set contains non-obsolete changeset only.
+
+    In most cases a changeset A have zero (changeset pruned) or a single
+    successors set that contains a single successor (changeset A replaced by
+    A')
+
+    When changeset is split, it results successors set containing more than
+    a single element. Divergent rewriting will result in multiple successors
+    sets.
+
+    They are returned as a list of tuples containing all valid successors sets.
+
+    Final successors unknown locally are considered plain prune (obsoleted
+    without successors).
+
+    The optional `cache` parameter is a dictionary that may contains
+    precomputed successors sets. It is meant to reuse the computation of
+    previous call to `successorssets` when multiple calls are made at the same
+    time. The cache dictionary is updated in place. The caller is responsible
+    for its live spawn. Code that makes multiple calls to `successorssets`
+    *must* use this cache mechanism or suffer terrible performances."""
+
+    succmarkers = repo.obsstore.successors
+
+    # Stack of nodes we search successors sets for
+    toproceed = [initialnode]
+    # set version of above list for fast loop detection
+    # element added to "toproceed" must be added here
+    stackedset = set(toproceed)
+    if cache is None:
+        cache = {}
+
+    # This while loop is the flattened version of a recursive search for
+    # successors sets
+    #
+    # def successorssets(x):
+    #    successors = directsuccessors(x)
+    #    ss = [[]]
+    #    for succ in directsuccessors(x):
+    #        # product as in itertools cartesian product
+    #        ss = product(ss, successorssets(succ))
+    #    return ss
+    #
+    # But we can not use plain recursive calls here:
+    # - that would blow the python call stack
+    # - obsolescence markers may have cycles, we need to handle them.
+    #
+    # The `toproceed` list act as our call stack. Every node we search
+    # successors set for are stacked there.
+    #
+    # The `stackedset` is set version of this stack used to check if a node is
+    # already stacked. This check is used to detect cycles and prevent infinite
+    # loop.
+    #
+    # successors set of all nodes are stored in the `cache` dictionary.
+    #
+    # After this while loop ends we use the cache to return the successors sets
+    # for the node requested by the caller.
+    while toproceed:
+        # Every iteration tries to compute the successors sets of the topmost
+        # node of the stack: CURRENT.
+        #
+        # There are four possible outcomes:
+        #
+        # 1) We already know the successors sets of CURRENT:
+        #    -> mission accomplished, pop it from the stack.
+        # 2) Node is not obsolete:
+        #    -> the node is its own successors sets. Add it to the cache.
+        # 3) We know successors sets of all direct successors of CURRENT:
+        #    -> We can compute CURRENT successors set and add it to the
+        #       cache.
+        # 4) We do not know successors set of direct successors of CURRENT:
+        #    -> We add those successors to the stack.
+        #
+        current = toproceed[-1]
+        if current in cache:
+            # case (1) We already know the successors sets
+            stackedset.remove(toproceed.pop())
+        elif current not in succmarkers:
+            # case (2) The node is not obsolete.
+            if current in repo:
+                # We have a valid last successors.
+                cache[current] = [(current,)]
+            else:
+                # Final obsolete version is unknown locally.
+                # Do not count that as a valid successors
+                cache[current] = []
+        else:
+            # case (3) and (4)
+            #
+            # We proceed in two phases:
+            #
+            # Phase 1 aims to distinct case (3) from case (4)
+            #
+            #     For each direct successors of CURRENT, we check that its
+            #     successors sets are known. If they are not, we stack the
+            #     unknown node and proceed to the next iteration of the while
+            #     loop. (case 3)
+            #
+            #     During this step, we may detect obsolescence cycles. A node
+            #     with unknown successors sets but already in the call stack.
+            #     In such situation, we arbitrary set the successors sets of the
+            #     node to nothing (node pruned) to break the cycle.
+            #
+            #     If no break were encountered we proceeed to phase 2
+            #
+            # Phase 2 computes successors sets of CURRENT. (case 4)
+            #
+            #    See details in phase 2 itself.
+            #
+            # Note the two levels of iteration in each phase.
+            # - The first one handles obsolescence markers using CURRENT as
+            #   precursor (successors markers of CURRENT).
+            #
+            #   Having multiple entry here means divergence.
+            #
+            # - The second one handles successors defined in each marker.
+            #
+            #   Having none means pruned node, multiple successors means split,
+            #   single successors are standard replacement.
+            #
+            for mark in succmarkers[current]:
+                for suc in mark[1]:
+                    if suc not in cache:
+                        if suc in stackedset:
+                            # cycle breaking
+                            cache[suc] = []
+                        else:
+                            # case (3) If we have not computed successors sets
+                            # of one of those successors we add it to the
+                            # `toproceed` stack and stop all work for this
+                            # iteration.
+                            toproceed.append(suc)
+                            stackedset.add(suc)
+                            break
+                else:
+                    continue
+                break
+            else:
+                # case (4) know all successors sets of all direct successors
+                #
+                #
+                # Successors set contributed by each marker depends of the
+                # successors sets of all its "successors" node.
+                #
+                # Each different marker is a divergence in obsolescence
+                # history. It contributes successors sets dictinct from other
+                # markers.
+                #
+                # Within a marker, a successor may have divergent successors
+                # sets. In such a case, the marker will contribute multiple
+                # divergent successors sets. If multiple successors have
+                # divergents successors sets, a cartesien product is used.
+                succssets = []
+                for mark in succmarkers[current]:
+                    # successors sets contributed by this marker
+                    markss = [[]]
+                    for suc in mark[1]:
+                        productresult = []
+                        for prefix in markss:
+                            for suffix in cache[suc]:
+                                newss = list(prefix)
+                                for part in suffix:
+                                    # do not duplicated entry in successors set
+                                    # first entry wins.
+                                    if part not in newss:
+                                        newss.append(part)
+                                productresult.append(newss)
+                        markss = productresult
+                    succssets.extend(markss)
+                cache[current] = list(set(tuple(r) for r in succssets if r))
+    return cache[initialnode]
+
 def _knownrevs(repo, nodes):
     """yield revision numbers of known nodes passed in parameters
 
     Unknown revisions are silently ignored."""
     torev = repo.changelog.nodemap.get
diff --git a/tests/test-debugcomplete.t b/tests/test-debugcomplete.t
--- a/tests/test-debugcomplete.t
+++ b/tests/test-debugcomplete.t
@@ -94,10 +94,11 @@ Show debug commands if there are no othe
   debugrevlog
   debugrevspec
   debugsetparents
   debugstate
   debugsub
+  debugsuccessorssets
   debugwalk
   debugwireargs
 
 Do not show the alias of a debug command if there are other candidates
 (this should hide rawcommit)
@@ -244,10 +245,11 @@ Show all commands + options
   debugrevlog: changelog, manifest, dump
   debugrevspec: 
   debugsetparents: 
   debugstate: nodates, datesort
   debugsub: rev
+  debugsuccessorssets: 
   debugwalk: include, exclude
   debugwireargs: three, four, five, ssh, remotecmd, insecure
   graft: rev, continue, edit, log, currentdate, currentuser, date, user, tool, dry-run
   grep: print0, all, text, follow, ignore-case, files-with-matches, line-number, rev, user, date, include, exclude
   heads: rev, topo, active, closed, style, template
diff --git a/tests/test-obsolete-divergent.t b/tests/test-obsolete-divergent.t
new file mode 100644
--- /dev/null
+++ b/tests/test-obsolete-divergent.t
@@ -0,0 +1,398 @@
+Test file dedicated to testing the divergent troubles from obsolete changeset.
+
+This is the most complexe troubles from far so we isolate it in a dedicated
+file.
+
+Enable obsolete
+
+  $ cat > obs.py << EOF
+  > import mercurial.obsolete
+  > mercurial.obsolete._enabled = True
+  > EOF
+  $ cat >> $HGRCPATH << EOF
+  > [ui]
+  > logtemplate = {rev}:{node|short} {desc}\n
+  > [extensions]
+  > obs=${TESTTMP}/obs.py
+  > [alias]
+  > debugobsolete = debugobsolete -d '0 0'
+  > [phases]
+  > publish=False
+  > EOF
+
+
+  $ mkcommit() {
+  >    echo "$1" > "$1"
+  >    hg add "$1"
+  >    hg ci -m "$1"
+  > }
+  $ getid() {
+  >    hg id --debug -ir "desc('$1')"
+  > }
+
+setup repo
+
+  $ hg init reference
+  $ cd reference
+  $ mkcommit base
+  $ mkcommit A_0
+  $ hg up 0
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ mkcommit A_1
+  created new head
+  $ hg up 0
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ mkcommit A_2
+  created new head
+  $ hg up 0
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ cd ..
+
+
+  $ newcase() {
+  >    hg clone -u 0 -q reference $1
+  >    cd $1
+  > }
+
+direct divergence
+-----------------
+
+A_1 have two direct and divergent successors A_1 and A_1
+
+  $ newcase direct
+  $ hg debugobsolete `getid A_0` `getid A_1`
+  $ hg debugobsolete `getid A_0` `getid A_2`
+  $ hg log -G --hidden
+  o  3:392fd25390da A_2
+  |
+  | o  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  @  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      82623d38b9ba
+      392fd25390da
+  82623d38b9ba
+      82623d38b9ba
+  392fd25390da
+      392fd25390da
+  $ cd ..
+
+
+indirect divergence with known changeset
+-------------------------------------------
+
+  $ newcase indirect_known
+  $ hg debugobsolete `getid A_0` `getid A_1`
+  $ hg debugobsolete `getid A_0` `getid A_2`
+  $ mkcommit A_3
+  created new head
+  $ hg debugobsolete `getid A_2` `getid A_3`
+  $ hg log -G --hidden
+  @  4:01f36c5a8fda A_3
+  |
+  | x  3:392fd25390da A_2
+  |/
+  | o  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  o  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      01f36c5a8fda
+      82623d38b9ba
+  82623d38b9ba
+      82623d38b9ba
+  392fd25390da
+      01f36c5a8fda
+  01f36c5a8fda
+      01f36c5a8fda
+  $ cd ..
+
+
+indirect divergence with known changeset
+-------------------------------------------
+
+  $ newcase indirect_unknown
+  $ hg debugobsolete `getid A_0` aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  $ hg debugobsolete aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa `getid A_1`
+  $ hg debugobsolete `getid A_0` `getid A_2`
+  $ hg log -G --hidden
+  o  3:392fd25390da A_2
+  |
+  | o  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  @  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      82623d38b9ba
+      392fd25390da
+  82623d38b9ba
+      82623d38b9ba
+  392fd25390da
+      392fd25390da
+  $ cd ..
+
+do not take unknown node in account if they are final
+-----------------------------------------------------
+
+  $ newcase final-unknown
+  $ hg debugobsolete `getid A_0` `getid A_1`
+  $ hg debugobsolete `getid A_1` `getid A_2`
+  $ hg debugobsolete `getid A_0` bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+  $ hg debugobsolete bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb cccccccccccccccccccccccccccccccccccccccc
+  $ hg debugobsolete `getid A_1` dddddddddddddddddddddddddddddddddddddddd
+
+  $ hg debugsuccessorssets 'desc('A_0')'
+  007dc284c1f8
+      392fd25390da
+
+  $ cd ..
+
+divergence that converge again is not divergence anymore
+-----------------------------------------------------
+
+  $ newcase converged_divergence
+  $ hg debugobsolete `getid A_0` `getid A_1`
+  $ hg debugobsolete `getid A_0` `getid A_2`
+  $ mkcommit A_3
+  created new head
+  $ hg debugobsolete `getid A_1` `getid A_3`
+  $ hg debugobsolete `getid A_2` `getid A_3`
+  $ hg log -G --hidden
+  @  4:01f36c5a8fda A_3
+  |
+  | x  3:392fd25390da A_2
+  |/
+  | x  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  o  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      01f36c5a8fda
+  82623d38b9ba
+      01f36c5a8fda
+  392fd25390da
+      01f36c5a8fda
+  01f36c5a8fda
+      01f36c5a8fda
+  $ cd ..
+
+split is not divergences
+-----------------------------
+
+  $ newcase split
+  $ hg debugobsolete `getid A_0` `getid A_1` `getid A_2`
+  $ hg log -G --hidden
+  o  3:392fd25390da A_2
+  |
+  | o  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  @  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      82623d38b9ba 392fd25390da
+  82623d38b9ba
+      82623d38b9ba
+  392fd25390da
+      392fd25390da
+
+Even when subsequente rewriting happen
+
+  $ mkcommit A_3
+  created new head
+  $ hg debugobsolete `getid A_1` `getid A_3`
+  $ hg up 0
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ mkcommit A_4
+  created new head
+  $ hg debugobsolete `getid A_2` `getid A_4`
+  $ hg up 0
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ mkcommit A_5
+  created new head
+  $ hg debugobsolete `getid A_4` `getid A_5`
+  $ hg log -G --hidden
+  @  6:e442cfc57690 A_5
+  |
+  | x  5:6a411f0d7a0a A_4
+  |/
+  | o  4:01f36c5a8fda A_3
+  |/
+  | x  3:392fd25390da A_2
+  |/
+  | x  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  o  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      01f36c5a8fda e442cfc57690
+  82623d38b9ba
+      01f36c5a8fda
+  392fd25390da
+      e442cfc57690
+  01f36c5a8fda
+      01f36c5a8fda
+  6a411f0d7a0a
+      e442cfc57690
+  e442cfc57690
+      e442cfc57690
+
+Check more complexe obsolescence graft (with divergence)
+
+  $ mkcommit B_0; hg up 0
+  0 files updated, 0 files merged, 2 files removed, 0 files unresolved
+  $ hg debugobsolete `getid B_0` `getid A_2`
+  $ mkcommit A_7; hg up 0
+  created new head
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ mkcommit A_8; hg up 0
+  created new head
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ hg debugobsolete `getid A_5` `getid A_7` `getid A_8`
+  $ mkcommit A_9; hg up 0
+  created new head
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ hg debugobsolete `getid A_5` `getid A_9`
+  $ hg log -G --hidden
+  o  10:bed64f5d2f5a A_9
+  |
+  | o  9:14608b260df8 A_8
+  |/
+  | o  8:7ae126973a96 A_7
+  |/
+  | x  7:3750ebee865d B_0
+  | |
+  | x  6:e442cfc57690 A_5
+  |/
+  | x  5:6a411f0d7a0a A_4
+  |/
+  | o  4:01f36c5a8fda A_3
+  |/
+  | x  3:392fd25390da A_2
+  |/
+  | x  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  @  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      01f36c5a8fda bed64f5d2f5a
+      01f36c5a8fda 7ae126973a96 14608b260df8
+  82623d38b9ba
+      01f36c5a8fda
+  392fd25390da
+      bed64f5d2f5a
+      7ae126973a96 14608b260df8
+  01f36c5a8fda
+      01f36c5a8fda
+  6a411f0d7a0a
+      bed64f5d2f5a
+      7ae126973a96 14608b260df8
+  e442cfc57690
+      bed64f5d2f5a
+      7ae126973a96 14608b260df8
+  3750ebee865d
+      bed64f5d2f5a
+      7ae126973a96 14608b260df8
+  7ae126973a96
+      7ae126973a96
+  14608b260df8
+      14608b260df8
+  bed64f5d2f5a
+      bed64f5d2f5a
+
+fix the divergence
+
+  $ mkcommit A_A; hg up 0
+  created new head
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ hg debugobsolete `getid A_9` `getid A_A`
+  $ hg debugobsolete `getid A_7` `getid A_A`
+  $ hg debugobsolete `getid A_8` `getid A_A`
+  $ hg log -G --hidden
+  o  11:a139f71be9da A_A
+  |
+  | x  10:bed64f5d2f5a A_9
+  |/
+  | x  9:14608b260df8 A_8
+  |/
+  | x  8:7ae126973a96 A_7
+  |/
+  | x  7:3750ebee865d B_0
+  | |
+  | x  6:e442cfc57690 A_5
+  |/
+  | x  5:6a411f0d7a0a A_4
+  |/
+  | o  4:01f36c5a8fda A_3
+  |/
+  | x  3:392fd25390da A_2
+  |/
+  | x  2:82623d38b9ba A_1
+  |/
+  | x  1:007dc284c1f8 A_0
+  |/
+  @  0:d20a80d4def3 base
+  
+  $ hg debugsuccessorssets 'all()'
+  d20a80d4def3
+      d20a80d4def3
+  007dc284c1f8
+      01f36c5a8fda a139f71be9da
+  82623d38b9ba
+      01f36c5a8fda
+  392fd25390da
+      a139f71be9da
+  01f36c5a8fda
+      01f36c5a8fda
+  6a411f0d7a0a
+      a139f71be9da
+  e442cfc57690
+      a139f71be9da
+  3750ebee865d
+      a139f71be9da
+  7ae126973a96
+      a139f71be9da
+  14608b260df8
+      a139f71be9da
+  bed64f5d2f5a
+      a139f71be9da
+  a139f71be9da
+      a139f71be9da
+
+  $ cd ..
+


More information about the Mercurial-devel mailing list