D5991: copies: do copy tracing based on ctx.p[12]copies() if configured

martinvonz (Martin von Zweigbergk) phabricator at mercurial-scm.org
Thu Feb 21 00:28:16 UTC 2019


martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This adds an option to do copy tracing in a changeset-optimized
  way. If the metadata is stored in filelogs, this is obviously going to
  be suboptimal. The point is that it provides a way of transitioning to
  changeset-stored metadata.
  
  Some of the tests behave a little differently, but they all seem
  reasonable to me.
  
  The config option may very well be renamed later when it's clearer
  what options we want and how they will behave.
  
  When the test suite is run with --extra-config-opt to use the new copy
  tracing, all tests pass, besides test-copies.t (which fails in the
  same way as you can see in this patch).
  
  `hg debugpathcopies 4.0 4.8` reports 82 copies. With this option
  enabled, the only difference is this:
  
    -mercurial/pure/bdiff.py -> mercurial/cffi/bdiff.py
    +setup_bdiff_cffi.py -> mercurial/cffi/bdiff.py
  
  I believe that happened because it was renamed in different ways on
  different sides of a merge and the new algorithm arbitrarily prefers
  copies that happened on p1. The runtime is about 0.85 seconds with the
  old copy tracing and 5.7 seconds with the new copy tracing. That's
  kind of slow, but actually better than I had expected.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5991

AFFECTED FILES
  mercurial/configitems.py
  mercurial/copies.py
  tests/test-copies.t

CHANGE DETAILS

diff --git a/tests/test-copies.t b/tests/test-copies.t
--- a/tests/test-copies.t
+++ b/tests/test-copies.t
@@ -1,9 +1,17 @@
+#testcases filelog compatibility
 
   $ cat >> $HGRCPATH << EOF
   > [alias]
   > l = log -G -T '{rev} {desc}\n{files}\n'
   > EOF
 
+#if compatibility
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > copies.read-from = compatibility
+  > EOF
+#endif
+
   $ REPONUM=0
   $ newrepo() {
   >     cd $TESTTMP
@@ -338,7 +346,7 @@
   $ hg debugpathcopies 1 2
   x -> z
   $ hg debugpathcopies 0 2
-  x -> z
+  x -> z (filelog !)
 
 Copy file that exists on both sides of the merge, different content
   $ newrepo
@@ -476,7 +484,8 @@
   $ hg debugpathcopies 1 4
   $ hg debugpathcopies 2 4
   $ hg debugpathcopies 0 4
-  x -> z
+  x -> z (filelog !)
+  y -> z (compatibility !)
   $ hg debugpathcopies 1 5
   $ hg debugpathcopies 2 5
   $ hg debugpathcopies 0 5
diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -166,6 +166,10 @@
     # files might have to be traced back to the fctx parent of the last
     # one-side-only changeset, but not further back than that
     repo = a._repo
+
+    if repo.ui.config('experimental', 'copies.read-from') == 'compatibility':
+        return _changesetforwardcopies(a, b, match)
+
     debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
     dbg = repo.ui.debug
     if debug:
@@ -216,6 +220,76 @@
                 % (util.timer() - start))
     return cm
 
+def _changesetforwardcopies(a, b, match):
+    if a.rev() == node.nullrev:
+        return {}
+
+    repo = a.repo()
+    children = {}
+    cl = repo.changelog
+    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
+    for r in missingrevs:
+        for p in cl.parentrevs(r):
+            if p == node.nullrev:
+                continue
+            if p not in children:
+                children[p] = [r]
+            else:
+                children[p].append(r)
+
+    roots = set(children) - set(missingrevs)
+    # 'work' contains 3-tuples of a (revision number, parent number, copies).
+    # The parent number is only used for knowing which parent the copies dict
+    # came from.
+    work = [(r, 1, {}) for r in roots]
+    heapq.heapify(work)
+    while work:
+        r, i1, copies1 = heapq.heappop(work)
+        if work and work[0][0] == r:
+            # We are tracing copies from both parents
+            r, i2, copies2 = heapq.heappop(work)
+            copies = {}
+            ctx = repo[r]
+            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
+            allcopies = set(copies1) | set(copies2)
+            # TODO: perhaps this filtering should be done as long as ctx
+            # is a merge, whether or not we're tracing from both parents.
+            for dst in allcopies:
+                if not match(dst):
+                    continue
+                if dst not in copies2:
+                    # Copied on p1 side: mark as copy from p1 side if it didn't
+                    # already exist on p2 side
+                    if dst not in p2man:
+                        copies[dst] = copies1[dst]
+                elif dst not in copies1:
+                    # Copied on p2 side: mark as copy from p2 side if it didn't
+                    # already exist on p1 side
+                    if dst not in p1man:
+                        copies[dst] = copies2[dst]
+                else:
+                    # Copied on both sides: mark as copy from p1 side
+                    copies[dst] = copies1[dst]
+        else:
+            copies = copies1
+        if r == b.rev():
+            return copies
+        for c in children[r]:
+            childctx = repo[c]
+            if r == childctx.p1().rev():
+                parent = 1
+                childcopies = childctx.p1copies()
+            else:
+                assert r == childctx.p2().rev()
+                parent = 2
+                childcopies = childctx.p2copies()
+            if not match.always():
+                childcopies = {dst: src for dst, src in childcopies.items()
+                               if match(dst)}
+            childcopies = _chain(a, childctx, copies, childcopies)
+            heapq.heappush(work, (c, parent, childcopies))
+    assert False
+
 def _forwardcopies(a, b, match=None):
     """find {dst at b: src at a} copy mapping where a is an ancestor of b"""
 
diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -482,6 +482,9 @@
 coreconfigitem('experimental', 'copytrace.sourcecommitlimit',
     default=100,
 )
+coreconfigitem('experimental', 'copies.read-from',
+    default="filelog-only",
+)
 coreconfigitem('experimental', 'crecordtest',
     default=None,
 )



To: martinvonz, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list