[PATCH 2 of 2 stable] diff: search beyond ancestor when detecting renames

Mads Kiilerich mads at kiilerich.com
Mon Nov 11 21:08:18 CST 2013


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1384225676 -3600
#      Tue Nov 12 04:07:56 2013 +0100
# Branch stable
# Node ID 6bda09b4adc115285149d36fc74824370478a15a
# Parent  82165da0edded3801c421273d49ebccbf382878a
diff: search beyond ancestor when detecting renames

This removes an optimization that was introduced in 91eb4512edd0 but was too
aggressive - as indicated by how it changed test-mq-merge.t .

We are walking filelogs to find copy sources and we can thus not be sure to hit
the base revision and find the renamed file there - it could also be in the
first ancestor of the base ... in the filelog.

But again, we are walking the filelog and can thus not easily know when we hit
the first ancestor of the base revision and which filename to look for there.
The lower bound for how far we have to go has to be found from the lowest
changelog revision that is an ancestor of only one of the compared revisions.
Any filelog ancestor with a revision number lower than that revision will be
the ancestor of both compared revisions, and there is thus no reason to go
further back than that.

The profile of copies._tracefile is changed to match its new mode of
operation. Repo objects also has to be passed around in new places ... but that
seems to be necessary.

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -169,7 +169,7 @@ def perfpathcopies(ui, repo, rev1, rev2)
     ctx1 = scmutil.revsingle(repo, rev1, rev1)
     ctx2 = scmutil.revsingle(repo, rev2, rev2)
     def d():
-        copies.pathcopies(ctx1, ctx2)
+        copies.pathcopies(repo, ctx1, ctx2)
     timer(d)
 
 @command('perfmanifest', [], 'REV')
diff --git a/hgext/histedit.py b/hgext/histedit.py
--- a/hgext/histedit.py
+++ b/hgext/histedit.py
@@ -247,7 +247,7 @@ def collapse(repo, first, last, commitop
         files.update(ctx.files())
 
     # Recompute copies (avoid recording a -> b -> a)
-    copied = copies.pathcopies(base, last)
+    copied = copies.pathcopies(repo, base, last)
 
     # prune files which were reverted by the updates
     def samefile(f):
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1636,7 +1636,8 @@ def forget(ui, repo, match, prefix, expl
 
 def duplicatecopies(repo, rev, fromrev):
     '''reproduce copies from fromrev to rev in the dirstate'''
-    for dst, src in copies.pathcopies(repo[fromrev], repo[rev]).iteritems():
+    for dst, src in copies.pathcopies(repo, repo[fromrev], repo[rev]
+                                      ).iteritems():
         # copies.pathcopies returns backward renames, so dst might not
         # actually be in the dirstate
         if repo.dirstate[dst] in "nma":
@@ -1721,7 +1722,7 @@ def amend(ui, repo, commitfunc, old, ext
                 user = ctx.user()
                 date = ctx.date()
                 # Recompute copies (avoid recording a -> b -> a)
-                copied = copies.pathcopies(base, ctx)
+                copied = copies.pathcopies(repo, base, ctx)
 
                 # Prune files which were reverted by the updates: if old
                 # introduced file X and our intermediate commit, node,
@@ -2102,7 +2103,7 @@ def revert(ui, repo, ctx, parents, *pats
                 checkout(f)
                 normal(f)
 
-            copied = copies.pathcopies(repo[parent], ctx)
+            copied = copies.pathcopies(repo, repo[parent], ctx)
 
             for f in add[0] + undelete[0] + revert[0]:
                 if f in copied:
diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -5375,7 +5375,7 @@ def status(ui, repo, *pats, **opts):
     changestates = zip(states, 'MAR!?IC', stat)
 
     if (opts.get('all') or opts.get('copies')) and not opts.get('no_status'):
-        copy = copies.pathcopies(repo[node1], repo[node2])
+        copy = copies.pathcopies(repo, repo[node1], repo[node2])
 
     fm = ui.formatter('status', opts)
     fmt = '%s' + end
diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -98,15 +98,14 @@ def _chain(src, dst, a, b):
 
     return t
 
-def _tracefile(fctx, actx):
-    '''return file context that is the ancestor of fctx present in actx'''
-    stop = actx.rev()
-    am = actx.manifest()
+def _tracefile(fctx, am, limit=-1):
+    '''return file context that is the ancestor of fctx present in ancestor
+    manifest am, stopping after the first ancestor lower than limit'''
 
     for f in fctx.ancestors():
         if am.get(f.path(), None) == f.filenode():
             return f
-        if f.rev() < stop:
+        if f.rev() < limit:
             return None
 
 def _dirstatecopies(d):
@@ -117,7 +116,7 @@ def _dirstatecopies(d):
             del c[k]
     return c
 
-def _forwardcopies(a, b):
+def _forwardcopies(repo, a, b):
     '''find {dst at b: src at a} copy mapping where a is an ancestor of b'''
 
     # check for working copy
@@ -129,6 +128,13 @@ def _forwardcopies(a, b):
             # short-circuit to avoid issues with merge states
             return _dirstatecopies(w)
 
+    # files might have to be traced back to the fctx parent of the last
+    # one-side-only changeset, but not further back than that
+    limit = _findlimit(repo, a.rev(), b.rev())
+    if limit is None:
+        limit = -1
+    am = a.manifest()
+
     # find where new files came from
     # we currently don't try to find where old files went, too expensive
     # this means we can miss a case like 'hg rm b; hg cp a b'
@@ -137,7 +143,7 @@ def _forwardcopies(a, b):
     missing.difference_update(a.manifest().iterkeys())
 
     for f in missing:
-        ofctx = _tracefile(b[f], a)
+        ofctx = _tracefile(b[f], am, limit)
         if ofctx:
             cm[f] = ofctx.path()
 
@@ -147,11 +153,11 @@ def _forwardcopies(a, b):
 
     return cm
 
-def _backwardrenames(a, b):
+def _backwardrenames(repo, a, b):
     # Even though we're not taking copies into account, 1:n rename situations
     # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
     # arbitrarily pick one of the renames.
-    f = _forwardcopies(b, a)
+    f = _forwardcopies(repo, b, a)
     r = {}
     for k, v in sorted(f.iteritems()):
         # remove copies
@@ -160,16 +166,17 @@ def _backwardrenames(a, b):
         r[v] = k
     return r
 
-def pathcopies(x, y):
+def pathcopies(repo, x, y):
     '''find {dst at y: src at x} copy mapping for directed compare'''
     if x == y or not x or not y:
         return {}
     a = y.ancestor(x)
     if a == x:
-        return _forwardcopies(x, y)
+        return _forwardcopies(repo, x, y)
     if a == y:
-        return _backwardrenames(x, y)
-    return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
+        return _backwardrenames(repo, x, y)
+    return _chain(x, y,
+                  _backwardrenames(repo, x, a), _forwardcopies(repo, a, y))
 
 def mergecopies(repo, c1, c2, ca):
     """
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -1579,7 +1579,7 @@ def diff(repo, node1=None, node2=None, m
 
     copy = {}
     if opts.git or opts.upgrade:
-        copy = copies.pathcopies(ctx1, ctx2)
+        copy = copies.pathcopies(repo, ctx1, ctx2)
 
     def difffn(opts, losedata):
         return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
diff --git a/tests/test-mq-merge.t b/tests/test-mq-merge.t
--- a/tests/test-mq-merge.t
+++ b/tests/test-mq-merge.t
@@ -147,11 +147,13 @@ Check patcha is still a git patch:
   -b
   +a
   +c
-  diff --git a/aa b/aa
-  new file mode 100644
-  --- /dev/null
+  diff --git a/a b/aa
+  copy from a
+  copy to aa
+  --- a/a
   +++ b/aa
-  @@ -0,0 +1,1 @@
+  @@ -1,1 +1,1 @@
+  -b
   +a
 
 Check patcha2 is still a regular patch:
diff --git a/tests/test-mv-cp-st-diff.t b/tests/test-mv-cp-st-diff.t
--- a/tests/test-mv-cp-st-diff.t
+++ b/tests/test-mv-cp-st-diff.t
@@ -1567,3 +1567,41 @@ unrelated branch diff
   @@ -0,0 +1,1 @@
   +a
   $ cd ..
+
+
+test for case where we didn't look sufficiently far back to find rename ancestor
+
+  $ hg init diffstop
+  $ cd diffstop
+  $ echo > f
+  $ hg ci -qAmf
+  $ hg mv f g
+  $ hg ci -m'f->g'
+  $ hg up -qr0
+  $ touch x
+  $ hg ci -qAmx
+  $ echo f > f
+  $ hg ci -qmf=f
+  $ hg merge -q
+  $ hg ci -mmerge
+  $ hg log -G --template '{rev}  {desc}'
+  @    4  merge
+  |\
+  | o  3  f=f
+  | |
+  | o  2  x
+  | |
+  o |  1  f->g
+  |/
+  o  0  f
+  
+  $ hg diff --git -r 2
+  diff --git a/f b/g
+  rename from f
+  rename to g
+  --- a/f
+  +++ b/g
+  @@ -1,1 +1,1 @@
+  -
+  +f
+  $ cd ..


More information about the Mercurial-devel mailing list