[PATCH 5 of 5 experimental] copies: handle backward merge detection of rename/copies (issue1932)

Tue Sep 21 14:18:21 CDT 2010

# HG changeset patch
# User Gilles Moris <gilles.moris at free.fr>
# Date 1284830190 -7200
# Node ID 476edbf49cec059636f18b9820d4dada06ba99de
# Parent  80d923116951ab6649dbbb505359605d6698a2d4
copies: handle backward merge detection of rename/copies (issue1932)

The concept is to use a "divied and conqueer" approach to build on the
existing algorithm.

Due to the fact that copies metadata point backward in the past, we need
to go down in the history between to given revision if they have something
in common.
Usually, we use a real common ancestor in a merge, but this can not be the
case for special operations like backward merge of the working repository,
or more generally crossing branches with changes in the working directory,
or transplanting some changes lesewhere in the dag. The ancestor is then
chosen as the parent of the changeset being transplanted. And this does not
cope with the current copy detection.

The solution adopted is to split the copy detection between the node on each
side and the given ancestor, find a real common ancestor on each side between
those, and try to rejoin the copy detection on each side.

diff -r 80d923116951 -r 476edbf49cec mercurial/copies.py

--- a/mercurial/copies.py	Wed Sep 15 16:04:52 2010 +0200
+++ b/mercurial/copies.py	Sat Sep 18 19:16:30 2010 +0200
@@ -84,38 +84,15 @@
         return None
     return limit
 
-def copies(repo, c1, c2, ca, checkdirs=False):
-    """
-    Find moves and copies between context c1 and c2
-    """
-    # avoid silly behavior for update from empty dir
-    if not c1 or not c2 or c1 == c2:
-        return {}, {}
+def _findcopies(repo, c1, c2, ca, limit, ctx):
+    copy = {}
+    fullcopy = {}
+    diverge = {}
 
-    # avoid silly behavior for parent -> working dir
-    if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
-        return repo.dirstate.copies(), {}
-
-    limit = _findlimit(repo, c1.rev(), c2.rev())
-    if limit is None:
-        # no common ancestor, no copies
-        return {}, {}
     m1 = c1.manifest()
     m2 = c2.manifest()
     ma = ca.manifest()
 
-    def makectx(f, n):
-        if len(n) != 20: # in a working context?
-            if c1.rev() is None:
-                return c1.filectx(f)
-            return c2.filectx(f)
-        return repo.filectx(f, fileid=n)
-
-    ctx = util.lrucachefunc(makectx)
-    copy = {}
-    fullcopy = {}
-    diverge = {}
-
     def related(f1, f2, limit):
         # Walk back to common ancestor to see if the two files originate
         # from the same file. Since workingfilectx's rev() is None it messes
@@ -178,8 +155,17 @@
 
     repo.ui.debug("  searching for copies back to rev %d\n" % limit)
 
-    u1 = _nonoverlap(m1, m2, ma)
-    u2 = _nonoverlap(m2, m1, ma)
+    if c1 == ca:
+        u1 = []
+        u2 = _nonoverlap(m2, m1, {})
+        ma = {}
+    elif c2 == ca:
+        u1 = _nonoverlap(m1, m2, {})
+        u2 = []
+        ma = {}
+    else:
+        u1 = _nonoverlap(m1, m2, ma)
+        u2 = _nonoverlap(m2, m1, ma)
 
     if u1:
         repo.ui.debug("  unmatched files in local:\n   %s\n"
@@ -193,6 +179,104 @@
     for f in u2:
         checkcopies(f, m2, m1)
 
+    return fullcopy, copy, diverge
+
+def copies(repo, c1, c2, ca, checkdirs=False):
+    """
+    Find moves and copies between context c1 and c2
+    """
+    # avoid silly behavior for update from empty dir
+    if not c1 or not c2 or c1 == c2:
+        return {}, {}
+
+    # avoid silly behavior for parent -> working dir
+    if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
+        return repo.dirstate.copies(), {}
+
+    limit = _findlimit(repo, c1.rev(), c2.rev())
+    if limit is None:
+        # no common ancestor, no copies
+        return {}, {}
+    m1 = c1.manifest()
+    m2 = c2.manifest()
+    ma = ca.manifest()
+
+    def makectx(f, n):
+        if len(n) != 20: # in a working context?
+            if c1.rev() is None:
+                return c1.filectx(f)
+            return c2.filectx(f)
+        return repo.filectx(f, fileid=n)
+
+    ctx = util.lrucachefunc(makectx)
+
+    def adjust(fcp, cp, div):
+        fullcopy = {}
+        copy = {}
+        diverge = {}
+        for s, d in cp.items():
+            if s in ma: # backward
+                copy[d] = s
+            else: # forward
+                copy[s] = d
+        for s, d in div.items():
+            anc = [f for f in d if f in ma]
+            oth = [f for f in d if f not in ma]
+            if len(anc) > 1:
+                for f in anc:
+                    diverge[f] = oth
+            else:
+                for f in oth:
+                    copy[f] = anc[0]
+                    fullcopy[f] = anc[0]
+        for s, d in fcp.items():
+            if s in copy: # backward
+                fullcopy[d] = copy[s]
+            else: # forward
+                fullcopy[d] = s
+        return fullcopy, copy, diverge
+    
+    ca1 = c1.ancestor(ca)
+    ca2 = c2.ancestor(ca)
+    if ca1 == ca and ca2 == ca:
+        fullcopy, copy, diverge = _findcopies(repo, c1, c2, ca, limit, ctx)
+    else:
+        # split and merge
+        res1 = _findcopies(repo, c1, ca, ca1, limit, ctx)
+        res2 = _findcopies(repo, c2, ca, ca2, limit, ctx)
+        if ca1 != ca:
+            res1 = adjust(*res1)
+        if ca2 != ca:
+            res2 = adjust(*res2)
+        fcp1, cp1, div1 = res1
+        fcp2, cp2, div2 = res2
+        # blends the two sides
+        fullcopy = fcp1
+        fullcopy.update(fcp2)
+        diverge = {}
+        copy = {}
+        div1.update(div2)
+        if div1:
+            diverge = div1
+        else:
+            for s, d in cp1.items():
+                div1.setdefault(d, []).append(s)
+            for s, d in cp2.items():
+                div2.setdefault(d, []).append(s)
+            for f in frozenset(div1.keys() + div2.keys()):
+                if f in div1 and f in div2:
+                    cs1 = frozenset(div1[f])
+                    cs2 = frozenset(div2[f])
+                    converge = cs1 & cs2
+                    if cs1 - converge and cs2 - converge: # divergent renames
+                        diverge[f] = list(cs1 ^ cs2)
+                    elif cs1 != cs2:
+                        copy.update((k, f) for k in cs1 ^ cs2)
+                elif f in div1:
+                    copy.update((k, f) for k in div1[f])
+                elif f in div2:
+                    copy.update((k, f) for k in div2[f])
+
     diverge2 = set()
     for of, fl in diverge.items():
         if len(fl) == 1:
@@ -253,6 +337,8 @@
         repo.ui.debug("  dir %s -> %s\n" % (d, dirmove[d]))
 
     # check unaccounted nonoverlapping files against directory moves
+    u1 = _nonoverlap(m1, m2, ma)
+    u2 = _nonoverlap(m2, m1, ma)
     for f in u1 + u2:
         if f not in fullcopy:
             for d in dirmove: