[PATCH 3 of 3 v2] graft: apply get-only changes in-memory

David Schleimer dschleimer at fb.com
Tue Nov 27 17:36:58 CST 2012


# HG changeset patch
# User David Schleimer <dschleimer at fb.com>
# Date 1354059396 28800
# Node ID e1b037a6e83cab745d1598ed6635a123a6eefb32
# Parent  6986940313bae1dad92551a2a3b21af5c9e1b67d
graft: apply get-only changes in-memory

This changes the graft code to apply the simplest-possible changes
in-memory.  Specifically, changes where all we need to do is set the
contents of files to the version in the commit being grafted.  This
means that no files were deleted and no files were modified on by both
the target branch and the change we're grafting.

Based on one of our real-world branches, with 497 grafts,
approximately 80% of cherry-picked commits will fall into the fast
case here.  This patch takes a graft with all 497 revisions on the
branch in question from 5299 seconds to 3251 seconds.

The revisions that fall in the fast case were more common near the
beginning of the branch than the end.  Longer lived branches are
likely to see less of an improvement from this patch, but they should
be unlikely to see a regression.

For single revision grafts and grafts with file-level merges, this
patch does not provide any benefits.  It does not appear to be a
regression for these cases however.

In my testing, grafting a single revision (with no file-level merges)
took 16 seconds both before and after this change.  Grafting 2
revisions (with no file-level merges) took 25 seconds before this
change, and 17 seconds after this change.  Grafting a single revision
(with a file-level merge) took 17 seconds before this change and 17
seconds after this change.  Grafting 2 revisions (with file-level
merges) took 29 seconds before this change and 29 seconds after this
change.

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -2825,45 +2825,20 @@
 
     wlock = repo.wlock()
     try:
+        current = repo['.']
         for pos, ctx in enumerate(repo.set("%ld", revs)):
-            current = repo['.']
 
             ui.status(_('grafting revision %s\n') % ctx.rev())
             if opts.get('dry_run'):
                 continue
 
-            # we don't merge the first commit when continuing
-            if not cont:
-                # perform the graft merge with p1(rev) as 'ancestor'
-                try:
-                    # ui.forcemerge is an internal variable, do not document
-                    repo.ui.setconfig('ui', 'forcemerge', opts.get('tool', ''))
-                    stats = mergemod.update(repo, ctx.node(), True, True, False,
-                                            ctx.p1().node())
-                finally:
-                    repo.ui.setconfig('ui', 'forcemerge', '')
-                # report any conflicts
-                if stats and stats[3] > 0:
-                    # write out state for --continue
-                    nodelines = [repo[rev].hex() + "\n" for rev in revs[pos:]]
-                    repo.opener.write('graftstate', ''.join(nodelines))
-                    raise util.Abort(
-                        _("unresolved conflicts, can't continue"),
-                        hint=_('use hg resolve and hg graft --continue'))
-            else:
-                cont = False
-
-            # drop the second merge parent
-            repo.setparents(current.node(), nullid)
-            repo.dirstate.write()
-            # fix up dirstate for copies and renames
-            cmdutil.duplicatecopies(repo, ctx.rev(), ctx.p1().rev())
-
-            # commit
             source = ctx.extra().get('source')
             if not source:
                 source = ctx.hex()
-            extra = {'source': source}
+            extra = {
+                'source': source,
+                'branch': current.extra().get('branch', 'default'),
+                }
             user = ctx.user()
             if opts.get('user'):
                 user = opts['user']
@@ -2873,10 +2848,90 @@
             message = ctx.description()
             if opts.get('log'):
                 message += '\n(grafted from %s)' % ctx.hex()
-            node = repo.commit(text=message, user=user,
-                        date=date, extra=extra, editor=editor)
+
+            # we don't merge the first commit when continuing
+            if not cont:
+                # perform the graft merge with p1(rev) as 'ancestor'
+
+                try:
+                    repo.ui.setconfig('ui', 'forcemerge',
+                                      opts.get('tool', ''))
+
+                    action = mergemod.calculateupdates(repo, current, ctx,
+                                                       ctx.p1(),
+                                                       branchmerge=True,
+                                                       force=True,
+                                                       partial=False)
+                finally:
+                    repo.ui.setconfig('ui', 'forcemerge', '')
+
+                filelist, filectxfn = mergemod.memapplyinfo(ctx, action)
+
+                if filelist:
+
+                    memctx = context.memctx(repo, (current.node(), None),
+                                            text=message,
+                                            files=filelist,
+                                            filectxfn=filectxfn,
+                                            user=user,
+                                            date=date,
+                                            extra=extra)
+                else:
+                    try:
+                        repo.ui.debug("falling back to on-disk merge\n")
+                        # first update to the current rev if we've
+                        # commited anything since the last update
+                        if current.node() != repo['.'].node():
+                            mergemod.update(repo, current.node(),
+                                            branchmerge=False,
+                                            force=True,
+                                            partial=None)
+                        # ui.forcemerge is an internal variable, do not document
+                        repo.ui.setconfig('ui', 'forcemerge',
+                                          opts.get('tool', ''))
+                        stats = mergemod.applyupdates(repo, action,
+                                                      wctx=repo[None],
+                                                      mctx=ctx,
+                                                      actx=ctx.p1(),
+                                                      overwrite=False)
+                        repo.setparents(current.node(), ctx.node())
+                        mergemod.recordupdates(repo, action, branchmerge=True)
+                    finally:
+                        repo.ui.setconfig('ui', 'forcemerge', '')
+                    # report any conflicts
+                    if stats and stats[3] > 0:
+                        # write out state for --continue
+                        nodelines = [repo[rev].hex() + "\n"
+                                     for rev in revs[pos:]]
+                        repo.opener.write('graftstate', ''.join(nodelines))
+                        raise util.Abort(
+                            _("unresolved conflicts, can't continue"),
+                            hint=_('use hg resolve and hg graft --continue'))
+
+            if cont or not filelist:
+                cont = False
+                repo.setparents(current.node(), nullid)
+                repo.dirstate.write()
+                # fix up dirstate for copies and renames
+                cmdutil.duplicatecopies(repo, ctx.rev(), ctx.p1().rev())
+
+            # commit
+                node = repo.commit(text=message, user=user,
+                                   date=date, extra=extra, editor=editor)
+            else:
+                node = repo.commitctx(memctx)
+
             if node is None:
                 ui.status(_('graft for revision %s is empty\n') % ctx.rev())
+            else:
+                current = repo[node]
+
+        # if we've committed at least one revision, update to the most recent
+        if current.node() != repo['.'].node():
+            mergemod.update(repo, current.node(),
+                            branchmerge=False,
+                            force=True,
+                            partial=False)
     finally:
         wlock.release()
 
diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -651,3 +651,43 @@
     if not partial:
         repo.hook('update', parent1=xp1, parent2=xp2, error=stats[3])
     return stats
+
+
+def memapplyinfo(mctx, action):
+    """Extract a list of files and a filectxfunction from an action list
+
+    This function is for translating from the merge-internal action
+    list format to a list of files, and a file context function,
+    suitable for constructing a memctx object.
+
+    Please note that not all actions are supported for in-memory
+    commits.  This function will return a None, None tuple if you pass
+    it a list with an unsupprted action.
+
+    When successful, this function will return a (filelist, filectxfn) tuple.
+    """
+
+    memactions = set(['g'])
+    copymap = copies.pathcopies(mctx.p1(), mctx)
+
+    actionmap = {}
+    filelist = []
+    for a in action:
+        actionmap[a[0]] = a
+        filelist.append(a[0])
+        if a[1] not in memactions:
+            return None, None
+
+
+    def filectxfn(repo, memctx, path):
+        a = actionmap[path]
+        flags = a[2]
+        islink = 'l' in flags
+        isexec = 'x' in flags
+        mfilectx = mctx.filectx(path)
+        return context.memfilectx(path, mfilectx.data(),
+                                  islink=islink,
+                                  isexec=isexec,
+                                  copied=copymap.get(path))
+
+    return filelist, filectxfn
diff --git a/tests/test-graft.t b/tests/test-graft.t
--- a/tests/test-graft.t
+++ b/tests/test-graft.t
@@ -135,8 +135,9 @@
     checking for directory renames
   resolving manifests
    overwrite: False, partial: False
-   ancestor: 68795b066622, local: ef0ef43d49e7+, remote: 5d205f8b35b6
+   ancestor: 68795b066622, local: ef0ef43d49e7, remote: 5d205f8b35b6
    b: local copied/moved to a -> m
+  falling back to on-disk merge
   preserving b for resolve of b
   updating: b 1/1 files (100.00%)
   picked tool 'internal:merge' for b (binary False symlink False)
@@ -148,18 +149,23 @@
     searching for copies back to rev 1
   resolving manifests
    overwrite: False, partial: False
-   ancestor: 4c60f11aa304, local: 6b9e5368ca4e+, remote: 97f8bfe72746
+   ancestor: 4c60f11aa304, local: 6b9e5368ca4e, remote: 97f8bfe72746
    e: remote is newer -> g
-  updating: e 1/1 files (100.00%)
-  getting e
   e
   grafting revision 4
     searching for copies back to rev 1
   resolving manifests
    overwrite: False, partial: False
-   ancestor: 4c60f11aa304, local: 1905859650ec+, remote: 9c233e8e184d
+   ancestor: 4c60f11aa304, local: 1905859650ec, remote: 9c233e8e184d
    e: versions differ -> m
    d: remote is newer -> g
+  falling back to on-disk merge
+  resolving manifests
+   overwrite: True, partial: False
+   ancestor: 6b9e5368ca4e+, local: 6b9e5368ca4e+, remote: 1905859650ec
+   e: remote is newer -> g
+  updating: e 1/1 files (100.00%)
+  getting e
   preserving e for resolve of e
   updating: d 1/2 files (50.00%)
   getting d


More information about the Mercurial-devel mailing list