[PATCH 09 of 11] filecommit: add a fast path to reuse raw revision data

Jun Wu quark at fb.com
Wed May 10 04:34:28 EDT 2017


# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1494402182 25200
#      Wed May 10 00:43:02 2017 -0700
# Node ID 9d4e0dc68196d113a7dd153748eea1cc731734d3
# Parent  e4ad86c15b06986e51d4754f3c11bd24cbae30bb
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 9d4e0dc68196
filecommit: add a fast path to reuse raw revision data

The high-level idea is similar to metadataonlyctx.

If filelog parents and metadata match, then raw revision data (rawtext,
rawflags, hash) could be reused. This saves time calculating hash or going
through flag processors.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1433,4 +1433,5 @@ class localrepository(object):
         meta = {}
         copy = fctx.renamed()
+        metamatched = not bool(copy)
         if copy and copy[0] != fname:
             # Mark the new revision of this file as a copy of another
@@ -1476,4 +1477,5 @@ class localrepository(object):
                 meta["copy"] = cfname
                 meta["copyrev"] = hex(crev)
+                metamatched = (crev == copy[1])
                 fparent1, fparent2 = nullid, newfparent
             else:
@@ -1491,4 +1493,22 @@ class localrepository(object):
                 fparent2 = nullid
 
+        # reuse rawdata? (skip flagprocessors and hash calculation)
+        if (metamatched and node is not None
+            # some filectxs do not support rawdata or flags
+            and util.safehasattr(fctx, 'rawdata')
+            and util.safehasattr(fctx, 'rawflags')
+            # some (external) filelogs do not have addrawrevision
+            and util.safehasattr(flog, 'addrawrevision')
+            # parents must match to be able to reuse rawdata
+            and fctx.filelog().parents(node) == (fparent1, fparent2)):
+            # node is different from fparents, no need to check manifest flag
+            changelist.append(fname)
+            if node in getattr(flog, 'nodemap', ()):
+                self.ui.debug('reusing %s filelog node (exact match)\n' % fname)
+                return node
+            self.ui.debug('reusing %s filelog rawdata\n' % fname)
+            return flog.addrawrevision(fctx.rawdata(), tr, linkrev, fparent1,
+                                       fparent2, node, fctx.rawflags())
+
         # is the file changed?
         text = fctx.data()
diff --git a/tests/test-commit-amend.t b/tests/test-commit-amend.t
--- a/tests/test-commit-amend.t
+++ b/tests/test-commit-amend.t
@@ -1179,2 +1179,41 @@ Test if amend preserves executable bit c
   
 #endif
+
+File node could be reused
+
+  $ cd $TESTTMP
+  $ hg init reuse-test
+  $ cd reuse-test
+  $ echo 1 > a
+  $ echo 2 > b
+  $ hg commit -m 12 -A a b
+  $ echo 3 >> a
+  $ hg commit -m 3
+
+  $ hg ci --debug --amend -m 'without content change'
+  amending changeset 0bd823dca296
+  copying changeset 0bd823dca296 to dd3d87f356df
+  committing files:
+  a
+  reusing a filelog node (exact match)
+  committing manifest
+  committing changelog
+  committed changeset 2:92bc7a9d76f010337ece134e095054c094d44760
+
+(but is not smart enough to reuse manifest-only changes)
+
+  $ chmod +x b
+  $ hg ci --debug --amend -m 'without content change'
+  invalid branchheads * (glob) (?)
+  amending changeset 92bc7a9d76f0
+  committing files:
+  b
+  committing manifest
+  committing changelog
+  copying changeset 2bfd5c45b3f5 to dd3d87f356df
+  committing files:
+  a
+  b
+  committing manifest
+  committing changelog
+  committed changeset 4:ba954a28eb454eb63e7348349f8e87e7b1be3601
diff --git a/tests/test-context.py b/tests/test-context.py
--- a/tests/test-context.py
+++ b/tests/test-context.py
@@ -147,2 +147,23 @@ print(actx2.status(other=wcctx,
                    listclean=True))
 print('wcctx._status=%s' % (str(wcctx._status)))
+
+# copy filectx from repo to repo2 using overlayfilectx and memctx
+print('=== filelog rawdata reuse ===')
+os.chdir('..')
+u.setconfig('ui', 'debug', '1')
+repo2 = hg.repository(u, 'test2', create=1)
+
+def copyctx(newrepo, ctx):
+    cl = newrepo.changelog
+    p1 = cl.node(len(cl) - 1)
+    files = ctx.files()
+    desc = 'copied: %s' % ctx.description()
+    def getfctx(repo, memctx, path):
+        if path not in ctx:
+            return None
+        return context.overlayfilectx(ctx[path])
+    return context.memctx(newrepo, [p1, None], desc, files, getfctx)
+
+for rev in repo:
+    print('copying rev %d from test1 to test2' % rev)
+    copyctx(repo2, repo[rev]).commit()
diff --git a/tests/test-context.py.out b/tests/test-context.py.out
--- a/tests/test-context.py.out
+++ b/tests/test-context.py.out
@@ -45,2 +45,23 @@ wcctx._status=<status modified=['bar-m']
 <status modified=[], added=['bar-r'], removed=[], deleted=[], unknown=[], ignored=[], clean=['foo']>
 wcctx._status=<status modified=['bar-m'], added=['bar-a'], removed=[], deleted=[], unknown=[], ignored=[], clean=[]>
+=== filelog rawdata reuse ===
+copying rev 0 from test1 to test2
+committing files:
+foo
+reusing foo filelog rawdata
+committing manifest
+committing changelog
+copying rev 1 from test1 to test2
+committing files:
+foo
+reusing foo filelog rawdata
+committing manifest
+committing changelog
+copying rev 2 from test1 to test2
+committing files:
+bar-m
+reusing bar-m filelog rawdata
+bar-r
+reusing bar-r filelog rawdata
+committing manifest
+committing changelog


More information about the Mercurial-devel mailing list