D6183: copies: add config option for writing copy metadata to file and/or changeset

martinvonz (Martin von Zweigbergk) phabricator at mercurial-scm.org
Tue Apr 2 15:31:45 EDT 2019


martinvonz created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This introduces a config option that lets you choose to write copy
  metadata to the changeset extras instead of to filelog. There's also
  an option to write it to both places. I imagine that may possibly be
  useful when transitioning an existing repo.
  
  The copy metadata is stored as two fields in extras: one for copies
  since p1 and one for copies since p2.
  
  I may need to add more information later in order to make copy tracing
  faster. Specifically, I'm thinking about recording which files were
  added or removed so that copies._chaincopies() doesn't have to look at
  the manifest for that. But that would just be an optimization and that
  can be added once we know if it's necessary.
  
  I have also considered saving space by replacing the destination
  file path by an index into the "files" list, but that can also be
  changed later (but before the feature is ready to release).

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6183

AFFECTED FILES
  mercurial/changelog.py
  mercurial/configitems.py
  mercurial/localrepo.py
  tests/test-annotate.t
  tests/test-copies-in-changeset.t
  tests/test-fastannotate-hg.t

CHANGE DETAILS

diff --git a/tests/test-fastannotate-hg.t b/tests/test-fastannotate-hg.t
--- a/tests/test-fastannotate-hg.t
+++ b/tests/test-fastannotate-hg.t
@@ -443,7 +443,7 @@
   > def reposetup(ui, repo):
   >     class legacyrepo(repo.__class__):
   >         def _filecommit(self, fctx, manifest1, manifest2,
-  >                         linkrev, tr, changelist):
+  >                         linkrev, tr, changelist, includecopymeta):
   >             fname = fctx.path()
   >             text = fctx.data()
   >             flog = self.file(fname)
diff --git a/tests/test-copies-in-changeset.t b/tests/test-copies-in-changeset.t
new file mode 100644
--- /dev/null
+++ b/tests/test-copies-in-changeset.t
@@ -0,0 +1,113 @@
+
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > copies.write-to=changeset-only
+  > [alias]
+  > changesetcopies = log -r . -T "files: {files}
+  >   p1copies: {get(extras,'p1copies')}
+  >   p2copies: {get(extras,'p2copies')}
+  >   "
+  > EOF
+
+Check that copies are recorded correctly
+
+  $ hg init repo
+  $ cd repo
+  $ echo a > a
+  $ hg add a
+  $ hg ci -m initial
+  $ hg cp a b
+  $ hg cp a c
+  $ hg cp a d
+  $ hg ci -m 'copy a to b, c, and d'
+  $ hg changesetcopies
+  files: b c d
+  p1copies: b\x00a (esc)
+  c\x00a (esc)
+  d\x00a (esc)
+  p2copies: 
+
+Check that renames are recorded correctly
+
+  $ hg mv b b2
+  $ hg ci -m 'rename b to b2'
+  $ hg changesetcopies
+  files: b b2
+  p1copies: b2\x00b (esc)
+  p2copies: 
+
+Rename onto existing file. This should get recorded in the changeset files list and in the extras,
+even though there is no filelog entry.
+
+  $ hg cp b2 c --force
+  $ hg st --copies
+  M c
+    b2
+  $ hg debugindex c
+     rev linkrev nodeid       p1           p2
+       0       1 b789fdd96dc2 000000000000 000000000000
+  $ hg ci -m 'move b onto d'
+  $ hg changesetcopies
+  files: c
+  p1copies: c\x00b2 (esc)
+  p2copies: 
+  $ hg debugindex c
+     rev linkrev nodeid       p1           p2
+       0       1 b789fdd96dc2 000000000000 000000000000
+
+Create a merge commit with copying done during merge.
+
+  $ hg co 0
+  0 files updated, 0 files merged, 3 files removed, 0 files unresolved
+  $ hg cp a e
+  $ hg cp a f
+  $ hg ci -m 'copy a to e and f'
+  created new head
+  $ hg merge 3
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
+always record it as being from p1
+  $ hg cp a g
+File 'd' exists only in p2, so 'h' should be from p2
+  $ hg cp d h
+File 'f' exists only in p1, so 'i' should be from p1
+  $ hg cp f i
+  $ hg ci -m 'merge'
+  $ hg changesetcopies
+  files: g h i
+  p1copies: g\x00a (esc)
+  i\x00f (esc)
+  p2copies: h\x00d (esc)
+
+Test writing to both changeset and filelog
+
+  $ hg cp a j
+  $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
+  $ hg changesetcopies
+  files: j
+  p1copies: j\x00a (esc)
+  p2copies: 
+  $ hg debugdata j 0
+  \x01 (esc)
+  copy: a
+  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
+  \x01 (esc)
+  a
+
+Test writing only to filelog
+
+  $ hg cp a k
+  $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only
+  $ hg changesetcopies
+  files: k
+  p1copies: 
+  p2copies: 
+  $ hg debugdata k 0
+  \x01 (esc)
+  copy: a
+  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
+  \x01 (esc)
+  a
+
+  $ cd ..
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -438,7 +438,7 @@
   > def reposetup(ui, repo):
   >     class legacyrepo(repo.__class__):
   >         def _filecommit(self, fctx, manifest1, manifest2,
-  >                         linkrev, tr, changelist):
+  >                         linkrev, tr, changelist, includecopymeta):
   >             fname = fctx.path()
   >             text = fctx.data()
   >             flog = self.file(fname)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -2292,7 +2292,8 @@
         """Returns the wlock if it's held, or None if it's not."""
         return self._currentlock(self._wlockref)
 
-    def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist):
+    def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist,
+                    includecopymeta):
         """
         commit an individual file as part of a larger transaction
         """
@@ -2337,7 +2338,7 @@
             if manifest2: # branch merge
                 if fparent2 == nullid or cnode is None: # copied on remote side
                     if cfname in manifest2:
-                        cnode= manifest2[cfname]
+                        cnode = manifest2[cfname]
                         newfparent = fparent1
 
             # Here, we used to search backwards through history to try to find
@@ -2351,8 +2352,9 @@
 
             if cnode:
                 self.ui.debug(" %s: copy %s:%s\n" % (fname, cfname, hex(cnode)))
-                meta["copy"] = cfname
-                meta["copyrev"] = hex(cnode)
+                if includecopymeta:
+                    meta["copy"] = cfname
+                    meta["copyrev"] = hex(cnode)
                 fparent1, fparent2 = nullid, newfparent
             else:
                 self.ui.warn(_("warning: can't find ancestor for '%s' "
@@ -2520,6 +2522,12 @@
         p1, p2 = ctx.p1(), ctx.p2()
         user = ctx.user()
 
+        writecopiesto = self.ui.config('experimental', 'copies.write-to')
+        writefilecopymeta = writecopiesto != 'changeset-only'
+        p1copies, p2copies = None, None
+        if writecopiesto in ('changeset-only', 'compatibility'):
+            p1copies = ctx.p1copies()
+            p2copies = ctx.p2copies()
         with self.lock(), self.transaction("commit") as tr:
             trp = weakref.proxy(tr)
 
@@ -2553,7 +2561,8 @@
                         else:
                             added.append(f)
                             m[f] = self._filecommit(fctx, m1, m2, linkrev,
-                                                    trp, changed)
+                                                    trp, changed,
+                                                    writefilecopymeta)
                             m.setflag(f, fctx.flags())
                     except OSError:
                         self.ui.warn(_("trouble committing %s!\n") %
@@ -2607,7 +2616,8 @@
             self.changelog.delayupdate(tr)
             n = self.changelog.add(mn, files, ctx.description(),
                                    trp, p1.node(), p2.node(),
-                                   user, ctx.date(), ctx.extra().copy())
+                                   user, ctx.date(), ctx.extra().copy(),
+                                   p1copies, p2copies)
             xp1, xp2 = p1.hex(), p2 and p2.hex() or ''
             self.hook('pretxncommit', throw=True, node=hex(n), parent1=xp1,
                       parent2=xp2)
diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -488,6 +488,9 @@
 coreconfigitem('experimental', 'copies.read-from',
     default="filelog-only",
 )
+coreconfigitem('experimental', 'copies.write-to',
+    default='file',
+)
 coreconfigitem('experimental', 'crecordtest',
     default=None,
 )
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -77,6 +77,13 @@
     ]
     return "\0".join(items)
 
+def encodecopies(copies):
+    items = [
+        '%s\0%s' % (_string_escape(k), _string_escape(copies[k]))
+        for k in sorted(copies)
+    ]
+    return "\n".join(items)
+
 def stripdesc(desc):
     """strip trailing whitespace and leading and trailing empty lines"""
     return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
@@ -530,7 +537,7 @@
         return l[3:]
 
     def add(self, manifest, files, desc, transaction, p1, p2,
-                  user, date=None, extra=None):
+                  user, date=None, extra=None, p1copies=None, p2copies=None):
         # Convert to UTF-8 encoded bytestrings as the very first
         # thing: calling any method on a localstr object will turn it
         # into a str object and the cached UTF-8 string is thus lost.
@@ -559,6 +566,13 @@
             elif branch in (".", "null", "tip"):
                 raise error.StorageError(_('the name \'%s\' is reserved')
                                          % branch)
+        if (p1copies or p2copies) and extra is None:
+            extra = {}
+        if p1copies:
+            extra['p1copies'] = encodecopies(p1copies)
+        if p2copies:
+            extra['p2copies'] = encodecopies(p2copies)
+
         if extra:
             extra = encodeextra(extra)
             parseddate = "%s %s" % (parseddate, extra)



To: martinvonz, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list