[PATCH 1 of 2] largefiles: use the convert extension for 'lfconvert --to-normal'

Matt Harbison matt_harbison at yahoo.com
Thu May 28 19:32:56 UTC 2015


# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1432834477 14400
#      Thu May 28 13:34:37 2015 -0400
# Node ID f587c4efdb86aea17bb8c44f0a21aa1efb780444
# Parent  bcb17d7dbec25088eaec5e4d34dedbd7057c5d68
largefiles: use the convert extension for 'lfconvert --to-normal'

The logic in the convert extension is more advanced: it converts revision IDs
in 'extras' (e.g. 'amend_source'), updates hashes in commit messages, and
outputs an SHA map file.  Rather than try to duplicate all of that, just use the
existing code.

Even though the convert extension supports user-supplied options like filemap,
etc., those features aren't available on the lfconvert interface.  Therefore,
it is safe to use the filemap mechanism (in memory) to handle the standin ->
file rename.  The convert extension handles the destination locking for this
path.

There was a comment in test-lfconvert.t about the hash on rev 5 being different
because it was doing a better job than "hg remove" + "hg merge" + "hg commit".
It isn't clear to me what was happening or why, but now the hashes match the
original repo exactly after a roundtrip, which seems like a good idea.  If there
really was something beneficial about the previous behavior, perhaps merge can
be changed so that everyone benefits.

Converting to a largefiles repo still uses the original (limited) lfconvert
logic.

diff --git a/hgext/largefiles/lfcommands.py b/hgext/largefiles/lfcommands.py
--- a/hgext/largefiles/lfcommands.py
+++ b/hgext/largefiles/lfcommands.py
@@ -16,6 +16,9 @@
 from mercurial.i18n import _
 from mercurial.lock import release
 
+from hgext.convert import convcmd
+from hgext.convert import filemap
+
 import lfutil
 import basestore
 
@@ -70,12 +73,6 @@
     success = False
     dstwlock = dstlock = None
     try:
-        # Lock destination to prevent modification while it is converted to.
-        # Don't need to lock src because we are just reading from its history
-        # which can't change.
-        dstwlock = rdst.wlock()
-        dstlock = rdst.lock()
-
         # Get a list of all changesets in the source.  The easy way to do this
         # is to simply walk the changelog, using changelog.nodesbetween().
         # Take a look at mercurial/revlog.py:639 for more details.
@@ -84,6 +81,12 @@
             rsrc.heads())[0])
         revmap = {node.nullid: node.nullid}
         if tolfile:
+            # Lock destination to prevent modification while it is converted to.
+            # Don't need to lock src because we are just reading from its
+            # history which can't change.
+            dstwlock = rdst.wlock()
+            dstlock = rdst.lock()
+
             lfiles = set()
             normalfiles = set()
             if not pats:
@@ -118,16 +121,51 @@
                 rdst.requirements.add('largefiles')
                 rdst._writerequirements()
         else:
-            for ctx in ctxs:
-                ui.progress(_('converting revisions'), ctx.rev(),
-                    unit=_('revision'), total=rsrc['tip'].rev())
-                _addchangeset(ui, rsrc, rdst, ctx, revmap)
+            class lfsource(filemap.filemap_source):
+                def __init__(self, ui, source):
+                    super(lfsource, self).__init__(ui, source, None)
+                    self.filemapper.rename[lfutil.shortname] = '.'
 
-            ui.progress(_('converting revisions'), None)
+                def getfile(self, name, rev):
+                    realname, realrev = rev
+                    f = super(lfsource, self).getfile(name, rev)
+
+                    if (not realname.startswith(lfutil.shortnameslash)
+                            or f[0] is None):
+                        return f
+
+                    # Substitute in the largefile data for the hash
+                    hash = f[0].strip()
+                    path = lfutil.findfile(rsrc, hash)
+
+                    if path is None:
+                        raise util.Abort(_("missing largefile for \'%s\' in %s")
+                                          % (realname, realrev))
+                    fp = open(path, 'rb')
+
+                    try:
+                        return (fp.read(), f[1])
+                    finally:
+                        fp.close()
+
+            class converter(convcmd.converter):
+                def __init__(self, ui, source, dest, revmapfile, opts):
+                    src = lfsource(ui, source)
+
+                    super(converter, self).__init__(ui, src, dest, revmapfile,
+                                                    opts)
+
+            found, missing = downloadlfiles(ui, rsrc)
+            if missing != 0:
+                raise util.Abort(_("all largefiles must be present locally"))
+
+            convcmd.converter = converter
+            convcmd.convert(ui, src, dest)
         success = True
     finally:
-        rdst.dirstate.clear()
-        release(dstlock, dstwlock)
+        if tolfile:
+            rdst.dirstate.clear()
+            release(dstlock, dstwlock)
         if not success:
             # we failed, remove the new directory
             shutil.rmtree(rdst.root)
diff --git a/tests/test-lfconvert.t b/tests/test-lfconvert.t
--- a/tests/test-lfconvert.t
+++ b/tests/test-lfconvert.t
@@ -226,6 +226,7 @@
   $ hg commit -m "add anotherlarge (should be a largefile)"
   $ cat .hglf/anotherlarge
   3b71f43ff30f4b15b5cd85dd9e95ebc7e84eb5a3
+  $ hg tag mytag
   $ cd ..
 
 round-trip: converting back to a normal (non-largefiles) repo with
@@ -233,25 +234,30 @@
   $ cd largefiles-repo
   $ hg lfconvert --to-normal . ../normal-repo
   initializing destination ../normal-repo
+  0 additional largefiles cached
+  scanning source...
+  sorting...
+  converting...
+  7 add large, normal1
+  6 add sub/*
+  5 rename sub/ to stuff/
+  4 add normal3, modify sub/*
+  3 remove large, normal3
+  2 merge
+  1 add anotherlarge (should be a largefile)
+  0 Added tag mytag for changeset abacddda7028
   $ cd ../normal-repo
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
   > largefiles = !
   > EOF
 
-# Hmmm: the changeset ID for rev 5 is different from the original
-# normal repo (../bigfile-repo), because the changelog filelist
-# differs between the two incarnations of rev 5: this repo includes
-# 'large' in the list, but ../bigfile-repo does not. Since rev 5
-# removes 'large' relative to the first parent in both repos, it seems
-# to me that lfconvert is doing a *better* job than
-# "hg remove" + "hg merge" + "hg commit".
-#  $ hg -R ../bigfile-repo debugdata -c 5
-#  $ hg debugdata -c 5
   $ hg log -G --template "{rev}:{node|short}  {desc|firstline}\n"
-  o  6:1635824e6f59  add anotherlarge (should be a largefile)
+  o  7:b5fedc110b9d  Added tag mytag for changeset 867ab992ecf4
   |
-  o    5:7215f8deeaaf  merge
+  o  6:867ab992ecf4  add anotherlarge (should be a largefile)
+  |
+  o    5:4884f215abda  merge
   |\
   | o  4:7285f817b77e  remove large, normal3
   | |
@@ -264,8 +270,9 @@
   o  0:117b8328f97a  add large, normal1
   
   $ hg update
-  4 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  5 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ hg locate
+  .hgtags
   anotherlarge
   normal1
   stuff/maybelarge.dat
@@ -284,15 +291,18 @@
   scanning source...
   sorting...
   converting...
-  6 add large, normal1
-  5 add sub/*
-  4 rename sub/ to stuff/
-  3 add normal3, modify sub/*
-  2 remove large, normal3
-  1 merge
-  0 add anotherlarge (should be a largefile)
+  7 add large, normal1
+  6 add sub/*
+  5 rename sub/ to stuff/
+  4 add normal3, modify sub/*
+  3 remove large, normal3
+  2 merge
+  1 add anotherlarge (should be a largefile)
+  0 Added tag mytag for changeset abacddda7028
 
   $ hg -R largefiles-repo-hg log -G --template "{rev}:{node|short}  {desc|firstline}\n"
+  o  7:2f08f66459b7  Added tag mytag for changeset 17126745edfd
+  |
   o  6:17126745edfd  add anotherlarge (should be a largefile)
   |
   o    5:9cc5aa7204f0  merge
@@ -315,8 +325,8 @@
   checking manifests
   crosschecking files in changesets and manifests
   checking files
-  8 files, 7 changesets, 12 total revisions
-  searching 7 changesets for largefiles
+  9 files, 8 changesets, 13 total revisions
+  searching 8 changesets for largefiles
   changeset 0:d4892ec57ce2: large references missing $TESTTMP/largefiles-repo-hg/.hg/largefiles/2e000fa7e85759c7f4c254d4d9c33ef481e459a7 (glob)
   changeset 1:334e5237836d: sub/maybelarge.dat references missing $TESTTMP/largefiles-repo-hg/.hg/largefiles/34e163be8e43c5631d8b92e9c43ab0bf0fa62b9c (glob)
   changeset 2:261ad3f3f037: stuff/maybelarge.dat references missing $TESTTMP/largefiles-repo-hg/.hg/largefiles/34e163be8e43c5631d8b92e9c43ab0bf0fa62b9c (glob)
@@ -336,6 +346,18 @@
   $ rm -f "${USERCACHE}"/*
   $ hg lfconvert --to-normal issue3519 normalized3519
   initializing destination normalized3519
+  4 additional largefiles cached
+  scanning source...
+  sorting...
+  converting...
+  7 add large, normal1
+  6 add sub/*
+  5 rename sub/ to stuff/
+  4 add normal3, modify sub/*
+  3 remove large, normal3
+  2 merge
+  1 add anotherlarge (should be a largefile)
+  0 Added tag mytag for changeset abacddda7028
 
 Ensure the abort message is useful if a largefile is entirely unavailable
   $ rm -rf normalized3519
@@ -344,8 +366,20 @@
   $ rm largefiles-repo/.hg/largefiles/*
   $ hg lfconvert --to-normal issue3519 normalized3519
   initializing destination normalized3519
+  anotherlarge: largefile 3b71f43ff30f4b15b5cd85dd9e95ebc7e84eb5a3 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  stuff/maybelarge.dat: largefile 76236b6a2c6102826c61af4297dd738fb3b1de38 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  stuff/maybelarge.dat: largefile 76236b6a2c6102826c61af4297dd738fb3b1de38 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  sub/maybelarge.dat: largefile 76236b6a2c6102826c61af4297dd738fb3b1de38 not available from file:/*/$TESTTMP/largefiles-repo (glob)
   large: largefile 2e000fa7e85759c7f4c254d4d9c33ef481e459a7 not available from file:/*/$TESTTMP/largefiles-repo (glob)
-  abort: missing largefile 'large' from revision d4892ec57ce212905215fad1d9018f56b99202ad
+  sub/maybelarge.dat: largefile 76236b6a2c6102826c61af4297dd738fb3b1de38 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  large: largefile 2e000fa7e85759c7f4c254d4d9c33ef481e459a7 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  stuff/maybelarge.dat: largefile 34e163be8e43c5631d8b92e9c43ab0bf0fa62b9c not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  large: largefile 2e000fa7e85759c7f4c254d4d9c33ef481e459a7 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  sub/maybelarge.dat: largefile 34e163be8e43c5631d8b92e9c43ab0bf0fa62b9c not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  large: largefile 2e000fa7e85759c7f4c254d4d9c33ef481e459a7 not available from file:/*/$TESTTMP/largefiles-repo (glob)
+  0 additional largefiles cached
+  11 largefiles failed to download
+  abort: all largefiles must be present locally
   [255]
 
 


More information about the Mercurial-devel mailing list