[PATCH 7 of 7] lfs: only hardlink between the usercache and local store if the blob verifies

Matt Harbison mharbison72 at gmail.com
Thu Dec 21 15:39:49 EST 2017


# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1513883619 18000
#      Thu Dec 21 14:13:39 2017 -0500
# Node ID 909f1310d199e4b5f542e1e72b07f2bf992dd7a7
# Parent  d86777c0c198848df6e9083272685a202bebe534
lfs: only hardlink between the usercache and local store if the blob verifies

This fixes the issue where verify (and other read commands) would propagate
corrupt blobs.  I originally coded this to only hardlink if 'verify=True' for
store.read(), but then good blobs weren't being linked, and this broke a bunch
of tests.  (The blob in repo5 that is being corrupted seems to be linked into
repo5 in the loop running dumpflog.py prior to it being corrupted, but only if
verify=False is handled too.)  It's probably better to do a one-time extra
verification in order to create these files, so that the repo can be copied to a
removable drive.

Adding the same check to store.write() was only for completeness, but it also
needs to do a one-time extra verification to avoid breaking tests.

diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -111,15 +111,23 @@
         # XXX: should we verify the content of the cache, and hardlink back to
         # the local store on success, but truncate, write and link on failure?
         if not self.cachevfs.exists(oid):
-            self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
-            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
+            if verify or hashlib.sha256(data).hexdigest() == oid:
+                self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
+                lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
 
     def read(self, oid, verify=True):
         """Read blob from local blobstore."""
         if not self.vfs.exists(oid):
             blob = self._read(self.cachevfs, oid, verify)
-            self.ui.note(_('lfs: found %s in the usercache\n') % oid)
-            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
+
+            # Even if revlog will verify the content, it needs to be verified
+            # now before making the hardlink to avoid propagating corrupt blobs.
+            # Don't abort if corruption is detected, because `hg verify` will
+            # give more useful info about the corruption- simply don't add the
+            # hardlink.
+            if verify or hashlib.sha256(blob).hexdigest() == oid:
+                self.ui.note(_('lfs: found %s in the usercache\n') % oid)
+                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
         else:
             self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
             blob = self._read(self.vfs, oid, verify)
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -658,8 +658,8 @@
   lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
   4 files, 5 changesets, 10 total revisions
 
-BUG: Verify will copy/link a corrupted file from the usercache into the local
-store, and poison it.  (The verify with a good remote now fails.)
+Verify will not copy/link a corrupted file from the usercache into the local
+store, and poison it.  (The verify with a good remote now works.)
 
   $ rm -r fromcorrupt/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
   $ hg -R fromcorrupt verify -v
@@ -668,10 +668,8 @@
   checking manifests
   crosschecking files in changesets and manifests
   checking files
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the usercache
    l at 1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
   lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
    large at 0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
   lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
   lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
@@ -685,17 +683,12 @@
   checking manifests
   crosschecking files in changesets and manifests
   checking files
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
-   l at 1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
+  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the usercache
   lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
   lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
-   large at 0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
   lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
   lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
   4 files, 5 changesets, 10 total revisions
-  2 integrity errors encountered!
-  (first damaged changeset appears to be 0)
-  [1]
 
 Damaging a file required by the update destination fails the update.
 
@@ -739,7 +732,6 @@
   lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
   lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
   lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
   abort: detected corrupt lfs object: 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
   (run hg verify)
   [255]
@@ -750,10 +742,8 @@
   checking manifests
   crosschecking files in changesets and manifests
   checking files
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
    l at 1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
   lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
-  lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
    large at 0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
   lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
   lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store


More information about the Mercurial-devel mailing list