D7881: lfs: check content length after downloading content

mharbison72 (Matt Harbison) phabricator at mercurial-scm.org
Wed Jan 15 04:09:51 UTC 2020


mharbison72 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Adapted from the Facebook repo[1].  The intent is to distinguish between the
  connection dying and getting served a corrupt blob.
  
  The original message:
  
  HTTP makes no provision to tell your client that you failed halfway through
  producing your response and won't have the answer they're looking for. So, if a
  LFS server fails while producing a response, then we'll report an OID mismatch.
  
  We can do a little better and disambiguate between "the server sent us the
  wrong blob" (very scary) and "the server crashed" (merely annoying) by looking
  at the content length of the response we got back. If it's not what was
  advertised, we can reasonably safely assume the server crashed.
  
  [1] https://github.com/facebookexperimental/eden/commit/2a4a6fab4e882ed89b948bfc1e7d56d7c3c99dd2

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7881

AFFECTED FILES
  hgext/lfs/blobstore.py
  hgext/lfs/wireprotolfsserver.py
  tests/test-lfs-serve-access.t

CHANGE DETAILS

diff --git a/tests/test-lfs-serve-access.t b/tests/test-lfs-serve-access.t
--- a/tests/test-lfs-serve-access.t
+++ b/tests/test-lfs-serve-access.t
@@ -210,7 +210,7 @@
   > 
   >     store = repo.svfs.lfslocalblobstore
   >     class badstore(store.__class__):
-  >         def download(self, oid, src):
+  >         def download(self, oid, src, contentlength):
   >             '''Called in the server to handle reading from the client in a
   >             PUT request.'''
   >             origread = src.read
@@ -218,7 +218,7 @@
   >                 # Simulate bad data/checksum failure from the client
   >                 return b'0' * len(origread(nbytes))
   >             src.read = _badread
-  >             super(badstore, self).download(oid, src)
+  >             super(badstore, self).download(oid, src, contentlength)
   > 
   >         def _read(self, vfs, oid, verify):
   >             '''Called in the server to read data for a GET request, and then
@@ -351,8 +351,8 @@
   $LOCALIP - - [$ERRDATE$] HG error:   (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Exception happened while processing request '/.hg/lfs/objects/b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c': (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Traceback (most recent call last): (glob)
-  $LOCALIP - - [$ERRDATE$] HG error:      localstore.download(oid, req.bodyfh) (glob)
-  $LOCALIP - - [$ERRDATE$] HG error:      super(badstore, self).download(oid, src) (glob)
+  $LOCALIP - - [$ERRDATE$] HG error:      localstore.download(oid, req.bodyfh, req.headers[b'Content-Length'])
+  $LOCALIP - - [$ERRDATE$] HG error:      super(badstore, self).download(oid, src, contentlength)
   $LOCALIP - - [$ERRDATE$] HG error:      _(b'corrupt remote lfs object: %s') % oid (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  LfsCorruptionError: corrupt remote lfs object: b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c (no-py3 !)
   $LOCALIP - - [$ERRDATE$] HG error:  hgext.lfs.blobstore.LfsCorruptionError: corrupt remote lfs object: b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c (py3 !)
diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -327,7 +327,7 @@
 
         statusmessage = hgwebcommon.statusmessage
         try:
-            localstore.download(oid, req.bodyfh)
+            localstore.download(oid, req.bodyfh, req.headers[b'Content-Length'])
             res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED)
         except blobstore.LfsCorruptionError:
             _logexception(req)
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -155,15 +155,29 @@
 
         return self.vfs(oid, b'rb')
 
-    def download(self, oid, src):
+    def download(self, oid, src, content_length):
         """Read the blob from the remote source in chunks, verify the content,
         and write to this local blobstore."""
         sha256 = hashlib.sha256()
+        size = 0
 
         with self.vfs(oid, b'wb', atomictemp=True) as fp:
             for chunk in util.filechunkiter(src, size=1048576):
                 fp.write(chunk)
                 sha256.update(chunk)
+                size += len(chunk)
+
+            # If the server advertised a length longer than what we actually
+            # received, then we should expect that the server crashed while
+            # producing the response (but the server has no way of telling us
+            # that), and we really don't need to try to write the response to
+            # the localstore, because it's not going to match the expected.
+            if content_length is not None and int(content_length) != size:
+                msg = (
+                    b"Response length (%s) does not match Content-Length "
+                    b"header (%d): likely server-side crash"
+                )
+                raise LfsRemoteError(_(msg) % (size, int(content_length)))
 
             realoid = node.hex(sha256.digest())
             if realoid != oid:
@@ -492,6 +506,7 @@
         response = b''
         try:
             with contextlib.closing(self.urlopener.open(request)) as res:
+                contentlength = res.info().get(b"content-length")
                 ui = self.ui  # Shorten debug lines
                 if self.ui.debugflag:
                     ui.debug(b'Status: %d\n' % res.status)
@@ -503,7 +518,7 @@
                 if action == b'download':
                     # If downloading blobs, store downloaded data to local
                     # blobstore
-                    localstore.download(oid, res)
+                    localstore.download(oid, res, contentlength)
                 else:
                     while True:
                         data = res.read(1048576)
@@ -637,7 +652,7 @@
     def readbatch(self, pointers, tostore):
         for p in _deduplicate(pointers):
             with self.vfs(p.oid(), b'rb') as fp:
-                tostore.download(p.oid(), fp)
+                tostore.download(p.oid(), fp, None)
 
 
 class _nullremote(object):



To: mharbison72, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list