D7881: lfs: check content length after downloading content

mharbison72 (Matt Harbison) phabricator at mercurial-scm.org
Wed Jan 15 08:18:05 EST 2020


Closed by commit rHG0ee0a3f6a990: lfs: check content length after downloading content (authored by mharbison72).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7881?vs=19280&id=19291

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7881/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7881

AFFECTED FILES
  hgext/lfs/blobstore.py
  hgext/lfs/wireprotolfsserver.py
  tests/test-lfs-serve-access.t

CHANGE DETAILS

diff --git a/tests/test-lfs-serve-access.t b/tests/test-lfs-serve-access.t
--- a/tests/test-lfs-serve-access.t
+++ b/tests/test-lfs-serve-access.t
@@ -210,7 +210,7 @@
   > 
   >     store = repo.svfs.lfslocalblobstore
   >     class badstore(store.__class__):
-  >         def download(self, oid, src):
+  >         def download(self, oid, src, contentlength):
   >             '''Called in the server to handle reading from the client in a
   >             PUT request.'''
   >             origread = src.read
@@ -218,7 +218,7 @@
   >                 # Simulate bad data/checksum failure from the client
   >                 return b'0' * len(origread(nbytes))
   >             src.read = _badread
-  >             super(badstore, self).download(oid, src)
+  >             super(badstore, self).download(oid, src, contentlength)
   > 
   >         def _read(self, vfs, oid, verify):
   >             '''Called in the server to read data for a GET request, and then
@@ -351,8 +351,8 @@
   $LOCALIP - - [$ERRDATE$] HG error:   (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Exception happened while processing request '/.hg/lfs/objects/b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c': (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  Traceback (most recent call last): (glob)
-  $LOCALIP - - [$ERRDATE$] HG error:      localstore.download(oid, req.bodyfh) (glob)
-  $LOCALIP - - [$ERRDATE$] HG error:      super(badstore, self).download(oid, src) (glob)
+  $LOCALIP - - [$ERRDATE$] HG error:      localstore.download(oid, req.bodyfh, req.headers[b'Content-Length'])
+  $LOCALIP - - [$ERRDATE$] HG error:      super(badstore, self).download(oid, src, contentlength)
   $LOCALIP - - [$ERRDATE$] HG error:      _(b'corrupt remote lfs object: %s') % oid (glob)
   $LOCALIP - - [$ERRDATE$] HG error:  LfsCorruptionError: corrupt remote lfs object: b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c (no-py3 !)
   $LOCALIP - - [$ERRDATE$] HG error:  hgext.lfs.blobstore.LfsCorruptionError: corrupt remote lfs object: b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c (py3 !)
diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -327,7 +327,7 @@
 
         statusmessage = hgwebcommon.statusmessage
         try:
-            localstore.download(oid, req.bodyfh)
+            localstore.download(oid, req.bodyfh, req.headers[b'Content-Length'])
             res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED)
         except blobstore.LfsCorruptionError:
             _logexception(req)
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -155,15 +155,29 @@
 
         return self.vfs(oid, b'rb')
 
-    def download(self, oid, src):
+    def download(self, oid, src, content_length):
         """Read the blob from the remote source in chunks, verify the content,
         and write to this local blobstore."""
         sha256 = hashlib.sha256()
+        size = 0
 
         with self.vfs(oid, b'wb', atomictemp=True) as fp:
             for chunk in util.filechunkiter(src, size=1048576):
                 fp.write(chunk)
                 sha256.update(chunk)
+                size += len(chunk)
+
+            # If the server advertised a length longer than what we actually
+            # received, then we should expect that the server crashed while
+            # producing the response (but the server has no way of telling us
+            # that), and we really don't need to try to write the response to
+            # the localstore, because it's not going to match the expected.
+            if content_length is not None and int(content_length) != size:
+                msg = (
+                    b"Response length (%s) does not match Content-Length "
+                    b"header (%d): likely server-side crash"
+                )
+                raise LfsRemoteError(_(msg) % (size, int(content_length)))
 
             realoid = node.hex(sha256.digest())
             if realoid != oid:
@@ -492,6 +506,7 @@
         response = b''
         try:
             with contextlib.closing(self.urlopener.open(request)) as res:
+                contentlength = res.info().get(b"content-length")
                 ui = self.ui  # Shorten debug lines
                 if self.ui.debugflag:
                     ui.debug(b'Status: %d\n' % res.status)
@@ -503,7 +518,7 @@
                 if action == b'download':
                     # If downloading blobs, store downloaded data to local
                     # blobstore
-                    localstore.download(oid, res)
+                    localstore.download(oid, res, contentlength)
                 else:
                     while True:
                         data = res.read(1048576)
@@ -637,7 +652,7 @@
     def readbatch(self, pointers, tostore):
         for p in _deduplicate(pointers):
             with self.vfs(p.oid(), b'rb') as fp:
-                tostore.download(p.oid(), fp)
+                tostore.download(p.oid(), fp, None)
 
 
 class _nullremote(object):



To: mharbison72, #hg-reviewers, pulkit
Cc: mercurial-devel


More information about the Mercurial-devel mailing list