[PATCH 7 of 7 V2] lfs: deduplicate oids in the transfer

Matt Harbison mharbison72 at gmail.com
Tue Feb 6 00:29:10 EST 2018


# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1517779063 18000
#      Sun Feb 04 16:17:43 2018 -0500
# Node ID 3693ddf8c5f6f50cdb3272929f40c48d0400beb3
# Parent  73ff18b27dd03449d99668148afc02894162a2ae
lfs: deduplicate oids in the transfer

Apparently, we can't rely on the server to deduplicate for us.

Sadly, the pointer object isn't hashable, so it can't be reduced by converting
it to a set.  In order to be hashable, it needs to be immutable.  I had a bunch
of code to change it to composition and forward the readonly dict methods to a
member dict.  But the pointer is updated via __setitem__() when creating the
pointer file.  So it didn't see worth adding all of that code to the class.

diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -194,11 +194,11 @@
 
     def writebatch(self, pointers, fromstore):
         """Batch upload from local to remote blobstore."""
-        self._batch(pointers, fromstore, 'upload')
+        self._batch(_deduplicate(pointers), fromstore, 'upload')
 
     def readbatch(self, pointers, tostore):
         """Batch download from remote to local blostore."""
-        self._batch(pointers, tostore, 'download')
+        self._batch(_deduplicate(pointers), tostore, 'download')
 
     def _batchrequest(self, pointers, action):
         """Get metadata about objects pointed by pointers for given action
@@ -399,13 +399,13 @@
         self.vfs = lfsvfs(fullpath)
 
     def writebatch(self, pointers, fromstore):
-        for p in pointers:
+        for p in _deduplicate(pointers):
             content = fromstore.read(p.oid(), verify=True)
             with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                 fp.write(content)
 
     def readbatch(self, pointers, tostore):
-        for p in pointers:
+        for p in _deduplicate(pointers):
             with self.vfs(p.oid(), 'rb') as fp:
                 tostore.download(p.oid(), fp)
 
@@ -444,6 +444,13 @@
     None: _promptremote,
 }
 
+def _deduplicate(pointers):
+    """Remove any duplicate oids that exist in the list"""
+    reduced = util.sortdict()
+    for p in pointers:
+        reduced[p.oid()] = p
+    return reduced.values()
+
 def _verify(oid, content):
     realoid = hashlib.sha256(content).hexdigest()
     if realoid != oid:
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -153,12 +153,10 @@
 
   $ rm -rf .hg/store/lfs `hg config lfs.usercache`
   $ hg archive -vr 1 ../archive
-  lfs: need to transfer 4 objects (63 bytes)
+  lfs: need to transfer 3 objects (51 bytes)
   lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
   lfs: adding 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b to the usercache
   lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
-  lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
-  lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
   lfs: downloading 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 (20 bytes)
   lfs: adding 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 to the usercache
   lfs: processed: 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19
@@ -181,12 +179,10 @@
 
   $ rm -rf .hg/store/lfs `hg config lfs.usercache`
   $ hg cat -vr 1 a b c
-  lfs: need to transfer 3 objects (43 bytes)
+  lfs: need to transfer 2 objects (31 bytes)
   lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
   lfs: adding 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b to the usercache
   lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
-  lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
-  lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
   lfs: downloading d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 (19 bytes)
   lfs: adding d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998 to the usercache
   lfs: processed: d11e1a642b60813aee592094109b406089b8dff4cb157157f753418ec7857998
@@ -207,12 +203,10 @@
   reverting b
   reverting c
   reverting d
-  lfs: need to transfer 4 objects (63 bytes)
+  lfs: need to transfer 3 objects (51 bytes)
   lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
   lfs: adding 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b to the usercache
   lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
-  lfs: downloading 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b (12 bytes)
-  lfs: processed: 31cf46fbc4ecd458a0943c5b4881f1f5a6dd36c53d6167d5b69ac45149b38e5b
   lfs: downloading 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 (20 bytes)
   lfs: adding 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19 to the usercache
   lfs: processed: 37a65ab78d5ecda767e8622c248b5dbff1e68b1678ab0e730d5eb8601ec8ad19


More information about the Mercurial-devel mailing list