[PATCH remotefilelog-ext] fileserverclient: use new iterbatch() method

Augie Fackler raf at durin42.com
Sun Mar 20 22:51:17 EDT 2016


# HG changeset patch
# User Augie Fackler <augie at google.com>
# Date 1457036127 18000
#      Thu Mar 03 15:15:27 2016 -0500
# Node ID 669622eff4bfc2273a0bd7fd102819fabf0fc03c
# Parent  be02547f644b229f84bdeb47970fbb9198741695
fileserverclient: use new iterbatch() method

This allows the client to send a single batch request for all file contents
and then handle the responses as they stream back to the client, which should
improve both running time and the user experience as far as it goes with
progress.

diff --git a/remotefilelog/fileserverclient.py b/remotefilelog/fileserverclient.py
--- a/remotefilelog/fileserverclient.py
+++ b/remotefilelog/fileserverclient.py
@@ -6,9 +6,10 @@
 # GNU General Public License version 2 or any later version.
 
 from mercurial.i18n import _
-from mercurial import util, sshpeer, hg, error, util, wireproto, node
+from mercurial import util, sshpeer, hg, error, util, wireproto, node, httppeer
 import os, socket, lz4, time, grp, io
 import errno
+import itertools
 
 # Statistics for debugging
 fetchcost = 0
@@ -121,6 +122,28 @@ class cacheconnection(object):
 
 def _getfilesbatch(
         remote, receivemissing, progresstick, missed, idmap, batchsize):
+    # Over http(s), iterbatch is a streamy method and we can start
+    # looking at results early. This means we send one (potentially
+    # large) request, but then we show nice progress as we process
+    # file results, rather than showing chunks of $batchsize in
+    # progress.
+    #
+    # Over ssh, iterbatch isn't streamy because batch() wasn't
+    # explicitly designed as a streaming method. In the future we
+    # should probably introduce a streambatch() method upstream and
+    # use that for this.
+    if (getattr(remote, 'iterbatch', False) and remote.capable('httppostargs')
+        and isinstance(remote, httppeer.httppeer)):
+        b = remote.iterbatch()
+        for m in missed:
+            file_ = idmap[m]
+            node = m[-40:]
+            b.getfile(file_, node)
+        b.submit()
+        for m, r in itertools.izip(missed, b.results()):
+            receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), m)
+            progresstick()
+        return
     while missed:
         chunk, missed = missed[:batchsize], missed[batchsize:]
         b = remote.batch()


More information about the Mercurial-devel mailing list