[PATCH 5 of 5] exchange: refactor APIs to obtain bundle data (API)

Sun Sep 25 16:42:17 EDT 2016

# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1474830761 25200
#      Sun Sep 25 12:12:41 2016 -0700
# Node ID f6bb4ff0a8c47b099157e615a2874c193ac77512
# Parent  2dc93677a8836d2365b561d1ba79f62ff68a4f23
exchange: refactor APIs to obtain bundle data (API)

Currently, exchange.getbundle() returns either a cg1unpacker or a
util.chunkbuffer (in the case of bundle2). This is kinda OK, as
both expose a .read() to consumers. However, localpeer.getbundle()
has code inferring what the response type is based on arguments and
converts the util.chunkbuffer returned in the bundle2 case to a
bundle2.unbundle20 instance. This is a sign that the API for
exchange.getbundle() is not ideal because it doesn't consistently
return an "unbundler" instance.

In addition, unbundlers mask the fact that there is an underlying
generator of changegroup data. In both cg1 and bundle2, this generator
is being fed into a util.chunkbuffer so it can be re-exposed as a
file object.

util.chunkbuffer is a nice abstraction. However, it should only be
used "at the edges." This is because keeping data as a generator is
more efficient than converting it to a chunkbuffer, especially if we
convert that chunkbuffer back to a generator (as is the case in some
code paths currently).

This patch splits exchange.getbundle() into 2 functions. 1 returns
an iterator over raw chunks (along with a flag saying whether it is
bundle2). The other returns an "unbundler" instance.

Callers of exchange.getbundle() have been updated to use the
appropriate new API.

There is a minor change of behavior in test-getbundle.t. This is
because `hg debuggetbundle` isn't defining bundlecaps. As a result,
a cg1 data stream and unpacker is being produced. This is getting fed
into a new bundle20 instance via bundle2.writebundle(), which uses
a backchannel mechanism between changegroup generation to add the
"nbchanges" part parameter. I never liked this backchannel mechanism
and I plan to remove it someday. `hg bundle` still produces the
"nbchanges" part parameter, so there should be no user-visible
change of behavior. I consider this "regression" a bug in
`hg debuggetbundle`. And that bug is captured by an existing
"TODO" in the code to use bundle2 capabilities.

diff --git a/mercurial/exchange.py b/mercurial/exchange.py
--- a/mercurial/exchange.py
+++ b/mercurial/exchange.py
@@ -1526,43 +1526,38 @@ def getbundle2partsgenerator(stepname, i
         return func
     return dec
 
 def bundle2requested(bundlecaps):
     if bundlecaps is not None:
         return any(cap.startswith('HG2') for cap in bundlecaps)
     return False
 
-def getbundle(repo, source, heads=None, common=None, bundlecaps=None,
-              **kwargs):
-    """return a full bundle (with potentially multiple kind of parts)
+def getbundlechunks(repo, source, heads=None, common=None, bundlecaps=None,
+                    **kwargs):
+    """Return chunks constituting a bundle's raw data.
 
     Could be a bundle HG10 or a bundle HG20 depending on bundlecaps
-    passed. For now, the bundle can contain only changegroup, but this will
-    changes when more part type will be available for bundle2.
+    passed.
 
-    This is different from changegroup.getchangegroup that only returns an HG10
-    changegroup bundle. They may eventually get reunited in the future when we
-    have a clearer idea of the API we what to query different data.
-
-    The implementation is at a very early stage and will get massive rework
-    when the API of bundle is refined.
+    Returns a 2-tuple of (isbundle2, chunks) with the 2nd item being an
+    iterator over raw chunks (of varying sizes).
     """
     usebundle2 = bundle2requested(bundlecaps)
     # bundle10 case
     if not usebundle2:
         if bundlecaps and not kwargs.get('cg', True):
             raise ValueError(_('request for bundle10 must include changegroup'))
 
         if kwargs:
             raise ValueError(_('unsupported getbundle arguments: %s')
                              % ', '.join(sorted(kwargs.keys())))
         outgoing = _computeoutgoing(repo, heads, common)
-        return changegroup.getchangegroup(repo, source, outgoing,
-                                          bundlecaps=bundlecaps)
+        bundler = changegroup.getbundler('01', repo, bundlecaps)
+        return False, changegroup.getsubsetraw(repo, outgoing, bundler, source)
 
     # bundle20 case
     b2caps = {}
     for bcaps in bundlecaps:
         if bcaps.startswith('bundle2='):
             blob = urlreq.unquote(bcaps[len('bundle2='):])
             b2caps.update(bundle2.decodecaps(blob))
     bundler = bundle2.bundle20(repo.ui, b2caps)
@@ -1570,17 +1565,32 @@ def getbundle(repo, source, heads=None, 
     kwargs['heads'] = heads
     kwargs['common'] = common
 
     for name in getbundle2partsorder:
         func = getbundle2partsmapping[name]
         func(bundler, repo, source, bundlecaps=bundlecaps, b2caps=b2caps,
              **kwargs)
 
-    return util.chunkbuffer(bundler.getchunks())
+    return usebundle2, bundler.getchunks()
+
+def getunbundler(repo, *args, **kwargs):
+    """Obtain an unbundler from arguments.
+
+    This is essentially a wrapper around ``getbundlechunks()`` that converts
+    the result to an unbundler instance.
+
+    Where performance is critical, ``getbundlechunks()`` should be used.
+    """
+    isbundle2, chunks = getbundlechunks(repo, *args, **kwargs)
+
+    if isbundle2:
+        return bundle2.getunbundler(repo.ui, util.chunkbuffer(chunks))
+    else:
+        return changegroup.getunbundler('01', util.chunkbuffer(chunks), None)
 
 @getbundle2partsgenerator('changegroup')
 def _getbundlechangegrouppart(bundler, repo, source, bundlecaps=None,
                               b2caps=None, heads=None, common=None, **kwargs):
     """add a changegroup part to the requested bundle"""
     cg = None
     if kwargs.get('cg', True):
         # build changegroup bundle here.
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -144,24 +144,19 @@ class localpeer(peer.peerrepository):
     def heads(self):
         return self._repo.heads()
 
     def known(self, nodes):
         return self._repo.known(nodes)
 
     def getbundle(self, source, heads=None, common=None, bundlecaps=None,
                   **kwargs):
-        cg = exchange.getbundle(self._repo, source, heads=heads,
-                                common=common, bundlecaps=bundlecaps, **kwargs)
-        if bundlecaps is not None and 'HG20' in bundlecaps:
-            # When requesting a bundle2, getbundle returns a stream to make the
-            # wire level function happier. We need to build a proper object
-            # from it in local peer.
-            cg = bundle2.getunbundler(self.ui, cg)
-        return cg
+        return exchange.getunbundler(self._repo, source, heads=heads,
+                                     common=common, bundlecaps=bundlecaps,
+                                     **kwargs)
 
     # TODO We might want to move the next two calls into legacypeer and add
     # unbundle instead.
 
     def unbundle(self, cg, heads, url):
         """apply a bundle on a repo
 
         This function handles the repo locking itself."""
diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py
--- a/mercurial/wireproto.py
+++ b/mercurial/wireproto.py
@@ -767,18 +767,20 @@ def getbundle(repo, proto, others):
         elif keytype != 'plain':
             raise KeyError('unknown getbundle option type %s'
                            % keytype)
 
     if not bundle1allowed(repo, 'pull'):
         if not exchange.bundle2requested(opts.get('bundlecaps')):
             return ooberror(bundle2required)
 
-    cg = exchange.getbundle(repo, 'serve', **opts)
-    return streamres(proto.groupchunks(cg))
+    isbundle2, chunks = exchange.getbundlechunks(repo, 'serve', **opts)
+    # TODO avoid util.chunkbuffer() here since it is adding overhead to
+    # what is fundamentally a generator proxying operation.
+    return streamres(proto.groupchunks(util.chunkbuffer(chunks)))
 
 @wireprotocommand('heads')
 def heads(repo, proto):
     h = repo.heads()
     return encodelist(h) + "\n"
 
 @wireprotocommand('hello')
 def hello(repo, proto):
diff --git a/tests/test-getbundle.t b/tests/test-getbundle.t
--- a/tests/test-getbundle.t
+++ b/tests/test-getbundle.t
@@ -165,17 +165,17 @@ Get branch and merge:
   8365676dbab05860ce0d9110f2af51368b961bbd
   0b2f73f04880d9cb6a5cd8a757f0db0ad01e32c3
 
 = Test bundle2 =
 
   $ hg debuggetbundle repo bundle -t bundle2
   $ hg debugbundle bundle
   Stream params: {}
-  changegroup -- "sortdict([('version', '01'), ('nbchanges', '18')])"
+  changegroup -- "sortdict([('version', '01')])"
       7704483d56b2a7b5db54dcee7c62378ac629b348
       29a4d1f17bd3f0779ca0525bebb1cfb51067c738
       713346a995c363120712aed1aee7e04afd867638
       d5f6e1ea452285324836a49d7d3c2a63cfed1d31
       ff42371d57168345fdf1a3aac66a51f6a45d41d2
       bac16991d12ff45f9dc43c52da1946dfadb83e80
       6621d79f61b23ec74cf4b69464343d9e0980ec8b
       8931463777131cd73923e560b760061f2aa8a4bc