[PATCH 7 of 7 iterbatch] http: support sending hgargs via POST body instead of in GET or headers
Pierre-Yves David
pierre-yves.david at ens-lyon.org
Fri Mar 11 10:51:15 EST 2016
On 03/08/2016 04:25 AM, Augie Fackler wrote:
> # HG changeset patch
> # User Augie Fackler <augie at google.com>
> # Date 1456956982 18000
> # Wed Mar 02 17:16:22 2016 -0500
> # Node ID 421b80f47c322fa5e4f6b498221c7530f6da36f5
> # Parent 484a8853c4ada5e028fa74920932d4ad5290e6be
> # EXP-Topic batch
> http: support sending hgargs via POST body instead of in GET or headers
>
> narrowhg (for its narrow spec) and remotefilelog (for its large batch
> requests) would like to be able to make requests with argument sets so
> absurdly large that they blow out total request size limit on some
> http servers. As a workaround, support stuffing args at the start
> of the POST body.
>
> We will probably want to leave this behavior off by default in servers
> forever, because it makes the old "POSTs are only for writes"
> assumption wrong, which might break some of the simpler authentication
> configurations.
The feature seems fine and the implementation seems okay. I've a couple
of style nits including a question about isinstance usage that I'm a bit
puzzled about.
> diff --git a/mercurial/hgweb/protocol.py b/mercurial/hgweb/protocol.py
> --- a/mercurial/hgweb/protocol.py
> +++ b/mercurial/hgweb/protocol.py
> @@ -45,6 +45,9 @@ class webproto(wireproto.abstractserverp
> return [data[k] for k in keys]
> def _args(self):
> args = self.req.form.copy()
> + postlen = int(self.req.env.get('HTTP_X_HGARGS_POST', 0))
> + if postlen:
> + return cgi.parse_qs(self.req.read(postlen))
> chunks = []
> i = 1
> while True:
> diff --git a/mercurial/httppeer.py b/mercurial/httppeer.py
> --- a/mercurial/httppeer.py
> +++ b/mercurial/httppeer.py
> @@ -92,41 +92,55 @@ class httppeer(wireproto.wirepeer):
> if cmd == 'pushkey':
> args['data'] = ''
> data = args.pop('data', None)
> + headers = args.pop('headers', {})
> +
> + self.ui.debug("sending %s command\n" % cmd)
> + q = [('cmd', cmd)]
> + headersize = 0
> + # TODO: support for httppostargs when data is a file-like object
> + #
> + # Important: don't use self.capable() here or else you end up
> + # with infinite recursion when trying to look up capabilities
> + # for the first time.
> + if self.caps is not None and 'httppostargs' in self.caps and (
> + not data or isinstance(data, basestring)):
small nits That conditional is getting pretty hard to follow. Can we
have it computed as a temporary boolean variable with an explicit name?
Why do we need to use isinstance on that "data" what other type can we have?
> + strargs = urllib.urlencode(sorted(args.items()))
> + if strargs:
> + if not data:
> + data = strargs
> + elif isinstance(data, basestring):
Same question about isinstance
> + data = strargs + data
> + headers['X-HgArgs-Post'] = len(strargs)
> + else:
> + if len(args) > 0:
Why not: "if args:"?
> + httpheader = self.capable('httpheader')
> + if httpheader:
> + headersize = int(httpheader.split(',')[0])
small-nits: if you care about first item only consider using
".split(',', 1)".
> + if headersize > 0:
> + # The headers can typically carry more data than the URL.
> + encargs = urllib.urlencode(sorted(args.items()))
> + headerfmt = 'X-HgArg-%s'
> + contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> + headernum = 0
> + for i in xrange(0, len(encargs), contentlen):
> + headernum += 1
> + header = headerfmt % str(headernum)
> + headers[header] = encargs[i:i + contentlen]
> + varyheaders = [
> + headerfmt % str(h) for h in range(1, headernum + 1)]
small nits: I would align
varyheaders = [headerfmt % str(h)
for h in range(1, headernum + 1)]
So that it stand out more from the previous loop.
> + headers['Vary'] = ','.join(varyheaders)
> + else:
> + q += sorted(args.items())
> size = 0
> if util.safehasattr(data, 'length'):
> size = data.length
> elif data is not None:
> size = len(data)
> - headers = args.pop('headers', {})
> if data is not None and 'Content-Type' not in headers:
> headers['Content-Type'] = 'application/mercurial-0.1'
> -
> -
> if size and self.ui.configbool('ui', 'usehttp2', False):
> headers['Expect'] = '100-Continue'
> headers['X-HgHttp2'] = '1'
> -
> - self.ui.debug("sending %s command\n" % cmd)
> - q = [('cmd', cmd)]
> - headersize = 0
> - if len(args) > 0:
> - httpheader = self.capable('httpheader')
> - if httpheader:
> - headersize = int(httpheader.split(',')[0])
> - if headersize > 0:
> - # The headers can typically carry more data than the URL.
> - encargs = urllib.urlencode(sorted(args.items()))
> - headerfmt = 'X-HgArg-%s'
> - contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> - headernum = 0
> - for i in xrange(0, len(encargs), contentlen):
> - headernum += 1
> - header = headerfmt % str(headernum)
> - headers[header] = encargs[i:i + contentlen]
> - varyheaders = [headerfmt % str(h) for h in range(1, headernum + 1)]
> - headers['Vary'] = ','.join(varyheaders)
> - else:
> - q += sorted(args.items())
I would use "q.extend(…)" over "q +="
> qs = '?%s' % urllib.urlencode(q)
> cu = "%s%s" % (self._url, qs)
> req = self.requestbuilder(cu, data, headers)
> diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py
> --- a/mercurial/wireproto.py
> +++ b/mercurial/wireproto.py
> @@ -688,6 +688,8 @@ def _capabilities(repo, proto):
> caps.append('unbundle=%s' % ','.join(changegroupmod.bundlepriority))
> caps.append(
> 'httpheader=%d' % repo.ui.configint('server', 'maxhttpheaderlen', 1024))
> + if repo.ui.configbool('experimental', 'httppostargs', False):
> + caps.append('httppostargs')
> return caps
>
> # If you are writing an extension and consider wrapping this function. Wrap
> diff --git a/tests/test-wireproto.t b/tests/test-wireproto.t
> --- a/tests/test-wireproto.t
> +++ b/tests/test-wireproto.t
> @@ -19,7 +19,12 @@ Local:
>
> HTTP:
>
> - $ hg serve -R repo -p $HGPORT -d --pid-file=hg1.pid -E error.log -A access.log
> + $ cat >> $HGRCPATH <<EOF
> + > [experimental]
> + > httppostargs = true
> + > EOF
> +
> + $ hg serve -R repo -p $HGPORT -d --pid-file=hg1.pid -E error.log -A access.log
> $ cat hg1.pid >> $DAEMON_PIDS
>
> $ hg debugwireargs http://localhost:$HGPORT/ un deux trois quatre
> @@ -37,6 +42,66 @@ HTTP:
> $ cat error.log
> $ cat access.log
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> +
> + $ cat >> $HGRCPATH <<EOF
> + > [experimental]
> + > httppostargs = false
> + > EOF
> +
> +HTTP without args-in-POST:
> + $ hg serve -R repo -p $HGPORT1 -d --pid-file=hg1.pid -E error.log -A access.log
> + $ cat hg1.pid >> $DAEMON_PIDS
> +
> + $ hg debugwireargs http://localhost:$HGPORT1/ un deux trois quatre
> + un deux trois quatre None
> + $ hg debugwireargs http://localhost:$HGPORT1/ \ un deux trois\ qu\ \ atre
> + un deux trois qu atre None
> + $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei --four vier
> + eins zwei None vier None
> + $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei
> + eins zwei None None None
> + $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei --five fuenf
> + eins zwei None None None
> + $ hg debugwireargs http://localhost:$HGPORT1/ un deux trois onethousandcharactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
x
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> + un deux trois onethousandcharactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
x
xxxxxxxxxxxxxxxxxxxx None
> + $ cat error.log
> + $ cat access.log
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> + * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-hgarg-1:four=quatre&one=un&three=trois&two=deux (glob)
> * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-hgarg-1:four=quatre&one=un&three=trois&two=deux (glob)
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
--
Pierre-Yves David
More information about the Mercurial-devel
mailing list