[PATCH 7 of 7 iterbatch] http: support sending hgargs via POST body instead of in GET or headers

Pierre-Yves David pierre-yves.david at ens-lyon.org
Fri Mar 11 10:51:15 EST 2016



On 03/08/2016 04:25 AM, Augie Fackler wrote:
> # HG changeset patch
> # User Augie Fackler <augie at google.com>
> # Date 1456956982 18000
> #      Wed Mar 02 17:16:22 2016 -0500
> # Node ID 421b80f47c322fa5e4f6b498221c7530f6da36f5
> # Parent  484a8853c4ada5e028fa74920932d4ad5290e6be
> # EXP-Topic batch
> http: support sending hgargs via POST body instead of in GET or headers
>
> narrowhg (for its narrow spec) and remotefilelog (for its large batch
> requests) would like to be able to make requests with argument sets so
> absurdly large that they blow out total request size limit on some
> http servers. As a workaround, support stuffing args at the start
> of the POST body.
>
> We will probably want to leave this behavior off by default in servers
> forever, because it makes the old "POSTs are only for writes"
> assumption wrong, which might break some of the simpler authentication
> configurations.

The feature seems fine and the implementation seems okay. I've a couple 
of style nits including a question about isinstance usage that I'm a bit 
puzzled about.

> diff --git a/mercurial/hgweb/protocol.py b/mercurial/hgweb/protocol.py
> --- a/mercurial/hgweb/protocol.py
> +++ b/mercurial/hgweb/protocol.py
> @@ -45,6 +45,9 @@ class webproto(wireproto.abstractserverp
>           return [data[k] for k in keys]
>       def _args(self):
>           args = self.req.form.copy()
> +        postlen = int(self.req.env.get('HTTP_X_HGARGS_POST', 0))
> +        if postlen:
> +            return cgi.parse_qs(self.req.read(postlen))
>           chunks = []
>           i = 1
>           while True:
> diff --git a/mercurial/httppeer.py b/mercurial/httppeer.py
> --- a/mercurial/httppeer.py
> +++ b/mercurial/httppeer.py
> @@ -92,41 +92,55 @@ class httppeer(wireproto.wirepeer):
>           if cmd == 'pushkey':
>               args['data'] = ''
>           data = args.pop('data', None)
> +        headers = args.pop('headers', {})
> +
> +        self.ui.debug("sending %s command\n" % cmd)
> +        q = [('cmd', cmd)]
> +        headersize = 0
> +        # TODO: support for httppostargs when data is a file-like object
> +        #
> +        # Important: don't use self.capable() here or else you end up
> +        # with infinite recursion when trying to look up capabilities
> +        # for the first time.
> +        if self.caps is not None and 'httppostargs' in self.caps and (
> +                not data or isinstance(data, basestring)):

small nits That conditional is getting pretty hard to follow. Can we 
have it computed as a temporary boolean variable with an explicit name?

Why do we need to use isinstance on that "data" what other type can we have?

> +            strargs = urllib.urlencode(sorted(args.items()))
> +            if strargs:
> +                if not data:
> +                    data = strargs
> +                elif isinstance(data, basestring):

Same question about isinstance

> +                    data = strargs + data
> +                headers['X-HgArgs-Post'] = len(strargs)
> +        else:
> +            if len(args) > 0:

Why not: "if args:"?

> +                httpheader = self.capable('httpheader')
> +                if httpheader:
> +                    headersize = int(httpheader.split(',')[0])

small-nits: if you care about first item only consider using 
".split(',', 1)".


> +            if headersize > 0:
> +                # The headers can typically carry more data than the URL.
> +                encargs = urllib.urlencode(sorted(args.items()))
> +                headerfmt = 'X-HgArg-%s'
> +                contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> +                headernum = 0
> +                for i in xrange(0, len(encargs), contentlen):
> +                    headernum += 1
> +                    header = headerfmt % str(headernum)
> +                    headers[header] = encargs[i:i + contentlen]
> +                varyheaders = [
> +                    headerfmt % str(h) for h in range(1, headernum + 1)]

small nits: I would align

varyheaders = [headerfmt % str(h)
                for h in range(1, headernum + 1)]

So that it stand out more from the previous loop.


> +                headers['Vary'] = ','.join(varyheaders)
> +            else:
> +                q += sorted(args.items())
>           size = 0
>           if util.safehasattr(data, 'length'):
>               size = data.length
>           elif data is not None:
>               size = len(data)
> -        headers = args.pop('headers', {})
>           if data is not None and 'Content-Type' not in headers:
>               headers['Content-Type'] = 'application/mercurial-0.1'
> -
> -
>           if size and self.ui.configbool('ui', 'usehttp2', False):
>               headers['Expect'] = '100-Continue'
>               headers['X-HgHttp2'] = '1'
> -
> -        self.ui.debug("sending %s command\n" % cmd)
> -        q = [('cmd', cmd)]
> -        headersize = 0
> -        if len(args) > 0:
> -            httpheader = self.capable('httpheader')
> -            if httpheader:
> -                headersize = int(httpheader.split(',')[0])
> -        if headersize > 0:
> -            # The headers can typically carry more data than the URL.
> -            encargs = urllib.urlencode(sorted(args.items()))
> -            headerfmt = 'X-HgArg-%s'
> -            contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> -            headernum = 0
> -            for i in xrange(0, len(encargs), contentlen):
> -                headernum += 1
> -                header = headerfmt % str(headernum)
> -                headers[header] = encargs[i:i + contentlen]
> -            varyheaders = [headerfmt % str(h) for h in range(1, headernum + 1)]
> -            headers['Vary'] = ','.join(varyheaders)
> -        else:
> -            q += sorted(args.items())

I would use "q.extend(…)" over "q +="

>           qs = '?%s' % urllib.urlencode(q)
>           cu = "%s%s" % (self._url, qs)
>           req = self.requestbuilder(cu, data, headers)
> diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py
> --- a/mercurial/wireproto.py
> +++ b/mercurial/wireproto.py
> @@ -688,6 +688,8 @@ def _capabilities(repo, proto):
>       caps.append('unbundle=%s' % ','.join(changegroupmod.bundlepriority))
>       caps.append(
>           'httpheader=%d' % repo.ui.configint('server', 'maxhttpheaderlen', 1024))
> +    if repo.ui.configbool('experimental', 'httppostargs', False):
> +        caps.append('httppostargs')
>       return caps
>
>   # If you are writing an extension and consider wrapping this function. Wrap
> diff --git a/tests/test-wireproto.t b/tests/test-wireproto.t
> --- a/tests/test-wireproto.t
> +++ b/tests/test-wireproto.t
> @@ -19,7 +19,12 @@ Local:
>
>   HTTP:
>
> -  $ hg serve -R repo -p $HGPORT -d --pid-file=hg1.pid -E error.log -A access.log
> +  $ cat >> $HGRCPATH <<EOF
> +  > [experimental]
> +  > httppostargs = true
> +  > EOF
> +
> +  $ hg  serve -R repo -p $HGPORT -d --pid-file=hg1.pid -E error.log -A access.log
>     $ cat hg1.pid >> $DAEMON_PIDS
>
>     $ hg debugwireargs http://localhost:$HGPORT/ un deux trois quatre
> @@ -37,6 +42,66 @@ HTTP:
>     $ cat error.log
>     $ cat access.log
>     * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> +
> +  $ cat >> $HGRCPATH <<EOF
> +  > [experimental]
> +  > httppostargs = false
> +  > EOF
> +
> +HTTP without args-in-POST:
> +  $ hg serve -R repo -p $HGPORT1 -d --pid-file=hg1.pid -E error.log -A access.log
> +  $ cat hg1.pid >> $DAEMON_PIDS
> +
> +  $ hg debugwireargs http://localhost:$HGPORT1/ un deux trois quatre
> +  un deux trois quatre None
> +  $ hg debugwireargs http://localhost:$HGPORT1/ \ un deux trois\  qu\ \ atre
> +   un deux trois  qu  atre None
> +  $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei --four vier
> +  eins zwei None vier None
> +  $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei
> +  eins zwei None None None
> +  $ hg debugwireargs http://localhost:$HGPORT1/ eins zwei --five fuenf
> +  eins zwei None None None
> +  $ hg debugwireargs http://localhost:$HGPORT1/ un deux trois onethousandcharactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 x
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> +  un deux trois onethousandcharactersxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 x
xxxxxxxxxxxxxxxxxxxx None
> +  $ cat error.log
> +  $ cat access.log
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:39 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:43 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:27 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:17 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> +  * - - [*] "POST /?cmd=debugwireargs HTTP/1.1" 200 - x-hgargs-post:1033 (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
>     * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-hgarg-1:four=quatre&one=un&three=trois&two=deux (glob)
>     * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-hgarg-1:four=quatre&one=un&three=trois&two=deux (glob)
>     * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

-- 
Pierre-Yves David


More information about the Mercurial-devel mailing list