[PATCH] httprepo: long arguments support (issue2126)

Matt Mackall mpm at selenic.com
Sat Apr 30 07:54:05 CDT 2011


On Sat, 2011-04-30 at 20:35 +0800, Steven Brown wrote:
> # HG changeset patch
> # User Steven Brown <StevenGBrown at gmail.com>
> # Date 1304163787 -28800
> # Node ID 9ae060e1ef6955a05965dde63a4782a78065b6cf
> # Parent  139fb11210bb4fc1206d69bbe56b3a15b934bc11
> httprepo: long arguments support (issue2126)
> 
> Send the command arguments in the HTTP headers. The command is still part
> of the URL. If the server does not have the 'httpheader' capability, the
> client will send the command arguments in the URL as it did previously.
> 
> Web servers typically allow more data to be placed within the headers than
> in the URL, so this approach will:
> - Avoid HTTP errors due to using a URL that is too large.
> - Allow Mercurial to implement a more efficient wire protocol.
> 
> An alternate approach is to send the arguments as part of the request body.
> This approach has been rejected because it requires the use of POST
> requests, so it would break any existing configuration that relies on the
> request type for authentication or caching.
> 
> Extensibility:
> - The header size is provided by the server, which makes it possible to
>   introduce an hgrc setting for it.
> - The client ignores the capability value after the first comma, which
>   allows more information to be included in the future.

I don't know if this format is compact enough. While IIS has a limit
based on total request size, Apache has a fairly small limit on number
of header lines (~100 by default), though it allows those lines to be
quite long (~8k by default). 

In your approach, we're using one arg per header, which means we have a
limited number of args, with a limited size each. We can't have one 50k
arg and we can't have 500 100-byte args.

Instead, we probably want to build one big request string and then break
it across a bunch of large headers. Then the receive can combine them
all and unpack.

> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/protocol.py
> --- a/mercurial/hgweb/protocol.py	Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/hgweb/protocol.py	Sat Apr 30 19:43:07 2011 +0800
> @@ -5,12 +5,14 @@
>  # This software may be used and distributed according to the terms of the
>  # GNU General Public License version 2 or any later version.
>  
> -import cStringIO, zlib, sys, urllib
> +import cStringIO, itertools, re, zlib, sys, urllib
>  from mercurial import util, wireproto
>  from common import HTTP_OK
>  
>  HGTYPE = 'application/mercurial-0.1'
>  
> +argprefix = 'HTTP_X_ARG_'
> +
>  class webproto(object):
>      def __init__(self, req):
>          self.req = req
> @@ -21,13 +23,33 @@
>          for k in keys:
>              if k == '*':
>                  star = {}
> -                for key in self.req.form.keys():
> +                for key in self._allargs():
>                      if key != 'cmd' and key not in keys:
> -                        star[key] = self.req.form[key][0]
> +                        star[key] = self._arg(key)
>                  data['*'] = star
>              else:
> -                data[k] = self.req.form[k][0]
> +                data[k] = self._arg(k)
>          return [data[k] for k in keys]
> +    def _allargs(self):
> +        args = set(self.req.form.keys())
> +        r = re.compile(re.escape(argprefix) + '([^_]*)')
> +        for header in self.req.env.keys():
> +            m = r.match(header)
> +            if m:
> +                args.add(m.group(1).lower())
> +        return args
> +    def _arg(self, argname):
> +        if argname in self.req.form:
> +            return self.req.form[argname][0]
> +        chunks = []
> +        for i in itertools.count(1):
> +            h = self.req.env.get(argprefix + argname.upper() + '_' + str(i))
> +            if h is None:
> +                break
> +            chunks += [h]
> +        if not chunks:
> +            raise KeyError(argname)
> +        return urllib.unquote_plus(''.join(chunks)[1:])
>      def getfile(self, fp):
>          length = int(self.req.env['CONTENT_LENGTH'])
>          for s in util.filechunkiter(self.req, limit=length):
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/server.py
> --- a/mercurial/hgweb/server.py	Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/hgweb/server.py	Sat Apr 30 19:43:07 2011 +0800
> @@ -59,6 +59,12 @@
>      def log_message(self, format, *args):
>          self._log_any(self.server.accesslog, format, *args)
>  
> +    def log_request(self, code='-', size='-'):
> +        xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
> +        self.log_message('"%s" %s %s%s',
> +                         self.requestline, str(code), str(size),
> +                         ''.join([' %s:%s' % h for h in sorted(xheaders)]))
> +
>      def do_write(self):
>          try:
>              self.do_hgweb()
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/httprepo.py
> --- a/mercurial/httprepo.py	Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/httprepo.py	Sat Apr 30 19:43:07 2011 +0800
> @@ -76,7 +76,31 @@
>          data = args.pop('data', None)
>          headers = args.pop('headers', {})
>          self.ui.debug("sending %s command\n" % cmd)
> -        q = [('cmd', cmd)] + sorted(args.items())
> +        q = [('cmd', cmd)]
> +        headersize = 0
> +        if len(args) > 0:
> +            httpheader = self.capable('httpheader')
> +            if httpheader:
> +                headersize = int(httpheader.split(',')[0])
> +        if headersize > 0:
> +            # The headers can typically carry more data than the URL.
> +            prefix = 'X-Arg-'
> +            argheaders = []
> +            for name, value in args.items():
> +                # The header content must: a) be non-empty b) contain only
> +                # ISO 8859-1 characters and c) contain no spaces.
> +                encvalue = '.' + urllib.quote_plus(value)
> +                headerfmt = prefix + name + '-%s'
> +                contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> +                headerindex = 1
> +                for i in xrange(0, len(encvalue), contentlen):
> +                    header = headerfmt % str(headerindex)
> +                    headerindex += 1
> +                    argheaders += [header]
> +                    headers[header] = encvalue[i:i + contentlen]
> +            headers['Vary'] = ','.join(sorted(argheaders))
> +        else:
> +            q += sorted(args.items())
>          qs = '?%s' % urllib.urlencode(q)
>          cu = "%s%s" % (self._url, qs)
>          req = urllib2.Request(cu, data, headers)
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/wireproto.py
> --- a/mercurial/wireproto.py	Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/wireproto.py	Sat Apr 30 19:43:07 2011 +0800
> @@ -233,6 +233,7 @@
>          else:
>              caps.append('streamreqs=%s' % ','.join(requiredformats))
>      caps.append('unbundle=%s' % ','.join(changegroupmod.bundlepriority))
> +    caps.append('httpheader=1024')
>      return ' '.join(caps)
>  
>  def changegroup(repo, proto, roots):
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-getbundle.t
> --- a/tests/test-getbundle.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-getbundle.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -247,7 +247,7 @@
>    * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
>    * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - (glob)
>    * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> -  * - - [*] "GET /?cmd=getbundle&common=c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544&heads=6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - x-arg-common-1:.c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544 x-arg-heads-1:.6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b (glob)
>  
>    $ cat error.log
>  
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-hgweb-commands.t
> --- a/tests/test-hgweb-commands.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-hgweb-commands.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -952,7 +952,7 @@
>    $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT '?cmd=capabilities'; echo
>    200 Script output follows
>    
> -  lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN
> +  lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN httpheader=1024
>  
>  heads
>  
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-http-proxy.t
> --- a/tests/test-http-proxy.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-http-proxy.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -100,21 +100,21 @@
>    $ cat proxy.log
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=stream_out HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
>    * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> -  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> +  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>  
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-push-http.t
> --- a/tests/test-push-http.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-push-http.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -66,6 +66,23 @@
>    repository tip rolled back to revision 0 (undo serve)
>    working directory now based on revision 0
>  
> +expect success, server lacks the httpheader capability
> +
> +  $ CAP=httpheader
> +  $ . "$TESTDIR/notcapable"
> +  $ req
> +  pushing to http://localhost:$HGPORT/
> +  searching for changes
> +  remote: adding changesets
> +  remote: adding manifests
> +  remote: adding file changes
> +  remote: added 1 changesets with 1 changes to 1 files
> +  remote: changegroup hook: HG_NODE=ba677d0156c1196c1a699fa53f390dcfc3ce3872 HG_SOURCE=serve HG_URL=remote:http:*:  (glob)
> +  % serve errors
> +  $ hg rollback
> +  repository tip rolled back to revision 0 (undo serve)
> +  working directory now based on revision 0
> +
>  expect success, server lacks the unbundlehash capability
>  
>    $ CAP=unbundlehash
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-unbundlehash.t
> --- a/tests/test-unbundlehash.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-unbundlehash.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -28,4 +28,4 @@
>  The hash here is always the same since the remote repository only has the null head.
>  
>    $ cat access.log | grep unbundle
> -  * - - [*] "POST /?cmd=unbundle&heads=686173686564+6768033e216468247bd031a0a2d9876d79818f8f HTTP/1.1" 200 - (glob)
> +  * - - [*] "POST /?cmd=unbundle HTTP/1.1" 200 - x-arg-heads-1:.686173686564+6768033e216468247bd031a0a2d9876d79818f8f (glob)
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-wireproto.t
> --- a/tests/test-wireproto.t	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-wireproto.t	Sat Apr 30 19:43:07 2011 +0800
> @@ -23,6 +23,8 @@
>  
>    $ hg debugwireargs http://localhost:$HGPORT/ un deux trois quatre
>    un deux trois quatre None
> +  $ hg debugwireargs http://localhost:$HGPORT/ \ un deux trois\  qu\ \ atre
> +   un deux trois  qu  atre None
>    $ hg debugwireargs http://localhost:$HGPORT/ eins zwei --four vier
>    eins zwei None vier None
>    $ hg debugwireargs http://localhost:$HGPORT/ eins zwei
> @@ -31,6 +33,40 @@
>    eins zwei None None None
>    $ cat access.log
>    * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +
> +HTTP without the httpheader capability:
> +
> +  $ HGRCPATH="`pwd`/repo/.hgrc"
> +  $ CAP=httpheader
> +  $ . "$TESTDIR/notcapable"
> +
> +  $ hg serve -R repo -p $HGPORT2 -d --pid-file=hg2.pid -E error2.log -A access2.log
> +  $ cat hg2.pid >> $DAEMON_PIDS
> +
> +  $ hg debugwireargs http://localhost:$HGPORT2/ un deux trois quatre
> +  un deux trois quatre None
> +  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --four vier
> +  eins zwei None vier None
> +  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei
> +  eins zwei None None None
> +  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --five fuenf
> +  eins zwei None None None
> +  $ cat access2.log
> +  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
>    * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
>    * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
>    * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/tinyproxy.py
> --- a/tests/tinyproxy.py	Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/tinyproxy.py	Sat Apr 30 19:43:07 2011 +0800
> @@ -30,6 +30,12 @@
>          else:
>              self.__base_handle()
>  
> +    def log_request(self, code='-', size='-'):
> +        xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
> +        self.log_message('"%s" %s %s%s',
> +                         self.requestline, str(code), str(size),
> +                         ''.join([' %s:%s' % h for h in sorted(xheaders)]))
> +
>      def _connect_to(self, netloc, soc):
>          i = netloc.find(':')
>          if i >= 0:


-- 
Mathematics is the supreme nostalgia of our time.




More information about the Mercurial-devel mailing list