[PATCH] httprepo: long arguments support (issue2126)
Matt Mackall
mpm at selenic.com
Sat Apr 30 07:54:05 CDT 2011
On Sat, 2011-04-30 at 20:35 +0800, Steven Brown wrote:
> # HG changeset patch
> # User Steven Brown <StevenGBrown at gmail.com>
> # Date 1304163787 -28800
> # Node ID 9ae060e1ef6955a05965dde63a4782a78065b6cf
> # Parent 139fb11210bb4fc1206d69bbe56b3a15b934bc11
> httprepo: long arguments support (issue2126)
>
> Send the command arguments in the HTTP headers. The command is still part
> of the URL. If the server does not have the 'httpheader' capability, the
> client will send the command arguments in the URL as it did previously.
>
> Web servers typically allow more data to be placed within the headers than
> in the URL, so this approach will:
> - Avoid HTTP errors due to using a URL that is too large.
> - Allow Mercurial to implement a more efficient wire protocol.
>
> An alternate approach is to send the arguments as part of the request body.
> This approach has been rejected because it requires the use of POST
> requests, so it would break any existing configuration that relies on the
> request type for authentication or caching.
>
> Extensibility:
> - The header size is provided by the server, which makes it possible to
> introduce an hgrc setting for it.
> - The client ignores the capability value after the first comma, which
> allows more information to be included in the future.
I don't know if this format is compact enough. While IIS has a limit
based on total request size, Apache has a fairly small limit on number
of header lines (~100 by default), though it allows those lines to be
quite long (~8k by default).
In your approach, we're using one arg per header, which means we have a
limited number of args, with a limited size each. We can't have one 50k
arg and we can't have 500 100-byte args.
Instead, we probably want to build one big request string and then break
it across a bunch of large headers. Then the receive can combine them
all and unpack.
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/protocol.py
> --- a/mercurial/hgweb/protocol.py Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/hgweb/protocol.py Sat Apr 30 19:43:07 2011 +0800
> @@ -5,12 +5,14 @@
> # This software may be used and distributed according to the terms of the
> # GNU General Public License version 2 or any later version.
>
> -import cStringIO, zlib, sys, urllib
> +import cStringIO, itertools, re, zlib, sys, urllib
> from mercurial import util, wireproto
> from common import HTTP_OK
>
> HGTYPE = 'application/mercurial-0.1'
>
> +argprefix = 'HTTP_X_ARG_'
> +
> class webproto(object):
> def __init__(self, req):
> self.req = req
> @@ -21,13 +23,33 @@
> for k in keys:
> if k == '*':
> star = {}
> - for key in self.req.form.keys():
> + for key in self._allargs():
> if key != 'cmd' and key not in keys:
> - star[key] = self.req.form[key][0]
> + star[key] = self._arg(key)
> data['*'] = star
> else:
> - data[k] = self.req.form[k][0]
> + data[k] = self._arg(k)
> return [data[k] for k in keys]
> + def _allargs(self):
> + args = set(self.req.form.keys())
> + r = re.compile(re.escape(argprefix) + '([^_]*)')
> + for header in self.req.env.keys():
> + m = r.match(header)
> + if m:
> + args.add(m.group(1).lower())
> + return args
> + def _arg(self, argname):
> + if argname in self.req.form:
> + return self.req.form[argname][0]
> + chunks = []
> + for i in itertools.count(1):
> + h = self.req.env.get(argprefix + argname.upper() + '_' + str(i))
> + if h is None:
> + break
> + chunks += [h]
> + if not chunks:
> + raise KeyError(argname)
> + return urllib.unquote_plus(''.join(chunks)[1:])
> def getfile(self, fp):
> length = int(self.req.env['CONTENT_LENGTH'])
> for s in util.filechunkiter(self.req, limit=length):
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/server.py
> --- a/mercurial/hgweb/server.py Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/hgweb/server.py Sat Apr 30 19:43:07 2011 +0800
> @@ -59,6 +59,12 @@
> def log_message(self, format, *args):
> self._log_any(self.server.accesslog, format, *args)
>
> + def log_request(self, code='-', size='-'):
> + xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
> + self.log_message('"%s" %s %s%s',
> + self.requestline, str(code), str(size),
> + ''.join([' %s:%s' % h for h in sorted(xheaders)]))
> +
> def do_write(self):
> try:
> self.do_hgweb()
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/httprepo.py
> --- a/mercurial/httprepo.py Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/httprepo.py Sat Apr 30 19:43:07 2011 +0800
> @@ -76,7 +76,31 @@
> data = args.pop('data', None)
> headers = args.pop('headers', {})
> self.ui.debug("sending %s command\n" % cmd)
> - q = [('cmd', cmd)] + sorted(args.items())
> + q = [('cmd', cmd)]
> + headersize = 0
> + if len(args) > 0:
> + httpheader = self.capable('httpheader')
> + if httpheader:
> + headersize = int(httpheader.split(',')[0])
> + if headersize > 0:
> + # The headers can typically carry more data than the URL.
> + prefix = 'X-Arg-'
> + argheaders = []
> + for name, value in args.items():
> + # The header content must: a) be non-empty b) contain only
> + # ISO 8859-1 characters and c) contain no spaces.
> + encvalue = '.' + urllib.quote_plus(value)
> + headerfmt = prefix + name + '-%s'
> + contentlen = headersize - len(headerfmt % '000' + ': \r\n')
> + headerindex = 1
> + for i in xrange(0, len(encvalue), contentlen):
> + header = headerfmt % str(headerindex)
> + headerindex += 1
> + argheaders += [header]
> + headers[header] = encvalue[i:i + contentlen]
> + headers['Vary'] = ','.join(sorted(argheaders))
> + else:
> + q += sorted(args.items())
> qs = '?%s' % urllib.urlencode(q)
> cu = "%s%s" % (self._url, qs)
> req = urllib2.Request(cu, data, headers)
> diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/wireproto.py
> --- a/mercurial/wireproto.py Sat Apr 30 11:16:52 2011 +0200
> +++ b/mercurial/wireproto.py Sat Apr 30 19:43:07 2011 +0800
> @@ -233,6 +233,7 @@
> else:
> caps.append('streamreqs=%s' % ','.join(requiredformats))
> caps.append('unbundle=%s' % ','.join(changegroupmod.bundlepriority))
> + caps.append('httpheader=1024')
> return ' '.join(caps)
>
> def changegroup(repo, proto, roots):
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-getbundle.t
> --- a/tests/test-getbundle.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-getbundle.t Sat Apr 30 19:43:07 2011 +0800
> @@ -247,7 +247,7 @@
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> - * - - [*] "GET /?cmd=getbundle&common=c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544&heads=6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - x-arg-common-1:.c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544 x-arg-heads-1:.6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b (glob)
>
> $ cat error.log
>
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-hgweb-commands.t
> --- a/tests/test-hgweb-commands.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-hgweb-commands.t Sat Apr 30 19:43:07 2011 +0800
> @@ -952,7 +952,7 @@
> $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT '?cmd=capabilities'; echo
> 200 Script output follows
>
> - lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN
> + lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN httpheader=1024
>
> heads
>
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-http-proxy.t
> --- a/tests/test-http-proxy.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-http-proxy.t Sat Apr 30 19:43:07 2011 +0800
> @@ -100,21 +100,21 @@
> $ cat proxy.log
> * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=stream_out HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
> * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
> - * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
> + * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
>
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-push-http.t
> --- a/tests/test-push-http.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-push-http.t Sat Apr 30 19:43:07 2011 +0800
> @@ -66,6 +66,23 @@
> repository tip rolled back to revision 0 (undo serve)
> working directory now based on revision 0
>
> +expect success, server lacks the httpheader capability
> +
> + $ CAP=httpheader
> + $ . "$TESTDIR/notcapable"
> + $ req
> + pushing to http://localhost:$HGPORT/
> + searching for changes
> + remote: adding changesets
> + remote: adding manifests
> + remote: adding file changes
> + remote: added 1 changesets with 1 changes to 1 files
> + remote: changegroup hook: HG_NODE=ba677d0156c1196c1a699fa53f390dcfc3ce3872 HG_SOURCE=serve HG_URL=remote:http:*: (glob)
> + % serve errors
> + $ hg rollback
> + repository tip rolled back to revision 0 (undo serve)
> + working directory now based on revision 0
> +
> expect success, server lacks the unbundlehash capability
>
> $ CAP=unbundlehash
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-unbundlehash.t
> --- a/tests/test-unbundlehash.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-unbundlehash.t Sat Apr 30 19:43:07 2011 +0800
> @@ -28,4 +28,4 @@
> The hash here is always the same since the remote repository only has the null head.
>
> $ cat access.log | grep unbundle
> - * - - [*] "POST /?cmd=unbundle&heads=686173686564+6768033e216468247bd031a0a2d9876d79818f8f HTTP/1.1" 200 - (glob)
> + * - - [*] "POST /?cmd=unbundle HTTP/1.1" 200 - x-arg-heads-1:.686173686564+6768033e216468247bd031a0a2d9876d79818f8f (glob)
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-wireproto.t
> --- a/tests/test-wireproto.t Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/test-wireproto.t Sat Apr 30 19:43:07 2011 +0800
> @@ -23,6 +23,8 @@
>
> $ hg debugwireargs http://localhost:$HGPORT/ un deux trois quatre
> un deux trois quatre None
> + $ hg debugwireargs http://localhost:$HGPORT/ \ un deux trois\ qu\ \ atre
> + un deux trois qu atre None
> $ hg debugwireargs http://localhost:$HGPORT/ eins zwei --four vier
> eins zwei None vier None
> $ hg debugwireargs http://localhost:$HGPORT/ eins zwei
> @@ -31,6 +33,40 @@
> eins zwei None None None
> $ cat access.log
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> + * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
> +
> +HTTP without the httpheader capability:
> +
> + $ HGRCPATH="`pwd`/repo/.hgrc"
> + $ CAP=httpheader
> + $ . "$TESTDIR/notcapable"
> +
> + $ hg serve -R repo -p $HGPORT2 -d --pid-file=hg2.pid -E error2.log -A access2.log
> + $ cat hg2.pid >> $DAEMON_PIDS
> +
> + $ hg debugwireargs http://localhost:$HGPORT2/ un deux trois quatre
> + un deux trois quatre None
> + $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --four vier
> + eins zwei None vier None
> + $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei
> + eins zwei None None None
> + $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --five fuenf
> + eins zwei None None None
> + $ cat access2.log
> + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
> * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
> diff -r 139fb11210bb -r 9ae060e1ef69 tests/tinyproxy.py
> --- a/tests/tinyproxy.py Sat Apr 30 11:16:52 2011 +0200
> +++ b/tests/tinyproxy.py Sat Apr 30 19:43:07 2011 +0800
> @@ -30,6 +30,12 @@
> else:
> self.__base_handle()
>
> + def log_request(self, code='-', size='-'):
> + xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
> + self.log_message('"%s" %s %s%s',
> + self.requestline, str(code), str(size),
> + ''.join([' %s:%s' % h for h in sorted(xheaders)]))
> +
> def _connect_to(self, netloc, soc):
> i = netloc.find(':')
> if i >= 0:
--
Mathematics is the supreme nostalgia of our time.
More information about the Mercurial-devel
mailing list