[PATCH] httprepo: long arguments support (issue2126)

Steven Brown stevengbrown at gmail.com
Sat Apr 30 07:35:56 CDT 2011


# HG changeset patch
# User Steven Brown <StevenGBrown at gmail.com>
# Date 1304163787 -28800
# Node ID 9ae060e1ef6955a05965dde63a4782a78065b6cf
# Parent  139fb11210bb4fc1206d69bbe56b3a15b934bc11
httprepo: long arguments support (issue2126)

Send the command arguments in the HTTP headers. The command is still part
of the URL. If the server does not have the 'httpheader' capability, the
client will send the command arguments in the URL as it did previously.

Web servers typically allow more data to be placed within the headers than
in the URL, so this approach will:
- Avoid HTTP errors due to using a URL that is too large.
- Allow Mercurial to implement a more efficient wire protocol.

An alternate approach is to send the arguments as part of the request body.
This approach has been rejected because it requires the use of POST
requests, so it would break any existing configuration that relies on the
request type for authentication or caching.

Extensibility:
- The header size is provided by the server, which makes it possible to
  introduce an hgrc setting for it.
- The client ignores the capability value after the first comma, which
  allows more information to be included in the future.

diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/protocol.py
--- a/mercurial/hgweb/protocol.py	Sat Apr 30 11:16:52 2011 +0200
+++ b/mercurial/hgweb/protocol.py	Sat Apr 30 19:43:07 2011 +0800
@@ -5,12 +5,14 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import cStringIO, zlib, sys, urllib
+import cStringIO, itertools, re, zlib, sys, urllib
 from mercurial import util, wireproto
 from common import HTTP_OK
 
 HGTYPE = 'application/mercurial-0.1'
 
+argprefix = 'HTTP_X_ARG_'
+
 class webproto(object):
     def __init__(self, req):
         self.req = req
@@ -21,13 +23,33 @@
         for k in keys:
             if k == '*':
                 star = {}
-                for key in self.req.form.keys():
+                for key in self._allargs():
                     if key != 'cmd' and key not in keys:
-                        star[key] = self.req.form[key][0]
+                        star[key] = self._arg(key)
                 data['*'] = star
             else:
-                data[k] = self.req.form[k][0]
+                data[k] = self._arg(k)
         return [data[k] for k in keys]
+    def _allargs(self):
+        args = set(self.req.form.keys())
+        r = re.compile(re.escape(argprefix) + '([^_]*)')
+        for header in self.req.env.keys():
+            m = r.match(header)
+            if m:
+                args.add(m.group(1).lower())
+        return args
+    def _arg(self, argname):
+        if argname in self.req.form:
+            return self.req.form[argname][0]
+        chunks = []
+        for i in itertools.count(1):
+            h = self.req.env.get(argprefix + argname.upper() + '_' + str(i))
+            if h is None:
+                break
+            chunks += [h]
+        if not chunks:
+            raise KeyError(argname)
+        return urllib.unquote_plus(''.join(chunks)[1:])
     def getfile(self, fp):
         length = int(self.req.env['CONTENT_LENGTH'])
         for s in util.filechunkiter(self.req, limit=length):
diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/hgweb/server.py
--- a/mercurial/hgweb/server.py	Sat Apr 30 11:16:52 2011 +0200
+++ b/mercurial/hgweb/server.py	Sat Apr 30 19:43:07 2011 +0800
@@ -59,6 +59,12 @@
     def log_message(self, format, *args):
         self._log_any(self.server.accesslog, format, *args)
 
+    def log_request(self, code='-', size='-'):
+        xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
+        self.log_message('"%s" %s %s%s',
+                         self.requestline, str(code), str(size),
+                         ''.join([' %s:%s' % h for h in sorted(xheaders)]))
+
     def do_write(self):
         try:
             self.do_hgweb()
diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/httprepo.py
--- a/mercurial/httprepo.py	Sat Apr 30 11:16:52 2011 +0200
+++ b/mercurial/httprepo.py	Sat Apr 30 19:43:07 2011 +0800
@@ -76,7 +76,31 @@
         data = args.pop('data', None)
         headers = args.pop('headers', {})
         self.ui.debug("sending %s command\n" % cmd)
-        q = [('cmd', cmd)] + sorted(args.items())
+        q = [('cmd', cmd)]
+        headersize = 0
+        if len(args) > 0:
+            httpheader = self.capable('httpheader')
+            if httpheader:
+                headersize = int(httpheader.split(',')[0])
+        if headersize > 0:
+            # The headers can typically carry more data than the URL.
+            prefix = 'X-Arg-'
+            argheaders = []
+            for name, value in args.items():
+                # The header content must: a) be non-empty b) contain only
+                # ISO 8859-1 characters and c) contain no spaces.
+                encvalue = '.' + urllib.quote_plus(value)
+                headerfmt = prefix + name + '-%s'
+                contentlen = headersize - len(headerfmt % '000' + ': \r\n')
+                headerindex = 1
+                for i in xrange(0, len(encvalue), contentlen):
+                    header = headerfmt % str(headerindex)
+                    headerindex += 1
+                    argheaders += [header]
+                    headers[header] = encvalue[i:i + contentlen]
+            headers['Vary'] = ','.join(sorted(argheaders))
+        else:
+            q += sorted(args.items())
         qs = '?%s' % urllib.urlencode(q)
         cu = "%s%s" % (self._url, qs)
         req = urllib2.Request(cu, data, headers)
diff -r 139fb11210bb -r 9ae060e1ef69 mercurial/wireproto.py
--- a/mercurial/wireproto.py	Sat Apr 30 11:16:52 2011 +0200
+++ b/mercurial/wireproto.py	Sat Apr 30 19:43:07 2011 +0800
@@ -233,6 +233,7 @@
         else:
             caps.append('streamreqs=%s' % ','.join(requiredformats))
     caps.append('unbundle=%s' % ','.join(changegroupmod.bundlepriority))
+    caps.append('httpheader=1024')
     return ' '.join(caps)
 
 def changegroup(repo, proto, roots):
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-getbundle.t
--- a/tests/test-getbundle.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-getbundle.t	Sat Apr 30 19:43:07 2011 +0800
@@ -247,7 +247,7 @@
   * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
   * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - (glob)
   * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
-  * - - [*] "GET /?cmd=getbundle&common=c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544&heads=6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - x-arg-common-1:.c1818a9f5977dd4139a48f93f5425c67d44a9368+ea919464b16e003894c48b6cb68df3cd9411b544 x-arg-heads-1:.6b57ee934bb2996050540f84cdfc8dcad1e7267d+2114148793524fd045998f71a45b0aaf139f752b (glob)
 
   $ cat error.log
 
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-hgweb-commands.t
--- a/tests/test-hgweb-commands.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-hgweb-commands.t	Sat Apr 30 19:43:07 2011 +0800
@@ -952,7 +952,7 @@
   $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT '?cmd=capabilities'; echo
   200 Script output follows
   
-  lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN
+  lookup changegroupsubset branchmap pushkey known getbundle unbundlehash unbundle=HG10GZ,HG10BZ,HG10UN httpheader=1024
 
 heads
 
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-http-proxy.t
--- a/tests/test-http-proxy.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-http-proxy.t	Sat Apr 30 19:43:07 2011 +0800
@@ -100,21 +100,21 @@
   $ cat proxy.log
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=stream_out HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=getbundle HTTP/1.1" - - x-arg-common-1:.0000000000000000000000000000000000000000 x-arg-heads-1:.83180e7845de420a1bb46896fd5fe05294f8d629 (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys HTTP/1.1" - - x-arg-namespace-1:.bookmarks (glob)
 
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-push-http.t
--- a/tests/test-push-http.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-push-http.t	Sat Apr 30 19:43:07 2011 +0800
@@ -66,6 +66,23 @@
   repository tip rolled back to revision 0 (undo serve)
   working directory now based on revision 0
 
+expect success, server lacks the httpheader capability
+
+  $ CAP=httpheader
+  $ . "$TESTDIR/notcapable"
+  $ req
+  pushing to http://localhost:$HGPORT/
+  searching for changes
+  remote: adding changesets
+  remote: adding manifests
+  remote: adding file changes
+  remote: added 1 changesets with 1 changes to 1 files
+  remote: changegroup hook: HG_NODE=ba677d0156c1196c1a699fa53f390dcfc3ce3872 HG_SOURCE=serve HG_URL=remote:http:*:  (glob)
+  % serve errors
+  $ hg rollback
+  repository tip rolled back to revision 0 (undo serve)
+  working directory now based on revision 0
+
 expect success, server lacks the unbundlehash capability
 
   $ CAP=unbundlehash
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-unbundlehash.t
--- a/tests/test-unbundlehash.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-unbundlehash.t	Sat Apr 30 19:43:07 2011 +0800
@@ -28,4 +28,4 @@
 The hash here is always the same since the remote repository only has the null head.
 
   $ cat access.log | grep unbundle
-  * - - [*] "POST /?cmd=unbundle&heads=686173686564+6768033e216468247bd031a0a2d9876d79818f8f HTTP/1.1" 200 - (glob)
+  * - - [*] "POST /?cmd=unbundle HTTP/1.1" 200 - x-arg-heads-1:.686173686564+6768033e216468247bd031a0a2d9876d79818f8f (glob)
diff -r 139fb11210bb -r 9ae060e1ef69 tests/test-wireproto.t
--- a/tests/test-wireproto.t	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/test-wireproto.t	Sat Apr 30 19:43:07 2011 +0800
@@ -23,6 +23,8 @@
 
   $ hg debugwireargs http://localhost:$HGPORT/ un deux trois quatre
   un deux trois quatre None
+  $ hg debugwireargs http://localhost:$HGPORT/ \ un deux trois\  qu\ \ atre
+   un deux trois  qu  atre None
   $ hg debugwireargs http://localhost:$HGPORT/ eins zwei --four vier
   eins zwei None vier None
   $ hg debugwireargs http://localhost:$HGPORT/ eins zwei
@@ -31,6 +33,40 @@
   eins zwei None None None
   $ cat access.log
   * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.quatre x-arg-one-1:.un x-arg-three-1:.trois x-arg-two-1:.deux (glob)
+  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.qu++atre x-arg-one-1:.+un x-arg-three-1:.trois+ x-arg-two-1:.deux (glob)
+  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-four-1:.vier x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+  * - - [*] "GET /?cmd=debugwireargs HTTP/1.1" 200 - x-arg-one-1:.eins x-arg-two-1:.zwei (glob)
+
+HTTP without the httpheader capability:
+
+  $ HGRCPATH="`pwd`/repo/.hgrc"
+  $ CAP=httpheader
+  $ . "$TESTDIR/notcapable"
+
+  $ hg serve -R repo -p $HGPORT2 -d --pid-file=hg2.pid -E error2.log -A access2.log
+  $ cat hg2.pid >> $DAEMON_PIDS
+
+  $ hg debugwireargs http://localhost:$HGPORT2/ un deux trois quatre
+  un deux trois quatre None
+  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --four vier
+  eins zwei None vier None
+  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei
+  eins zwei None None None
+  $ hg debugwireargs http://localhost:$HGPORT2/ eins zwei --five fuenf
+  eins zwei None None None
+  $ cat access2.log
+  * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
   * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
   * - - [*] "GET /?cmd=debugwireargs&four=quatre&one=un&three=trois&two=deux HTTP/1.1" 200 - (glob)
   * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob)
diff -r 139fb11210bb -r 9ae060e1ef69 tests/tinyproxy.py
--- a/tests/tinyproxy.py	Sat Apr 30 11:16:52 2011 +0200
+++ b/tests/tinyproxy.py	Sat Apr 30 19:43:07 2011 +0800
@@ -30,6 +30,12 @@
         else:
             self.__base_handle()
 
+    def log_request(self, code='-', size='-'):
+        xheaders = [h for h in self.headers.items() if h[0].startswith('x-')]
+        self.log_message('"%s" %s %s%s',
+                         self.requestline, str(code), str(size),
+                         ''.join([' %s:%s' % h for h in sorted(xheaders)]))
+
     def _connect_to(self, netloc, soc):
         i = netloc.find(':')
         if i >= 0:


More information about the Mercurial-devel mailing list