[PATCH 4 of 5 RFC] url: use new http support if requested by the user

Augie Fackler durin42 at gmail.com
Thu May 5 13:35:42 CDT 2011


# HG changeset patch
# User Augie Fackler <durin42 at gmail.com>
# Date 1304608214 18000
# Node ID 1e533203663e316ce2b01a781d981c4a3518132b
# Parent  ff30e8f54664ee31134cc24dba154d6bf6b8f741
url: use new http support if requested by the user

The new http library is wired in via an extra module, as it requires
similar but different plumbing to connect the library to Mercurial's
internals and urllib2.

diff --git a/mercurial/http2.py b/mercurial/http2.py
new file mode 100644
--- /dev/null
+++ b/mercurial/http2.py
@@ -0,0 +1,239 @@
+import httplib
+import logging
+import socket
+import urllib
+import urllib2
+import os
+
+from mercurial import http
+from mercurial import sslutil
+from mercurial import util
+from mercurial.i18n import _
+
+# moved here from url.py to avoid a cycle
+class httpsendfile(object):
+    """This is a wrapper around the objects returned by python's "open".
+
+    Its purpose is to send file-like objects via HTTP and, to do so, it
+    defines a __len__ attribute to feed the Content-Length header.
+    """
+
+    def __init__(self, ui, *args, **kwargs):
+        # ui reports progress; other arguments go to the builtin open()
+        self.ui = ui
+        self._data = open(*args, **kwargs)
+        self.seek = self._data.seek
+        self.close = self._data.close
+        self.write = self._data.write
+        self._len = os.fstat(self._data.fileno()).st_size
+        self._pos = 0
+        self._total = len(self) / 1024 * 2
+
+    def read(self, *args, **kwargs):
+        try:
+            ret = self._data.read(*args, **kwargs)
+        except EOFError:
+            ret = '' # treat as end of data instead of leaving ret unbound
+        if not ret:
+            self.ui.progress(_('sending'), None)
+            return ret
+        self._pos += len(ret)
+        # We pass double the max for total because the bundle is sent
+        # twice if the server requires authentication. We can't know that
+        # until we try once, so just lie to the user and maybe the push
+        # succeeds suddenly at 50%.
+        self.ui.progress(_('sending'), self._pos / 1024,
+                         unit=_('kb'), total=self._total)
+        return ret
+
+    def __len__(self):
+        return self._len
+
+# Mercurial (at least until we can remove the old codepath) requires
+# that the http response object be sufficiently file-like, so we
+# provide a close() method here.
+class HTTPResponse(http.HTTPResponse):
+    def close(self):
+        """Do nothing; exists so the response is file-like enough."""
+
+class HTTPConnection(http.HTTPConnection):
+    response_class = HTTPResponse
+    def request(self, method, uri, body=None, headers=None):
+        if isinstance(body, httpsendfile):
+            body.seek(0) # rewind: the file may be sent more than once
+        http.HTTPConnection.request(self, method, uri, body=body,
+                                    headers=headers or {}) # fresh dict
+
+
+_configuredlogging = False
+# Subclass BOTH of these because otherwise urllib2 "helpfully"
+# reinserts them since it notices we don't include any subclasses of
+# them.
+class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
+    def __init__(self, ui, pwmgr):
+        """Set up the connection pool and, once, http debug logging."""
+        global _configuredlogging
+        urllib2.AbstractHTTPHandler.__init__(self)
+        self.ui, self.pwmgr = ui, pwmgr
+        self._connections = {}
+        level = ui.config('ui', 'http2debuglevel', default=None)
+        if level and not _configuredlogging:
+            _configuredlogging = True
+            httplog = logging.getLogger('mercurial.http')
+            httplog.setLevel(getattr(logging, level.upper()))
+            httplog.addHandler(logging.StreamHandler())
+
+    def close_all(self):
+        """Close every pooled connection and empty the reuse pool."""
+        for pool in self._connections.values():
+            for connection in pool:
+                connection.close()
+        self._connections = {}
+
+    # shamelessly borrowed from urllib2.AbstractHTTPHandler
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib;
+        idle connections are reused per (host, proxy). Raises URLError if
+        no host is given or on socket.error. The result is file-like with:
+            - info(): return a mimetools.Message object for the headers
+            - geturl(): return the original request URL
+            - code: HTTP status code
+        """
+        # If using a proxy, the host returned by get_host() is
+        # actually the proxy. On Python 2.6.1, the real destination
+        # hostname is encoded in the URI in the urllib2 request
+        # object. On Python 2.6.5, it's stored in the _tunnel_host
+        # attribute which has no accessor.
+        tunhost = getattr(req, '_tunnel_host', None)
+        host = req.get_host()
+        if tunhost:
+            proxyhost = host
+            host = tunhost
+        elif req.has_proxy():
+            proxyhost = req.get_host()
+            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
+        else:
+            proxyhost = None
+
+        if proxyhost:
+            if ':' in proxyhost:
+                # Note: this explodes on an IPv6 http proxy; that isn't a
+                # regression, so we won't worry about it for now. The
+                # port stays a str here, unlike the int default below.
+                proxyhost, proxyport = proxyhost.rsplit(':', 1)
+            else:
+                proxyport = 3128 # squid default
+            proxy = (proxyhost, proxyport)
+        else:
+            proxy = None
+
+        if not host:
+            raise urllib2.URLError('no host given')
+
+        allconns = self._connections.get((host, proxy), [])
+        conns = [c for c in allconns if not c.busy()]
+        if conns:
+            h = conns[0]
+        else:
+            if allconns:
+                self.ui.debug('all connections for %s busy, making a new '
+                              'one\n' % host)
+            timeout = None
+            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                timeout = req.timeout
+            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
+            self._connections.setdefault((host, proxy), []).append(h)
+
+        headers = dict(req.headers)
+        headers.update(req.unredirected_hdrs) # these win on conflict
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
+        try:
+            path = req.get_selector()
+            if '://' in path: # absolute URI, as in the proxy case above
+                path = path.split('://', 1)[1].split('/', 1)[1]
+            if path[0] != '/': # NOTE(review): IndexError if path is empty
+                path = '/' + path
+            h.request(req.get_method(), path, req.data, headers)
+            r = h.getresponse()
+        except socket.error, err: # XXX what error?
+            raise urllib2.URLError(err)
+
+        # Pick apart the HTTPResponse object to get the addinfourl
+        # object initialized properly.
+
+        # Wrap the HTTPResponse object in socket's file object adapter
+        # for Windows.  That adapter calls recv(), so delegate recv()
+        # to read().  This weird wrapping allows the returned object to
+        # have readline() and readlines() methods.
+
+        # XXX It might be better to extract the read buffering code
+        # out of socket._fileobject() and into a base class.
+
+        r.recv = r.read
+        fp = socket._fileobject(r, close=True) # NOTE(review): unused below
+
+        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
+        resp.code = r.status
+        resp.msg = r.reason
+        return resp
+
+    # httplib always uses the given host/port as the socket connect
+    # target, and then allows full URIs in the request path, which it
+    # then observes and treats as a signal to do proxying instead.
+    def http_open(self, req):
+        # on Python 2.6.(3,4) and earlier:
+        # get_host is the proxy host iff we use a proxy
+        # get_origin_req_host is the original host if we use a proxy
+        # on python 2.6.5:
+        # ???
+        if not req.get_full_url().startswith('https'):
+            return self.do_open(HTTPConnection, req)
+        return self.https_open(req)
+
+    def https_open(self, req):
+        from mercurial import url # TODO circular ref
+        # Remember any matching auth entry; _makesslconnection reads it.
+        match = url.readauthforuri(self.ui, req.get_full_url())
+        if not match:
+            self.auth = None
+        else:
+            group, self.auth = match
+            self.ui.debug("using auth.%s.* for authentication\n" % group)
+        return self.do_open(self._makesslconnection, req)
+
+    def _makesslconnection(self, host, port=443, *args, **kwargs):
+        """Return an SSL HTTPConnection for host; passed as an http_class.
+
+        Positional args, if any, are (key_file, cert_file). self.auth
+        (set by https_open) and a port embedded in host take precedence.
+        """
+        # args is a tuple and has no pop(), so pad and slice it instead
+        keyfile, certfile = (args + (None, None))[:2]
+
+        # if the user has specified different key/cert files in
+        # hgrc, we prefer these
+        if self.auth and 'key' in self.auth and 'cert' in self.auth:
+            keyfile = self.auth['key']
+            certfile = self.auth['cert']
+
+        # let host port take precedence
+        if (':' in host and '[' not in host) or ']:' in host:
+            host, port = host.rsplit(':', 1)
+            port = int(port)
+            if '[' in host:
+                host = host[1:-1]
+
+        if keyfile:
+            kwargs['keyfile'] = keyfile
+        if certfile:
+            kwargs['certfile'] = certfile
+
+        kwargs.update(sslutil.sslkwargs(self.ui, host))
+
+        con = HTTPConnection(host, port, use_ssl=True,
+                             ssl_validator=sslutil.validator(self.ui, host),
+                             **kwargs)
+        return con
diff --git a/mercurial/httprepo.py b/mercurial/httprepo.py
--- a/mercurial/httprepo.py
+++ b/mercurial/httprepo.py
@@ -8,7 +8,7 @@
 
 from node import nullid
 from i18n import _
-import changegroup, statichttprepo, error, url, util, wireproto
+import changegroup, statichttprepo, error, http2, url, util, wireproto
 import os, urllib, urllib2, zlib, httplib
 import errno, socket
 
@@ -180,7 +180,7 @@
                 break
 
         tempname = changegroup.writebundle(cg, None, type)
-        fp = url.httpsendfile(self.ui, tempname, "rb")
+        fp = http2.httpsendfile(self.ui, tempname, "rb")
         headers = {'Content-Type': 'application/mercurial-0.1'}
 
         try:
diff --git a/mercurial/url.py b/mercurial/url.py
--- a/mercurial/url.py
+++ b/mercurial/url.py
@@ -10,7 +10,7 @@
 import urllib, urllib2, httplib, os, socket, cStringIO
 import __builtin__
 from i18n import _
-import keepalive, util, sslutil
+import keepalive, util, sslutil, http2
 
 def readauthforuri(ui, uri):
     # Read configuration
@@ -149,48 +149,10 @@
 
         return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
 
-class httpsendfile(object):
-    """This is a wrapper around the objects returned by python's "open".
-
-    Its purpose is to send file-like objects via HTTP and, to do so, it
-    defines a __len__ attribute to feed the Content-Length header.
-    """
-
-    def __init__(self, ui, *args, **kwargs):
-        # We can't just "self._data = open(*args, **kwargs)" here because there
-        # is an "open" function defined in this module that shadows the global
-        # one
-        self.ui = ui
-        self._data = __builtin__.open(*args, **kwargs)
-        self.seek = self._data.seek
-        self.close = self._data.close
-        self.write = self._data.write
-        self._len = os.fstat(self._data.fileno()).st_size
-        self._pos = 0
-        self._total = len(self) / 1024 * 2
-
-    def read(self, *args, **kwargs):
-        try:
-            ret = self._data.read(*args, **kwargs)
-        except EOFError:
-            self.ui.progress(_('sending'), None)
-        self._pos += len(ret)
-        # We pass double the max for total because we currently have
-        # to send the bundle twice in the case of a server that
-        # requires authentication. Since we can't know until we try
-        # once whether authentication will be required, just lie to
-        # the user and maybe the push succeeds suddenly at 50%.
-        self.ui.progress(_('sending'), self._pos / 1024,
-                         unit=_('kb'), total=self._total)
-        return ret
-
-    def __len__(self):
-        return self._len
-
 def _gen_sendfile(orgsend):
     def _sendfile(self, data):
         # send a file
-        if isinstance(data, httpsendfile):
+        if isinstance(data, http2.httpsendfile):
             # if auth required, some data sent twice, so rewind here
             data.seek(0)
             for chunk in util.filechunkiter(data):
@@ -495,9 +457,12 @@
     construct an opener suitable for urllib2
     authinfo will be added to the password manager
     '''
-    handlers = [httphandler()]
-    if has_https:
-        handlers.append(httpshandler(ui))
+    if ui.configbool('ui', 'http2', False):
+        handlers = [http2.http2handler(ui, passwordmgr(ui))]
+    else:
+        handlers = [httphandler()]
+        if has_https:
+            handlers.append(httpshandler(ui))
 
     handlers.append(proxyhandler(ui))
 


More information about the Mercurial-devel mailing list