D2822: hgweb: support constructing URLs from an alternate base URL

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Mon Mar 12 17:38:35 EDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rHG219b23359f4c: hgweb: support constructing URLs from an alternate base URL (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2822?vs=6888&id=6952

REVISION DETAIL
  https://phab.mercurial-scm.org/D2822

AFFECTED FILES
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  tests/test-wsgirequest.py

CHANGE DETAILS

diff --git a/tests/test-wsgirequest.py b/tests/test-wsgirequest.py
--- a/tests/test-wsgirequest.py
+++ b/tests/test-wsgirequest.py
@@ -23,11 +23,12 @@
     r'wsgi.run_once': False,
 }
 
-def parse(env, bodyfh=None, reponame=None, extra=None):
+def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None):
     env = dict(env)
     env.update(extra or {})
 
-    return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame)
+    return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame,
+                                          altbaseurl=altbaseurl)
 
 class ParseRequestTests(unittest.TestCase):
     def testdefault(self):
@@ -242,6 +243,174 @@
         self.assertEqual(r.dispatchpath, b'path1/path2')
         self.assertEqual(r.reponame, b'prefix/repo')
 
+    def testaltbaseurl(self):
+        # Simple hostname remap.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver')
+
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # With a custom port.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver:8000')
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver:8000')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver:8000')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # With a changed protocol.
+        r = parse(DEFAULT_ENV, altbaseurl='https://altserver')
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'https://altserver')
+        self.assertEqual(r.advertisedbaseurl, b'https://altserver')
+        # URL scheme is defined as the actual scheme, not advertised.
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # Need to specify explicit port number for proper https:// alt URLs.
+        r = parse(DEFAULT_ENV, altbaseurl='https://altserver:443')
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'https://altserver')
+        self.assertEqual(r.advertisedbaseurl, b'https://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # With only PATH_INFO defined.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
+            r'PATH_INFO': r'/path1/path2',
+        })
+        self.assertEqual(r.url, b'http://testserver/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertIsNone(r.reponame)
+
+        # Path on alt URL.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath')
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver/altpath')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'/altpath')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # With a trailing slash.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/')
+        self.assertEqual(r.url, b'http://testserver')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver/altpath/')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'/altpath/')
+        self.assertEqual(r.dispatchparts, [])
+        self.assertIsNone(r.dispatchpath)
+        self.assertIsNone(r.reponame)
+
+        # PATH_INFO + path on alt URL.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath', extra={
+            r'PATH_INFO': r'/path1/path2',
+        })
+        self.assertEqual(r.url, b'http://testserver/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl,
+                         b'http://altserver/altpath/path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'/altpath')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertIsNone(r.reponame)
+
+        # PATH_INFO + path on alt URL with trailing slash.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/', extra={
+            r'PATH_INFO': r'/path1/path2',
+        })
+        self.assertEqual(r.url, b'http://testserver/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl,
+                         b'http://altserver/altpath//path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'/altpath/')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertIsNone(r.reponame)
+
+        # Local SCRIPT_NAME is ignored.
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
+            r'SCRIPT_NAME': r'/script',
+            r'PATH_INFO': r'/path1/path2',
+        })
+        self.assertEqual(r.url, b'http://testserver/script/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertIsNone(r.reponame)
+
+        # Use remote's path for script name, app path
+        r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altroot', extra={
+            r'SCRIPT_NAME': r'/script',
+            r'PATH_INFO': r'/path1/path2',
+        })
+        self.assertEqual(r.url, b'http://testserver/script/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl,
+                         b'http://altserver/altroot/path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.urlscheme, b'http')
+        self.assertEqual(r.apppath, b'/altroot')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertIsNone(r.reponame)
+
+        # reponame is factored in properly.
+        r = parse(DEFAULT_ENV, reponame=b'repo',
+                  altbaseurl='http://altserver/altroot',
+                  extra={
+                r'SCRIPT_NAME': r'/script',
+                r'PATH_INFO': r'/repo/path1/path2',
+            })
+
+        self.assertEqual(r.url, b'http://testserver/script/repo/path1/path2')
+        self.assertEqual(r.baseurl, b'http://testserver')
+        self.assertEqual(r.advertisedurl,
+                         b'http://altserver/altroot/repo/path1/path2')
+        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
+        self.assertEqual(r.apppath, b'/altroot/repo')
+        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
+        self.assertEqual(r.dispatchpath, b'path1/path2')
+        self.assertEqual(r.reponame, b'repo')
+
 if __name__ == '__main__':
     import silenttestrunner
     silenttestrunner.main(__name__)
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -157,7 +157,7 @@
     # Request body input stream.
     bodyfh = attr.ib()
 
-def parserequestfromenv(env, bodyfh, reponame=None):
+def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
     """Parse URL components from environment variables.
 
     WSGI defines request attributes via environment variables. This function
@@ -167,8 +167,18 @@
     string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
     This simulates the world view of a WSGI application that processes
     requests from the base URL of a repo.
+
+    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
+    is defined, it is used - instead of the WSGI environment variables - for
+    constructing URL components up to and including the WSGI application path.
+    For example, if the current WSGI application is at ``/repo`` and a request
+    is made to ``/rev/@`` with this argument set to
+    ``http://myserver:9000/prefix``, the URL and path components will resolve as
+    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
+    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
+    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
     """
-    # PEP-0333 defines the WSGI spec and is a useful reference for this code.
+    # PEP 3333 defines the WSGI spec and is a useful reference for this code.
 
     # We first validate that the incoming object conforms with the WSGI spec.
     # We only want to be dealing with spec-conforming WSGI implementations.
@@ -184,38 +194,67 @@
         env = {k: v.encode('latin-1') if isinstance(v, str) else v
                for k, v in env.iteritems()}
 
+    if altbaseurl:
+        altbaseurl = util.url(altbaseurl)
+
     # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
     # the environment variables.
     # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
     # how URLs are reconstructed.
     fullurl = env['wsgi.url_scheme'] + '://'
-    advertisedfullurl = fullurl
+
+    if altbaseurl and altbaseurl.scheme:
+        advertisedfullurl = altbaseurl.scheme + '://'
+    else:
+        advertisedfullurl = fullurl
 
-    def addport(s):
-        if env['wsgi.url_scheme'] == 'https':
-            if env['SERVER_PORT'] != '443':
-                s += ':' + env['SERVER_PORT']
+    def addport(s, port):
+        if s.startswith('https://'):
+            if port != '443':
+                s += ':' + port
         else:
-            if env['SERVER_PORT'] != '80':
-                s += ':' + env['SERVER_PORT']
+            if port != '80':
+                s += ':' + port
 
         return s
 
     if env.get('HTTP_HOST'):
         fullurl += env['HTTP_HOST']
     else:
         fullurl += env['SERVER_NAME']
-        fullurl = addport(fullurl)
+        fullurl = addport(fullurl, env['SERVER_PORT'])
+
+    if altbaseurl and altbaseurl.host:
+        advertisedfullurl += altbaseurl.host
 
-    advertisedfullurl += env['SERVER_NAME']
-    advertisedfullurl = addport(advertisedfullurl)
+        if altbaseurl.port:
+            port = altbaseurl.port
+        elif altbaseurl.scheme == 'http' and not altbaseurl.port:
+            port = '80'
+        elif altbaseurl.scheme == 'https' and not altbaseurl.port:
+            port = '443'
+        else:
+            port = env['SERVER_PORT']
+
+        advertisedfullurl = addport(advertisedfullurl, port)
+    else:
+        advertisedfullurl += env['SERVER_NAME']
+        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
 
     baseurl = fullurl
     advertisedbaseurl = advertisedfullurl
 
     fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
-    advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
     fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
+
+    if altbaseurl:
+        path = altbaseurl.path or ''
+        if path and not path.startswith('/'):
+            path = '/' + path
+        advertisedfullurl += util.urlreq.quote(path)
+    else:
+        advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
+
     advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
 
     if env.get('QUERY_STRING'):
@@ -226,7 +265,12 @@
     # that represents the repository being dispatched to. When computing
     # the dispatch info, we ignore these leading path components.
 
-    apppath = env.get('SCRIPT_NAME', '')
+    if altbaseurl:
+        apppath = altbaseurl.path or ''
+        if apppath and not apppath.startswith('/'):
+            apppath = '/' + apppath
+    else:
+        apppath = env.get('SCRIPT_NAME', '')
 
     if reponame:
         repoprefix = '/' + reponame.strip('/')
@@ -545,7 +589,7 @@
     instantiate instances of this class, which provides higher-level APIs
     for obtaining request parameters, writing HTTP output, etc.
     """
-    def __init__(self, wsgienv, start_response):
+    def __init__(self, wsgienv, start_response, altbaseurl=None):
         version = wsgienv[r'wsgi.version']
         if (version < (1, 0)) or (version >= (2, 0)):
             raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
@@ -563,7 +607,7 @@
         self.multiprocess = wsgienv[r'wsgi.multiprocess']
         self.run_once = wsgienv[r'wsgi.run_once']
         self.env = wsgienv
-        self.req = parserequestfromenv(wsgienv, inp)
+        self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
         self.res = wsgiresponse(self.req, start_response)
         self._start_response = start_response
         self.server_write = None
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -33,7 +33,6 @@
     error,
     hg,
     profiling,
-    pycompat,
     scmutil,
     templater,
     ui as uimod,
@@ -83,33 +82,6 @@
         yield (prefix + '/' +
                util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
 
-def geturlcgivars(baseurl, port):
-    """
-    Extract CGI variables from baseurl
-
-    >>> geturlcgivars(b"http://host.org/base", b"80")
-    ('host.org', '80', '/base')
-    >>> geturlcgivars(b"http://host.org:8000/base", b"80")
-    ('host.org', '8000', '/base')
-    >>> geturlcgivars(b'/base', 8000)
-    ('', '8000', '/base')
-    >>> geturlcgivars(b"base", b'8000')
-    ('', '8000', '/base')
-    >>> geturlcgivars(b"http://host", b'8000')
-    ('host', '8000', '/')
-    >>> geturlcgivars(b"http://host/", b'8000')
-    ('host', '8000', '/')
-    """
-    u = util.url(baseurl)
-    name = u.host or ''
-    if u.port:
-        port = u.port
-    path = u.path or ""
-    if not path.startswith('/'):
-        path = '/' + path
-
-    return name, pycompat.bytestr(port), path
-
 def readallowed(ui, req):
     """Check allow_read and deny_read config options of a repo's ui object
     to determine user permissions.  By default, with neither option set (or
@@ -359,7 +331,6 @@
         self.stripecount = self.ui.config('web', 'stripes')
         if self.stripecount:
             self.stripecount = int(self.stripecount)
-        self._baseurl = self.ui.config('web', 'baseurl')
         prefix = self.ui.config('web', 'prefix')
         if prefix.startswith('/'):
             prefix = prefix[1:]
@@ -376,7 +347,8 @@
         wsgicgi.launch(self)
 
     def __call__(self, env, respond):
-        wsgireq = requestmod.wsgirequest(env, respond)
+        baseurl = self.ui.config('web', 'baseurl')
+        wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl)
         return self.run_wsgi(wsgireq)
 
     def run_wsgi(self, wsgireq):
@@ -455,7 +427,8 @@
                     # Re-parse the WSGI environment to take into account our
                     # repository path component.
                     wsgireq.req = requestmod.parserequestfromenv(
-                        wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo)
+                        wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo,
+                        altbaseurl=self.ui.config('web', 'baseurl'))
                     try:
                         # ensure caller gets private copy of ui
                         repo = hg.repository(self.ui.copy(), real)
@@ -502,7 +475,6 @@
                 for column in sortable]
 
         self.refresh()
-        self.updatereqenv(wsgireq.env)
 
         entries = indexentries(self.ui, self.repos, wsgireq, req,
                                self.stripecount, sortcolumn=sortcolumn,
@@ -524,8 +496,6 @@
         def config(section, name, default=uimod._unset, untrusted=True):
             return self.ui.config(section, name, default, untrusted)
 
-        self.updatereqenv(wsgireq.env)
-
         url = wsgireq.env.get('SCRIPT_NAME', '')
         if not url.endswith('/'):
             url += '/'
@@ -557,10 +527,3 @@
         }
         tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
         return tmpl
-
-    def updatereqenv(self, env):
-        if self._baseurl is not None:
-            name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT'])
-            env['SERVER_NAME'] = name
-            env['SERVER_PORT'] = port
-            env['SCRIPT_NAME'] = path



To: indygreg, #hg-reviewers, durin42
Cc: mercurial-devel


More information about the Mercurial-devel mailing list