[PATCH] hgweb: fix url base generation to handle absolute paths in baseurl

Dirkjan Ochtman dirkjan at ochtman.nl
Wed May 20 08:48:30 CDT 2009


Okay, this is a hard problem... I'm not sure the patch gets everything 
right yet. Let's see...

On 14/05/2009 22:14, Alexander Solovyov wrote:
> -        req.url = req.env['SCRIPT_NAME']
> +        req.url = self.config('web', 'baseurl', req.env['SCRIPT_NAME'])

Can you explain what SCRIPT_NAME looks like?

>           if not req.url.endswith('/'):
>               req.url += '/'
>           if 'REPO_NAME' in req.env:
> @@ -200,17 +200,24 @@ class hgweb(object):
>           # determine scheme, port and server name
>           # this is needed to create absolute urls
>
> -        proto = req.env.get('wsgi.url_scheme')
> -        if proto == 'https':
> -            proto = 'https'
> -            default_port = "443"
> +        if not '://' in req.url:
> +            proto = req.env.get('wsgi.url_scheme')
> +            if proto == 'https':
> +                proto = 'https'
> +                default_port = "443"
> +            else:
> +                proto = 'http'
> +                default_port = "80"
> +
> +            if not req.url.startswith('//'):
> +                port = req.env["SERVER_PORT"]
> +                port = port != default_port and (":" + port) or ""
> +                urlbase = '%s://%s%s' % (proto, req.env['SERVER_NAME'], port)
> +            else:
> +                urlbase = proto + '://'

It seems to me that this last case doesn't produce a usable urlbase, 
right? I'm presuming that urlbase should be something that probably 
points to either the hgweb or the hgwebdir root. I'm also not sure in 
what case req.url would ever start with '//'. Also, to eliminate some of 
the nesting, this if clause would benefit (in two places) from a x = 
default; if y: x = something instead of if y: x = something; else: x = 
default kind of style.

>           else:
> -            proto = 'http'
> -            default_port = "80"
> +            urlbase = ''
>
> -        port = req.env["SERVER_PORT"]
> -        port = port != default_port and (":" + port) or ""
> -        urlbase = '%s://%s%s' % (proto, req.env['SERVER_NAME'], port)
>           staticurl = self.config("web", "staticurl") or req.url + 'static/'
>           if not staticurl.endswith('/'):
>               staticurl += '/'
> diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
> --- a/mercurial/hgweb/hgwebdir_mod.py
> +++ b/mercurial/hgweb/hgwebdir_mod.py
> @@ -52,7 +52,6 @@ class hgwebdir(object):
>           self.stripecount = self.ui.config('web', 'stripes', 1)
>           if self.stripecount:
>               self.stripecount = int(self.stripecount)
> -        self._baseurl = self.ui.config('web', 'baseurl')
>
>           if self.repos:
>               return
> @@ -216,11 +215,15 @@ class hgwebdir(object):
>                       continue
>
>                   parts = [name]
> +                baseurl = self.ui.config('web', 'baseurl')
>                   if 'PATH_INFO' in req.env:

The indentation seems off here?

>                       parts.insert(0, req.env['PATH_INFO'].rstrip('/'))
> -                if req.env['SCRIPT_NAME']:
> +                if req.env['SCRIPT_NAME'] and baseurl is None:
>                       parts.insert(0, req.env['SCRIPT_NAME'])
> -                url = ('/'.join(parts).replace("//", "/")) + '/'
> +                url = ('/'.join(parts)).replace('//', '/') + '/'
> +                if baseurl is not None:
> +                    baseurl = baseurl.rstrip('/')
> +                    url = baseurl + url
>
>                   # update time with local timezone
>                   try:
> @@ -272,8 +275,6 @@ class hgwebdir(object):
>                   for column in sortable]
>
>           self.refresh()
> -        if self._baseurl is not None:
> -            req.env['SCRIPT_NAME'] = self._baseurl
>
>           return tmpl("index", entries=entries, subdir=subdir,
>                       sortcolumn=sortcolumn, descending=descending,
> @@ -296,10 +297,7 @@ class hgwebdir(object):
>           def config(section, name, default=None, untrusted=True):
>               return self.ui.config(section, name, default, untrusted)
>
> -        if self._baseurl is not None:
> -            req.env['SCRIPT_NAME'] = self._baseurl
> -
> -        url = req.env.get('SCRIPT_NAME', '')
> +        url = self.ui.config('web', 'baseurl', req.env.get('SCRIPT_NAME', ''))

Indentation here also looks weird.

>           if not url.endswith('/'):
>               url += '/'
>
> diff --git a/tests/test-hgweb b/tests/test-hgweb
> --- a/tests/test-hgweb
> +++ b/tests/test-hgweb
> @@ -42,3 +42,17 @@ echo % static file
>
>   echo % errors
>   cat errors.log
> +
> +echo % relative baseurl
> +"$TESTDIR/killdaemons.py"
> +echo "[web]\nbaseurl=/prefix/">  .hg/hgrc
> +hg serve -p $HGPORT -d --pid-file=hg.pid -A access.log
> +cat hg.pid>>  $DAEMON_PIDS
> +("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/file/tip/foo')
> +
> +echo % absolute baseurl
> +"$TESTDIR/killdaemons.py"
> +echo "[web]\nbaseurl=http://example.com">  .hg/hgrc
> +hg serve -p $HGPORT -d --pid-file=hg.pid -A access.log
> +cat hg.pid>>  $DAEMON_PIDS
> +("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/file/tip/foo')

Put the killdaemons before the echo % explanation, please. And leave cat 
errors.log + preceding comment as the last bit in the whole file (and 
put -E in your hg serve lines to feed to it; this helps debugging test 
failures).

I'm sorry this is such a tired, slow-going project, it's pretty hard to 
wrap my head around the issues you're having.

Cheers,

Dirkjan

> diff --git a/tests/test-hgweb.out b/tests/test-hgweb.out
> --- a/tests/test-hgweb.out
> +++ b/tests/test-hgweb.out
> @@ -277,3 +277,175 @@ ul#graphnodes li .info {
>   	font-style: italic;
>   }
>   % errors
> +% relative baseurl
> +200 Script output follows
> +
> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
> +<head>
> +<link rel="icon" href="/prefix/static/hgicon.png" type="image/png" />
> +<meta name="robots" content="index, nofollow" />
> +<link rel="stylesheet" href="/prefix/static/style-paper.css" type="text/css" />
> +
> +<title>prefix: 2ef0ac749a14 foo</title>
> +</head>
> +<body>
> +
> +<div class="container">
> +<div class="menu">
> +<div class="logo">
> +<a href="http://www.selenic.com/mercurial/">
> +<img src="/prefix/static/hglogo.png" alt="mercurial" /></a>
> +</div>
> +<ul>
> +<li><a href="/prefix/shortlog/2ef0ac749a14">log</a></li>
> +<li><a href="/prefix/graph/2ef0ac749a14">graph</a></li>
> +<li><a href="/prefix/tags">tags</a></li>
> +<li><a href="/prefix/branches">branches</a></li>
> +</ul>
> +<ul>
> +<li><a href="/prefix/rev/2ef0ac749a14">changeset</a></li>
> +<li><a href="/prefix/file/2ef0ac749a14/">browse</a></li>
> +</ul>
> +<ul>
> +<li class="active">file</li>
> +<li><a href="/prefix/diff/2ef0ac749a14/foo">diff</a></li>
> +<li><a href="/prefix/annotate/2ef0ac749a14/foo">annotate</a></li>
> +<li><a href="/prefix/log/2ef0ac749a14/foo">file log</a></li>
> +<li><a href="/prefix/raw-file/2ef0ac749a14/foo">raw</a></li>
> +</ul>
> +</div>
> +
> +<div class="main">
> +<h2><a href="/prefix/">prefix</a></h2>
> +<h3>view foo @ 0:2ef0ac749a14</h3>
> +
> +<form class="search" action="/prefix/log">
> +
> +<p><input name="rev" id="search1" type="text" size="30" /></p>
> +<div id="hint">find changesets by author, revision,
> +files, or words in the commit message</div>
> +</form>
> +
> +<div class="description">base</div>
> +
> +<table id="changesetEntry">
> +<tr>
> +<th class="author">author</th>
> +<td class="author">&#116;&#101;&#115;&#116;</td>
> +</tr>
> +<tr>
> +<th class="date">date</th>
> +<td class="date">Thu Jan 01 00:00:00 1970 +0000 (many years ago)</td>
> +</tr>
> +<tr>
> +<th class="author">parents</th>
> +<td class="author"></td>
> +</tr>
> +<tr>
> +<th class="author">children</th>
> +<td class="author"></td>
> +</tr>
> +
> +</table>
> +
> +<div class="overflow">
> +<div class="sourcefirst">  line source</div>
> +<div class="parity0 source"><a href="#l1" id="l1">      1</a>  foo
> +</div>
> +<div class="sourcelast"></div>
> +</div>
> +</div>
> +</div>
> +
> +
> +
> +</body>
> +</html>
> +
> +% absolute baseurl
> +200 Script output follows
> +
> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
> +<head>
> +<link rel="icon" href="http://example.com/static/hgicon.png" type="image/png" />
> +<meta name="robots" content="index, nofollow" />
> +<link rel="stylesheet" href="http://example.com/static/style-paper.css" type="text/css" />
> +
> +<title>http://example.com: 2ef0ac749a14 foo</title>
> +</head>
> +<body>
> +
> +<div class="container">
> +<div class="menu">
> +<div class="logo">
> +<a href="http://www.selenic.com/mercurial/">
> +<img src="http://example.com/static/hglogo.png" alt="mercurial" /></a>
> +</div>
> +<ul>
> +<li><a href="http://example.com/shortlog/2ef0ac749a14">log</a></li>
> +<li><a href="http://example.com/graph/2ef0ac749a14">graph</a></li>
> +<li><a href="http://example.com/tags">tags</a></li>
> +<li><a href="http://example.com/branches">branches</a></li>
> +</ul>
> +<ul>
> +<li><a href="http://example.com/rev/2ef0ac749a14">changeset</a></li>
> +<li><a href="http://example.com/file/2ef0ac749a14/">browse</a></li>
> +</ul>
> +<ul>
> +<li class="active">file</li>
> +<li><a href="http://example.com/diff/2ef0ac749a14/foo">diff</a></li>
> +<li><a href="http://example.com/annotate/2ef0ac749a14/foo">annotate</a></li>
> +<li><a href="http://example.com/log/2ef0ac749a14/foo">file log</a></li>
> +<li><a href="http://example.com/raw-file/2ef0ac749a14/foo">raw</a></li>
> +</ul>
> +</div>
> +
> +<div class="main">
> +<h2><a href="http://example.com/">http://example.com</a></h2>
> +<h3>view foo @ 0:2ef0ac749a14</h3>
> +
> +<form class="search" action="http://example.com/log">
> +
> +<p><input name="rev" id="search1" type="text" size="30" /></p>
> +<div id="hint">find changesets by author, revision,
> +files, or words in the commit message</div>
> +</form>
> +
> +<div class="description">base</div>
> +
> +<table id="changesetEntry">
> +<tr>
> +<th class="author">author</th>
> +<td class="author">&#116;&#101;&#115;&#116;</td>
> +</tr>
> +<tr>
> +<th class="date">date</th>
> +<td class="date">Thu Jan 01 00:00:00 1970 +0000 (many years ago)</td>
> +</tr>
> +<tr>
> +<th class="author">parents</th>
> +<td class="author"></td>
> +</tr>
> +<tr>
> +<th class="author">children</th>
> +<td class="author"></td>
> +</tr>
> +
> +</table>
> +
> +<div class="overflow">
> +<div class="sourcefirst">  line source</div>
> +<div class="parity0 source"><a href="#l1" id="l1">      1</a>  foo
> +</div>
> +<div class="sourcelast"></div>
> +</div>
> +</div>
> +</div>
> +
> +
> +
> +</body>
> +</html>
> +
> diff --git a/tests/test-hgwebdir b/tests/test-hgwebdir
> --- a/tests/test-hgwebdir
> +++ b/tests/test-hgwebdir
> @@ -82,6 +82,13 @@ echo % should succeed, slashy names
>   "$TESTDIR/get-with-headers.py" localhost:$HGPORT1 '/rcoll/?style=raw'
>   "$TESTDIR/get-with-headers.py" localhost:$HGPORT1 '/rcoll/b/d/file/tip/d?style=raw'
>
> +echo % baseurl
> +"$TESTDIR/killdaemons.py"
> +echo '\n[web]\nbaseurl=http://example.com/'>>  collections.conf
> +hg serve -p $HGPORT -d --pid-file=hg.pid --webdir-conf collections.conf \
> +    -A access-collections.log -E error-collections.log
> +cat hg.pid>>  $DAEMON_PIDS
> +"$TESTDIR/get-with-headers.py" localhost:$HGPORT '/error404/'
>
>   cat>  collections.conf<<EOF
>   [collections]
> diff --git a/tests/test-hgwebdir.out b/tests/test-hgwebdir.out
> --- a/tests/test-hgwebdir.out
> +++ b/tests/test-hgwebdir.out
> @@ -307,6 +307,31 @@ 200 Script output follows
>   200 Script output follows
>
>   d
> +% baseurl
> +404 Not Found
> +
> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
> +<head>
> +<link rel="icon" href="http://example.com/static/hgicon.png" type="image/png" />
> +<meta name="robots" content="index, nofollow" />
> +<link rel="stylesheet" href="http://example.com/static/style-paper.css" type="text/css" />
> +
> +<title>Mercurial repository not found</title>
> +</head>
> +<body>
> +
> +<h2>Mercurial repository not found</h2>
> +
> +The specified repository "error404" is unknown, sorry.
> +
> +Please go back to the main repository list page.
> +
> +
> +
> +</body>
> +</html>
> +
>   % collections: should succeed
>   200 Script output follows



More information about the Mercurial-devel mailing list