[PATCH V3] hgweb: teach archive how to handle file patterns

Brodie Rao brodie at sf.io
Sat Feb 9 17:15:58 CST 2013


On Sat, Feb 9, 2013 at 8:42 PM, Angel Ezquerra <angel.ezquerra at gmail.com> wrote:
> # HG changeset patch
> # User Angel Ezquerra <angel.ezquerra at gmail.com>
> # Date 1360141605 -3600
> # Node ID c0509330eb416104bed8cc7d0a4038f818dd5e5d
> # Parent  b6c8e79948a05d01920392cd12ac5c8279d5c62b
> hgweb: teach archive how to handle file patterns
>
> The archive web command now takes into account the "file" request entry, if one
> is provided.
>
> The provided "file" is processed as a "path" pattern by default, which makes it
> easy to only archive a certain file or directory. However, it is possible to
> specify a different type of pattern, such as relglob by specifying it
> explicitly on the query URL. Note that only "safe" patterns are allowed. Safe
> patterns are 'path', 'relpath', 'glob' and 'relglob'. Other pattern types are
> not allowed because they could be expensive to calculate.
>
> With this change hgweb can process requests such as:
>
> 1. http://mercurial.selenic.com/hg/tip.zip/mercurial/templates
>
>     This will download all files in the mercurial/templates directory as a zip
>     file

The format for this URL seems a little strange. What happens if you do
'curl -O http://mercurial.selenic.com/hg/tip.zip/mercurial/templates'?
Does it figure out that the filename should be tip.zip? And what
happens if you want to download an archive for a branch/bookmark/tag
that has forward slashes in it?

I wonder if making this a GET parameter might be cleaner.

> 2. http://mercurial.selenic.com/hg/tip.tar.gz/relglob:*.py
>
>     This will download all *.py files in the repository into a tar.gz file.
>
> And so forth.
>
> Note that this is a first step to add support for downloading directories from
> the web interface. Currently the only way to use this feature is by manually
> constructing the URL that you want to download. We will have to modify the
> archiveentry map entry on the different templates so that it adds the current
> folder path to the archive links.
>
> This revision also adds a test for this feature to test-archive.t.
>
> diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py
> --- a/mercurial/hgweb/webcommands.py
> +++ b/mercurial/hgweb/webcommands.py
> @@ -795,7 +795,7 @@
>      if not ((type_ in allowed or
>          web.configbool("web", "allow" + type_, False))):
>          msg = 'Archive type not allowed: %s' % type_
> -        raise ErrorResponse(HTTP_FORBIDDEN, msg)
> +        #raise ErrorResponse(HTTP_FORBIDDEN, msg)
>
>      reponame = re.sub(r"\W+", "-", os.path.basename(web.reponame))
>      cnode = web.repo.lookup(key)
> @@ -803,6 +803,17 @@
>      if cnode == key or key == 'tip':
>          arch_version = short(cnode)
>      name = "%s-%s" % (reponame, arch_version)
> +
> +    ctx = webutil.changectx(web.repo, req)
> +    pats = []
> +    file = req.form.get('file', None)
> +    defaultpat = 'path'
> +    if file:
> +        pats = [req.form['file'][0]]
> +        if not scmutil.patsaresafe(pats, defaultpat):
> +            msg = 'Archive pattern not allowed: %s' % pats[0]
> +            raise ErrorResponse(HTTP_FORBIDDEN, msg)
> +
>      mimetype, artype, extension, encoding = web.archive_specs[type_]
>      headers = [
>          ('Content-Disposition', 'attachment; filename=%s%s' % (name, extension))
> @@ -811,10 +822,9 @@
>          headers.append(('Content-Encoding', encoding))
>      req.headers.extend(headers)
>      req.respond(HTTP_OK, mimetype)
> -
> -    ctx = webutil.changectx(web.repo, req)
> +    matchfn = scmutil.match(ctx, pats, default=defaultpat)
>      archival.archive(web.repo, req, cnode, artype, prefix=name,
> -                     matchfn=scmutil.match(ctx, []),
> +                     matchfn=matchfn,
>                       subrepos=web.configbool("web", "archivesubrepos"))
>      return []
>
> diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
> --- a/mercurial/scmutil.py
> +++ b/mercurial/scmutil.py
> @@ -682,6 +682,15 @@
>
>      return l
>
> +def patsaresafe(pats, defaultpattype):
> +    for pat in pats:
> +        pattype = defaultpattype
> +        if ':' in pat:
> +            pattype = pat.split(':')[0]
> +        if pattype.lower() not in ('path', 'relpath', 'glog', 'relglob'):
> +            return False
> +    return True
> +
>  def expandpats(pats):
>      if not util.expandglobs:
>          return list(pats)
> diff --git a/tests/test-archive.t b/tests/test-archive.t
> --- a/tests/test-archive.t
> +++ b/tests/test-archive.t
> @@ -69,9 +69,14 @@
>    >     msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
>    > except ImportError:
>    >     pass
> -  > node, archive = sys.argv[1:]
> -  > f = urllib2.urlopen('http://127.0.0.1:%s/?cmd=archive;node=%s;type=%s'
> -  >                     % (os.environ['HGPORT'], node, archive))
> +  > if len(sys.argv) <= 3:
> +  >     node, archive = sys.argv[1:]
> +  >     requeststr = 'cmd=archive;node=%s;type=%s' % (node, archive)
> +  > else:
> +  >     node, archive, file = sys.argv[1:]
> +  >     requeststr = 'cmd=archive;node=%s;type=%s;file=%s' % (node, archive, file)
> +  > f = urllib2.urlopen('http://127.0.0.1:%s/?%s'
> +  >                     % (os.environ['HGPORT'], requeststr))
>    > sys.stdout.write(f.read())
>    > EOF
>    $ python getarchive.py "$TIP" gz | gunzip | tar tf - 2>/dev/null
> @@ -92,6 +97,8 @@
>        testing: test-archive-2c0277f05ed4/baz/bletch   OK
>        testing: test-archive-2c0277f05ed4/foo   OK
>    No errors detected in compressed data of archive.zip.
> +  $ python getarchive.py "$TIP" gz baz | gunzip | tar tf - 2>/dev/null
> +  test-archive-2c0277f05ed4/baz/bletch
>
>    $ "$TESTDIR/killdaemons.py" $DAEMON_PIDS
>
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list