[PATCH] hgweb: return data in increasing chunk sizes
Brendan Cully
brendan at kublai.com
Fri Nov 21 17:58:50 CST 2008
On Wednesday, 19 November 2008 at 18:37, Brendan Cully wrote:
> # HG changeset patch
> # User Brendan Cully <brendan at kublai.com>
> # Date 1227148600 28800
> # Node ID 8c4bdff155f5a9c19c9e58f2900a2816758a7312
> # Parent 92c952c4470c41647a30184b21466554021d580e
> hgweb: return data in increasing chunk sizes
Here's an alternative version that buries the buffer code inside
templater.__call__, so that all template users get it and it doesn't
layer on top of the protocol functions.
-------------- next part --------------
# HG changeset patch
# User Brendan Cully <brendan at kublai.com>
# Date 1227311500 28800
# Node ID 368594f505148b5dbe60b3dd74080474334d23dc
# Parent 03b60f2f90bf14594ffd733cace1102c5c10499b
hgweb: return data in increasing chunk sizes
Currently hgweb is not streaming its output -- it accumulates the
entire response before sending it. This patch restores streaming
behaviour, buffering the output sensibly (currently, it produces chunks
of at least 1K, then 2K, then 4K, and so on, doubling up to a maximum
buffer size of 64K). Local testing of
a fetch of a 100,000 line file with wget produces a slight slowdown
overall (up from 6.5 seconds to 7.2 seconds), but instead of waiting 6
seconds for headers to arrive, output begins immediately.
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -182,20 +182,20 @@
content = getattr(webcommands, cmd)(self, req, tmpl)
req.respond(HTTP_OK, ctype)
- return ''.join(content),
+ return content
except revlog.LookupError, err:
req.respond(HTTP_NOT_FOUND, ctype)
msg = str(err)
if 'manifest' not in msg:
msg = 'revision not found: %s' % err.name
- return ''.join(tmpl('error', error=msg)),
+ return tmpl('error', error=msg)
except (RepoError, revlog.RevlogError), inst:
req.respond(HTTP_SERVER_ERROR, ctype)
- return ''.join(tmpl('error', error=str(inst))),
+ return tmpl('error', error=str(inst))
except ErrorResponse, inst:
req.respond(inst.code, ctype)
- return ''.join(tmpl('error', error=inst.message)),
+ return tmpl('error', error=inst.message)
def templater(self, req):
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -116,7 +116,7 @@
# top-level index
elif not virtual:
req.respond(HTTP_OK, ctype)
- return ''.join(self.makeindex(req, tmpl)),
+ return self.makeindex(req, tmpl)
# nested indexes and hgwebs
@@ -138,7 +138,7 @@
subdir = virtual + '/'
if [r for r in repos if r.startswith(subdir)]:
req.respond(HTTP_OK, ctype)
- return ''.join(self.makeindex(req, tmpl, subdir)),
+ return self.makeindex(req, tmpl, subdir)
up = virtual.rfind('/')
if up < 0:
@@ -147,11 +147,11 @@
# prefixes not found
req.respond(HTTP_NOT_FOUND, ctype)
- return ''.join(tmpl("notfound", repo=virtual)),
+ return tmpl("notfound", repo=virtual)
except ErrorResponse, err:
req.respond(err.code, ctype)
- return ''.join(tmpl('error', error=err.message or '')),
+ return tmpl('error', error=err.message or '')
finally:
tmpl = None
diff --git a/mercurial/templater.py b/mercurial/templater.py
--- a/mercurial/templater.py
+++ b/mercurial/templater.py
@@ -44,7 +44,8 @@
template_re = re.compile(r"(?:(?:#(?=[\w\|%]+#))|(?:{(?=[\w\|%]+})))"
r"(\w+)(?:(?:%(\w+))|((?:\|\w+)*))[#}]")
- def __init__(self, mapfile, filters={}, defaults={}, cache={}):
+ def __init__(self, mapfile, filters={}, defaults={}, cache={},
+ minchunk=1024, maxchunk=65536):
'''set up template engine.
mapfile is name of file to read map definitions from.
filters is dict of functions. each transforms a value into another.
@@ -55,6 +56,8 @@
self.base = (mapfile and os.path.dirname(mapfile)) or ''
self.filters = filters
self.defaults = defaults
+ self.minchunk = minchunk
+ self.maxchunk = maxchunk
if not mapfile:
return
@@ -130,6 +133,13 @@
yield v
def __call__(self, t, **map):
+ stream = self.expand(t, **map)
+ if self.minchunk:
+ stream = util.increasingchunks(stream, min=self.minchunk,
+ max=self.maxchunk)
+ return stream
+
+ def expand(self, t, **map):
'''Perform expansion. t is name of map element to expand. map contains
added elements for use during expansion. Is a generator.'''
tmpl = self._template(t)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -290,6 +290,37 @@
l.sort()
return l
+def increasingchunks(source, min=1024, max=65536):
+ '''return no less than min bytes per chunk while data remains,
+ doubling min after each chunk until it reaches max'''
+ def log2(x):
+ if not x:
+ return 0
+ i = 0
+ while x:
+ x >>= 1
+ i += 1
+ return i - 1
+
+ buf = []
+ blen = 0
+ for chunk in source:
+ buf.append(chunk)
+ blen += len(chunk)
+ if blen >= min:
+ if min < max:
+ min = min << 1
+ nmin = 1 << log2(blen)
+ if nmin > min:
+ min = nmin
+ if min > max:
+ min = max
+ yield ''.join(buf)
+ blen = 0
+ buf = []
+ if buf:
+ yield ''.join(buf)
+
class Abort(Exception):
"""Raised if a command needs to print an error and exit."""
More information about the Mercurial-devel
mailing list