[PATCH] hgweb: return data in increasing chunk sizes

Brendan Cully brendan at kublai.com
Fri Nov 21 17:58:50 CST 2008


On Wednesday, 19 November 2008 at 18:37, Brendan Cully wrote:
> # HG changeset patch
> # User Brendan Cully <brendan at kublai.com>
> # Date 1227148600 28800
> # Node ID 8c4bdff155f5a9c19c9e58f2900a2816758a7312
> # Parent  92c952c4470c41647a30184b21466554021d580e
> hgweb: return data in increasing chunk sizes

Here's an alternative version that buries the buffer code inside
templater.__call__, so that all template users get it and it doesn't
layer on top of the protocol functions.
-------------- next part --------------
# HG changeset patch
# User Brendan Cully <brendan at kublai.com>
# Date 1227311500 28800
# Node ID 368594f505148b5dbe60b3dd74080474334d23dc
# Parent  03b60f2f90bf14594ffd733cace1102c5c10499b
hgweb: return data in increasing chunk sizes

Currently hgweb is not streaming its output -- it accumulates the
entire response before sending it. This patch restores streaming
behaviour, buffering the output sensibly (currently, it produces 1K,
then 2K, then 4K, and so on, up to a maximum buffer size of 64K). Local
testing of a fetch of a 100,000-line file with wget produces a slight
slowdown overall (up from 6.5 seconds to 7.2 seconds), but instead of
waiting 6 seconds for headers to arrive, output begins immediately.

diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -182,20 +182,20 @@
                 content = getattr(webcommands, cmd)(self, req, tmpl)
                 req.respond(HTTP_OK, ctype)
 
-            return ''.join(content),
+            return content
 
         except revlog.LookupError, err:
             req.respond(HTTP_NOT_FOUND, ctype)
             msg = str(err)
             if 'manifest' not in msg:
                 msg = 'revision not found: %s' % err.name
-            return ''.join(tmpl('error', error=msg)),
+            return tmpl('error', error=msg)
         except (RepoError, revlog.RevlogError), inst:
             req.respond(HTTP_SERVER_ERROR, ctype)
-            return ''.join(tmpl('error', error=str(inst))),
+            return tmpl('error', error=str(inst))
         except ErrorResponse, inst:
             req.respond(inst.code, ctype)
-            return ''.join(tmpl('error', error=inst.message)),
+            return tmpl('error', error=inst.message)
 
     def templater(self, req):
 
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -116,7 +116,7 @@
                 # top-level index
                 elif not virtual:
                     req.respond(HTTP_OK, ctype)
-                    return ''.join(self.makeindex(req, tmpl)),
+                    return self.makeindex(req, tmpl)
 
                 # nested indexes and hgwebs
 
@@ -138,7 +138,7 @@
                     subdir = virtual + '/'
                     if [r for r in repos if r.startswith(subdir)]:
                         req.respond(HTTP_OK, ctype)
-                        return ''.join(self.makeindex(req, tmpl, subdir)),
+                        return self.makeindex(req, tmpl, subdir)
 
                     up = virtual.rfind('/')
                     if up < 0:
@@ -147,11 +147,11 @@
 
                 # prefixes not found
                 req.respond(HTTP_NOT_FOUND, ctype)
-                return ''.join(tmpl("notfound", repo=virtual)),
+                return tmpl("notfound", repo=virtual)
 
             except ErrorResponse, err:
                 req.respond(err.code, ctype)
-                return ''.join(tmpl('error', error=err.message or '')),
+                return tmpl('error', error=err.message or '')
         finally:
             tmpl = None
 
diff --git a/mercurial/templater.py b/mercurial/templater.py
--- a/mercurial/templater.py
+++ b/mercurial/templater.py
@@ -44,7 +44,8 @@
     template_re = re.compile(r"(?:(?:#(?=[\w\|%]+#))|(?:{(?=[\w\|%]+})))"
                              r"(\w+)(?:(?:%(\w+))|((?:\|\w+)*))[#}]")
 
-    def __init__(self, mapfile, filters={}, defaults={}, cache={}):
+    def __init__(self, mapfile, filters={}, defaults={}, cache={},
+                 minchunk=1024, maxchunk=65536):
         '''set up template engine.
         mapfile is name of file to read map definitions from.
         filters is dict of functions. each transforms a value into another.
@@ -55,6 +56,8 @@
         self.base = (mapfile and os.path.dirname(mapfile)) or ''
         self.filters = filters
         self.defaults = defaults
+        self.minchunk = minchunk
+        self.maxchunk = maxchunk
 
         if not mapfile:
             return
@@ -130,6 +133,13 @@
                 yield v
 
     def __call__(self, t, **map):
+        stream = self.expand(t, **map)
+        if self.minchunk:
+            stream = util.increasingchunks(stream, min=self.minchunk,
+                                           max=self.maxchunk)
+        return stream
+        
+    def expand(self, t, **map):
         '''Perform expansion. t is name of map element to expand. map contains
         added elements for use during expansion. Is a generator.'''
         tmpl = self._template(t)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -290,6 +290,37 @@
     l.sort()
     return l
 
+def increasingchunks(source, min=1024, max=65536):
+    '''return no less than min bytes per chunk while data remains,
+    doubling min after each chunk until it reaches max'''
+    def log2(x):
+        if not x:
+            return 0
+        i = 0
+        while x:
+            x >>= 1
+            i += 1
+        return i - 1
+
+    buf = []
+    blen = 0
+    for chunk in source:
+        buf.append(chunk)
+        blen += len(chunk)
+        if blen >= min:
+            if min < max:
+                min = min << 1
+                nmin = 1 << log2(blen)
+                if nmin > min:
+                    min = nmin
+                if min > max:
+                    min = max
+            yield ''.join(buf)
+            blen = 0
+            buf = []
+    if buf:
+        yield ''.join(buf)
+
 class Abort(Exception):
     """Raised if a command needs to print an error and exit."""
 


More information about the Mercurial-devel mailing list