[PATCH 1 of 1] Created a class in util called chunkbuffer that buffers reads from an

Eric Hopper hopper at omnifarious.org
Sun Sep 4 10:34:35 CDT 2005


iterator over strings (aka chunks).

Also added to util (for future use) is a generator function that
iterates over a file n bytes at a time.

Lastly, localrepo was changed to use this new chunkbuffer class when
reading changegroups from the local repository.


2 files changed, 70 insertions(+), 16 deletions(-)
mercurial/localrepo.py |   16 ----------
mercurial/util.py      |   70 +++++++++++++++++++++++++++++++++++++++++++++++-


# HG changeset patch
# User Eric Hopper <hopper at omnifarious.org>
# Node ID a1a61ebc7c6ba0d6f0971c9c4e70800ca016ee7c
# Parent  a33a7a543803c7383a6918b19797bf9fd6b83e8e
Created a class in util called chunkbuffer that buffers reads from an
iterator over strings (aka chunks).

Also added to util (for future use) is a generator function that
iterates over a file n bytes at a time.

Lastly, localrepo was changed to use this new chunkbuffer class when
reading changegroups from the local repository.

diff -r a33a7a543803 -r a1a61ebc7c6b mercurial/localrepo.py
--- a/mercurial/localrepo.py	Sun Aug 28 06:45:27 2005
+++ b/mercurial/localrepo.py	Sun Sep  4 15:33:54 2005
@@ -886,21 +886,7 @@
         return remote.addchangegroup(cg)
 
     def changegroup(self, basenodes):
-        class genread:
-            def __init__(self, generator):
-                self.g = generator
-                self.buf = ""
-            def fillbuf(self):
-                self.buf += "".join(self.g)
-
-            def read(self, l):
-                while l > len(self.buf):
-                    try:
-                        self.buf += self.g.next()
-                    except StopIteration:
-                        break
-                d, self.buf = self.buf[:l], self.buf[l:]
-                return d
+        genread = util.chunkbuffer
 
         def gengroup():
             nodes = self.newer(basenodes)
diff -r a33a7a543803 -r a1a61ebc7c6b mercurial/util.py
--- a/mercurial/util.py	Sun Aug 28 06:45:27 2005
+++ b/mercurial/util.py	Sun Sep  4 15:33:54 2005
@@ -12,7 +12,7 @@
 
 import os, errno
 from demandload import *
-demandload(globals(), "re")
+demandload(globals(), "re cStringIO")
 
 def binary(s):
     """return true if a string is binary data using diff's heuristic"""
@@ -354,3 +354,71 @@
             val = os.WSTOPSIG(code)
             return "stopped by signal %d" % val, val
         raise ValueError("invalid exit code")
+
+class chunkbuffer(object):
+    """Allow arbitrary sized reads of data to be served efficiently from an
+    iterator that yields string chunks of arbitrary size."""
+    def __init__(self, in_iter, targetsize = 2**16):
+        """in_iter is any iterable of string chunks.  targetsize (> 0, else
+        ValueError is raised) is how big a buffer to try to maintain."""
+        self.in_iter = iter(in_iter)
+        self.buf = ''
+        targetsize = int(targetsize)
+        if (targetsize <= 0):
+            raise ValueError("targetsize must be greater than 0, was %d" % targetsize)
+        self.targetsize = int(targetsize)
+        self.iterempty = False  # set to True once in_iter is known to be exhausted
+    def fillbuf(self):
+        """x.fillbuf()
+
+        Ignore the target size, and just read every chunk from the iterator
+        until it's empty.  Does nothing if the iterator is already exhausted."""
+        if not self.iterempty:
+            collector = cStringIO.StringIO()
+            collector.write(self.buf)
+            for ch in self.in_iter:
+                collector.write(ch)
+            self.buf = collector.getvalue()
+            collector.close()
+            collector = None
+            self.iterempty = True
+
+    def read(self, l):
+        """x.read(l) -> str
+        Read l bytes of data from the iterator of chunks of data.  Returns less
+        than l bytes if the iterator runs dry."""
+        if l > len(self.buf) and not self.iterempty:
+            # Refill goal: the smallest multiple of self.targetsize above l
+            targetsize = self.targetsize * ((l // self.targetsize) + 1)
+            collector = cStringIO.StringIO()
+            collector.write(self.buf)
+            collected = len(self.buf)
+            for chunk in self.in_iter:
+                collector.write(chunk)
+                collected += len(chunk)
+                if collected >= targetsize:
+                    break
+            if collected < targetsize:  # goal not reached, so in_iter ran dry
+                self.iterempty = True
+            self.buf = collector.getvalue()
+            collector.close()
+            collector = None
+        s = self.buf[:l]
+        self.buf = buffer(self.buf, l)  # keep the unread tail as a buffer view
+        return s
+    def __repr__(self):
+        return "<%s.%s targetsize = %u buffered = %u bytes>" % \
+               (self.__class__.__module__, self.__class__.__name__,
+                self.targetsize, len(self.buf))
+
+def filechunkiter(f, size = 65536):
+    """filechunkiter(file[, size]) -> generator
+
+    Create a generator that produces all the data in the file, size (default
+    65536) bytes at a time.  Chunks may be less than size bytes if the
+    chunk is the last chunk in the file, or the file is a socket or some
+    other type of file that sometimes reads less data than is requested."""
+    s = f.read(size)
+    while s:  # an empty read marks end of data; stop instead of yielding ''
+        yield s
+        s = f.read(size)


More information about the Mercurial mailing list