[PATCH 2 of 2 v2] util: increase filechunkiter size to 128k

Mads Kiilerich mads at kiilerich.com
Thu Oct 13 21:08:25 EDT 2016


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1476402795 -7200
#      Fri Oct 14 01:53:15 2016 +0200
# Node ID 8909ce1593659bce0563cc66ca3fa34525daed65
# Parent  b7889580507c3d1d40e61904c7a2c2aba335c6cd
util: increase filechunkiter size to 128k

util.filechunkiter has been using a chunk size of 64k for more than 10 years,
also in years where Moore's law still was a law. It is probably ok to bump it
now and perhaps get a slight win in some cases.

Also, largefiles have been using 128k for a long time. Specifying that size
multiple times (or forgetting to do it) seems a bit stupid. Decreasing it to
64k also seems unfortunate.

Thus, we will set the default chunksize to 128k and use the default everywhere.

diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -372,7 +372,7 @@ def hashfile(file):
         return ''
     hasher = hashlib.sha1('')
     with open(file, 'rb') as fd:
-        for data in util.filechunkiter(fd, 128 * 1024):
+        for data in util.filechunkiter(fd):
             hasher.update(data)
     return hasher.hexdigest()
 
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -1356,7 +1356,7 @@ def overridecat(orig, ui, repo, file1, *
                               'downloaded')  % lf)
                 path = lfutil.usercachepath(repo.ui, hash)
                 with open(path, "rb") as fpin:
-                    for chunk in util.filechunkiter(fpin, 128 * 1024):
+                    for chunk in util.filechunkiter(fpin):
                         fp.write(chunk)
         err = 0
     return err
diff --git a/hgext/largefiles/proto.py b/hgext/largefiles/proto.py
--- a/hgext/largefiles/proto.py
+++ b/hgext/largefiles/proto.py
@@ -134,7 +134,7 @@ def wirereposetup(ui, repo):
                                                 length))
 
             # SSH streams will block if reading more than length
-            for chunk in util.filechunkiter(stream, 128 * 1024, length):
+            for chunk in util.filechunkiter(stream, limit=length):
                 yield chunk
             # HTTP streams must hit the end to process the last empty
             # chunk of Chunked-Encoding so the connection can be reused.
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1684,9 +1684,9 @@ class chunkbuffer(object):
 
         return ''.join(buf)
 
-def filechunkiter(f, size=65536, limit=None):
+def filechunkiter(f, size=131072, limit=None):
     """Create a generator that produces the data in the file size
-    (default 65536) bytes at a time, up to optional limit (default is
+    (default 131072) bytes at a time, up to optional limit (default is
     to read all data).  Chunks may be less than size bytes if the
     chunk is the last chunk in the file, or the file is a socket or
     some other type of file that sometimes reads less data than is
diff --git a/tests/test-largefiles-small-disk.t b/tests/test-largefiles-small-disk.t
--- a/tests/test-largefiles-small-disk.t
+++ b/tests/test-largefiles-small-disk.t
@@ -11,7 +11,7 @@ Test how largefiles abort in case the di
   > shutil.copyfileobj = copyfileobj
   > #
   > # this makes the rewritten code abort:
-  > def filechunkiter(f, size=65536, limit=None):
+  > def filechunkiter(f, size=131072, limit=None):
   >     yield f.read(4)
   >     raise IOError(errno.ENOSPC, os.strerror(errno.ENOSPC))
   > util.filechunkiter = filechunkiter


More information about the Mercurial-devel mailing list