[PATCH] largefiles: use multiple threads for fetching largefiles remotely

Mads Kiilerich mads at kiilerich.com
Thu Oct 9 19:59:53 CDT 2014


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1412902786 -7200
#      Fri Oct 10 02:59:46 2014 +0200
# Node ID 483463c1d99ba5e5979b756fc3d1255f0a7bd854
# Parent  a1eb21f5caea4366310e32aa85248791d5bbfa0c
largefiles: use multiple threads for fetching largefiles remotely

Largefiles are currently fetched with one request per file. That adds a
constant overhead per file that gives bad network utilization.

To mitigate that, run multiple worker threads when fetching largefiles remotely.
The default is 2 threads, but it can be tweaked with the undocumented config
setting largefiles._remotegetthreads.

Some numbers with a slow server and 50 small files:
  1 thread  36 s
  2 threads 20 s
  3 threads 15 s
  4 threads 12 s

diff --git a/hgext/largefiles/basestore.py b/hgext/largefiles/basestore.py
--- a/hgext/largefiles/basestore.py
+++ b/hgext/largefiles/basestore.py
@@ -8,7 +8,7 @@
 
 '''base class for store implementations and store-related utility code'''
 
-import re
+import re, threading
 
 from mercurial import util, node, hg
 from mercurial.i18n import _
@@ -37,6 +37,7 @@ class basestore(object):
         self.ui = ui
         self.repo = repo
         self.url = url
+        self.threads = 0
 
     def put(self, source, hash):
         '''Put source file into the store so it can be retrieved by hash.'''
@@ -60,24 +61,43 @@ class basestore(object):
         missing = []
         ui = self.ui
 
-        at = 0
         available = self.exists(set(hash for (_filename, hash) in files))
-        for filename, hash in files:
-            ui.progress(_('getting largefiles'), at, unit='lfile',
-                total=len(files))
-            at += 1
-            ui.note(_('getting %s:%s\n') % (filename, hash))
+        tasks = list(enumerate(reversed(files)))
 
-            if not available.get(hash):
-                ui.warn(_('%s: largefile %s not available from %s\n')
-                        % (filename, hash, util.hidepassword(self.url)))
-                missing.append(filename)
-                continue
+        def worker():
+            while True:
+                try:
+                    task = tasks.pop()
+                except IndexError:
+                    return
+                at, (filename, hash) = task
+                ui.progress(_('getting largefiles'), at, unit='lfile',
+                    total=len(files))
+                ui.note(_('getting %s:%s\n') % (filename, hash))
 
-            if self._gethash(filename, hash):
-                success.append((filename, hash))
-            else:
-                missing.append(filename)
+                if available.get(hash):
+                    if self._gethash(filename, hash):
+                        success.append((filename, hash))
+                    else:
+                        missing.append(filename)
+                else:
+                    ui.warn(_('%s: largefile %s not available from %s\n')
+                            % (filename, hash, util.hidepassword(self.url)))
+                    missing.append(filename)
+
+        if self.threads > 1:
+            running = []
+            for i in range(self.threads):
+                t = threading.Thread(target=worker)
+                t.setDaemon(True)
+                t.start()
+                running.append(t)
+
+            for t in running:
+                while t.isAlive():
+                    t.join(0.1)
+        else:
+            worker()
 
         ui.progress(_('getting largefiles'), None)
         return (success, missing)
diff --git a/hgext/largefiles/remotestore.py b/hgext/largefiles/remotestore.py
--- a/hgext/largefiles/remotestore.py
+++ b/hgext/largefiles/remotestore.py
@@ -18,6 +18,7 @@ class remotestore(basestore.basestore):
     '''a largefile store accessed over a network'''
     def __init__(self, ui, repo, url):
         super(remotestore, self).__init__(ui, repo, url)
+        self.threads = ui.configint(lfutil.longname, '_remotegetthreads', 2)
 
     def put(self, source, hash):
         if self.sendfile(source, hash):


More information about the Mercurial-devel mailing list