[PATCH] largefiles: use multiple threads for fetching largefiles remotely

Siddharth Agarwal sid at less-broken.com
Thu Oct 9 20:10:32 CDT 2014


On 10/09/2014 05:59 PM, Mads Kiilerich wrote:
> # HG changeset patch
> # User Mads Kiilerich <madski at unity3d.com>
> # Date 1412902786 -7200
> #      Fri Oct 10 02:59:46 2014 +0200
> # Node ID 483463c1d99ba5e5979b756fc3d1255f0a7bd854
> # Parent  a1eb21f5caea4366310e32aa85248791d5bbfa0c
> largefiles: use multiple threads for fetching largefiles remotely
>
> Largefiles are currently fetched with one request per file. That adds a
> constant per-file overhead, which results in poor network utilization.
>
> To mitigate that, run multiple worker threads when fetching largefiles remotely.
> The default is 2 threads, but it can be tweaked with the undocumented config
> setting largefiles._remotegetthreads.

Why is this undocumented?

>
> Some numbers with a slow server and 50 small files:
>    1 thread  36 s
>    2 threads 20 s
>    3 threads 15 s
>    4 threads 12 s
>
> diff --git a/hgext/largefiles/basestore.py b/hgext/largefiles/basestore.py
> --- a/hgext/largefiles/basestore.py
> +++ b/hgext/largefiles/basestore.py
> @@ -8,7 +8,7 @@
>   
>   '''base class for store implementations and store-related utility code'''
>   
> -import re
> +import re, threading
>   
>   from mercurial import util, node, hg
>   from mercurial.i18n import _
> @@ -37,6 +37,7 @@ class basestore(object):
>           self.ui = ui
>           self.repo = repo
>           self.url = url
> +        self.threads = 0
>   
>       def put(self, source, hash):
>           '''Put source file into the store so it can be retrieved by hash.'''
> @@ -60,24 +61,43 @@ class basestore(object):
>           missing = []
>           ui = self.ui
>   
> -        at = 0
>           available = self.exists(set(hash for (_filename, hash) in files))
> -        for filename, hash in files:
> -            ui.progress(_('getting largefiles'), at, unit='lfile',
> -                total=len(files))
> -            at += 1
> -            ui.note(_('getting %s:%s\n') % (filename, hash))
> +        tasks = list(enumerate(reversed(files)))
>   
> -            if not available.get(hash):
> -                ui.warn(_('%s: largefile %s not available from %s\n')
> -                        % (filename, hash, util.hidepassword(self.url)))
> -                missing.append(filename)
> -                continue
> +        def worker():
> +            while True:
> +                try:
> +                    task = tasks.pop()
> +                except IndexError:
> +                    return
> +                at, (filename, hash) = task
> +                ui.progress(_('getting largefiles'), at, unit='lfile',
> +                    total=len(files))
> +                ui.note(_('getting %s:%s\n') % (filename, hash))
>   
> -            if self._gethash(filename, hash):
> -                success.append((filename, hash))
> -            else:
> -                missing.append(filename)
> +                if available.get(hash):
> +                    if self._gethash(filename, hash):
> +                        success.append((filename, hash))
> +                    else:
> +                        missing.append(filename)
> +                else:
> +                    ui.warn(_('%s: largefile %s not available from %s\n')
> +                            % (filename, hash, util.hidepassword(self.url)))
> +                    missing.append(filename)
> +
> +        if self.threads > 1:
> +            running = []
> +            for i in range(self.threads):
> +                t = threading.Thread(target=worker)
> +                t.setDaemon(True)
> +                t.start()
> +                running.append(t)
> +
> +            for t in running:
> +                while t.isAlive():
> +                    t.join(0.1)
> +        else:
> +            worker()
>   
>           ui.progress(_('getting largefiles'), None)
>           return (success, missing)
> diff --git a/hgext/largefiles/remotestore.py b/hgext/largefiles/remotestore.py
> --- a/hgext/largefiles/remotestore.py
> +++ b/hgext/largefiles/remotestore.py
> @@ -18,6 +18,7 @@ class remotestore(basestore.basestore):
>       '''a largefile store accessed over a network'''
>       def __init__(self, ui, repo, url):
>           super(remotestore, self).__init__(ui, repo, url)
> +        self.threads = ui.configint(lfutil.longname, '_remotegetthreads', 2)
>   
>       def put(self, source, hash):
>           if self.sendfile(source, hash):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel



More information about the Mercurial-devel mailing list