[PATCH remotefilelog-ext] Reuse ssh connection across miss fetches
Augie Fackler
raf at durin42.com
Fri Dec 11 09:21:18 CST 2015
On Wed, Dec 09, 2015 at 04:32:50PM -0800, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham at fb.com>
> # Date 1449707498 28800
> # Wed Dec 09 16:31:38 2015 -0800
> # Node ID 7fde3699448cc3b3b6f3b1b5fe64b856d974fcc3
> # Parent ddb335eb76d96758d3171ada6564d46390166f3c
> Reuse ssh connection across miss fetches
Overall looks fine, but I do have a comment below about code
structuring and some isinstance checks look fishy to me...
>
> Previously we recreated the ssh connection for each prefetch. In the case where
> we were fetching files one by one (like when we forgot to batch request files),
> it results in a 1+ second overhead for each fetch.
>
> This changes makes us hold onto the ssh connection and simply issue new requests
> along the same connection.
>
> diff --git a/remotefilelog/fileserverclient.py b/remotefilelog/fileserverclient.py
> --- a/remotefilelog/fileserverclient.py
> +++ b/remotefilelog/fileserverclient.py
> @@ -133,7 +133,6 @@ def _getfilesbatch(
>
> def _getfiles(
> remote, receivemissing, fallbackpath, progresstick, missed, idmap):
> - remote._callstream("getfiles")
> i = 0
> while i < len(missed):
> # issue a batch of requests
> @@ -152,8 +151,6 @@ def _getfiles(
> for j in range(start, end):
> receivemissing(remote.pipei, missed[j])
> progresstick()
> - remote.cleanup()
> - remote = None
>
> class fileserverclient(object):
> """A client for requesting files from the remote file server.
> @@ -169,6 +166,7 @@ class fileserverclient(object):
>
> self.localcache = localcache(repo)
> self.remotecache = cacheconnection()
> + self.remoteserver = None
>
> def request(self, fileids):
> """Takes a list of filename/node pairs and fetches them from the
> @@ -241,14 +239,26 @@ class fileserverclient(object):
> verbose = self.ui.verbose
> self.ui.verbose = False
> try:
> - if not fallbackpath:
> - raise util.Abort("no remotefilelog server configured - "
> - "is your .hg/hgrc trusted?")
> - remote = hg.peer(self.ui, {}, fallbackpath)
> + newconnection = False
> + if not self.remoteserver:
> + if not fallbackpath:
> + raise util.Abort("no remotefilelog server "
> + "configured - is your .hg/hgrc trusted?")
> + self.remoteserver = hg.peer(self.ui, {}, fallbackpath)
> + newconnection = True
> + elif (isinstance(self.remoteserver, sshpeer.sshpeer) and
> + self.remoteserver.subprocess.poll() != None):
> + # The ssh connection died, so recreate it.
> + self.remoteserver = hg.peer(self.ui, {}, fallbackpath)
> + newconnection = True
I would be inclined to tease this out into some sort of
self._connect() method or something just to try and keep things clear.
What's up with the sshpeer.sshpeer isinstance checks? Nothing else is
assigning to this attribute right now, so it shouldn't matter...
> +
> + remote = self.remoteserver
> # TODO: deduplicate this with the constant in shallowrepo
> if remote.capable("remotefilelog"):
> if not isinstance(remote, sshpeer.sshpeer):
> raise util.Abort('remotefilelog requires ssh servers')
> + if newconnection:
> + remote._callstream("getfiles")
> _getfiles(remote, self.receivemissing, fallbackpath,
> progresstick, missed, idmap)
> elif remote.capable("getfile"):
> @@ -331,6 +341,10 @@ class fileserverclient(object):
> if self.remotecache.connected:
> self.remotecache.close()
>
> + if self.remoteserver and isinstance(self.remoteserver, sshpeer.sshpeer):
> + self.remoteserver.cleanup()
> + self.remoteserver = None
> +
> def prefetch(self, fileids, force=False):
> """downloads the given file versions to the cache
> """
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list