[PATCH 1 of 3] scmutil: support background file closing
Gregory Szorc
gregory.szorc at gmail.com
Thu Jan 14 21:45:28 UTC 2016
# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1452807299 28800
# Thu Jan 14 13:34:59 2016 -0800
# Node ID 1e32fb544d387374d4caab11e9aeef5aadb9c54d
# Parent e6e34c4e391668c5d8af8f98c004a27c77b1e2fa
scmutil: support background file closing
Closing files that have been appended to is relatively slow on
Windows/NTFS. This makes several Mercurial operations slower on
Windows.
The workaround to this issue is conceptually simple: use multiple
threads for I/O. Unfortunately, Python doesn't scale well to multiple
threads because of the GIL. And, refactoring our code to use threads
everywhere would be a huge undertaking. So, we decide to tackle this
problem by starting small: establishing a thread pool for closing
files.
This patch establishes a mechanism for closing file handles on separate
threads. The coordinator object is basically a queue of file handles to
operate on and a thread pool consuming from the queue.
When files are opened through the VFS layer, the caller can specify
that delayed closing is allowed.
A proxy class for file handles has been added. We must use a proxy
because it isn't possible to modify __class__ on built-in types. This
adds some overhead. But as future patches will show, this overhead
is cancelled out by the benefit of closing file handles on background
threads.
diff --git a/mercurial/help/config.txt b/mercurial/help/config.txt
--- a/mercurial/help/config.txt
+++ b/mercurial/help/config.txt
@@ -2036,4 +2036,28 @@ helps performance.
``numcpus``
Number of CPUs to use for parallel operations. A zero or
negative value is treated as ``use the default``.
(default: 4 or the number of CPUs on the system, whichever is larger)
+
+``backgroundclose``
+ Whether to enable closing file handles on background threads during certain
+ operations. Some platforms aren't very efficient at closing file
+ handles that have been written or appended to. By performing file closing
+ on background threads, file write rate can increase substantially.
+ (default: true on Windows, false elsewhere)
+
+``backgroundcloseminfilecount``
+ Minimum number of files required to trigger background file closing.
+ Operations not writing this many files won't start background close
+ threads.
+ (default: 2048)
+
+``backgroundclosemaxqueue``
+ The maximum number of opened file handles waiting to be closed in the
+ background. This option only has an effect if ``backgroundclose`` is
+ enabled.
+ (default: 384)
+
+``backgroundclosethreadcount``
+ Number of threads to process background file closes. Only relevant if
+ ``backgroundclose`` is enabled.
+ (default: 4)
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -6,15 +6,18 @@
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
+import Queue
+import contextlib
import errno
import glob
import os
import re
import shutil
import stat
import tempfile
+import threading
from .i18n import _
from .node import wdirrev
from . import (
@@ -253,17 +256,18 @@ class abstractvfs(object):
raise
return []
def open(self, path, mode="r", text=False, atomictemp=False,
- notindexed=False):
+ notindexed=False, backgroundclose=False):
'''Open ``path`` file, which is relative to vfs root.
Newly created directories are marked as "not to be indexed by
the content indexing service", if ``notindexed`` is specified
for "write" mode access.
'''
self.open = self.__call__
- return self.__call__(path, mode, text, atomictemp, notindexed)
+ return self.__call__(path, mode, text, atomictemp, notindexed,
+ backgroundclose=backgroundclose)
def read(self, path):
with self(path, 'rb') as fp:
return fp.read()
@@ -435,8 +439,29 @@ class abstractvfs(object):
prefixlen = len(pathutil.normasprefix(root))
for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
yield (dirpath[prefixlen:], dirs, files)
+ @contextlib.contextmanager
+ def backgroundclosing(self, ui, expectedcount=-1):
+ """Allow files to be closed asynchronously.
+
+ When this context manager is active, ``backgroundclose`` can be passed
+ to ``__call__``/``open`` to result in the file possibly being closed
+ asynchronously, on a background thread.
+ """
+ # This is an arbitrary restriction and could be changed if we ever
+ # have a use case.
+ vfs = getattr(self, 'vfs', self)
+ if getattr(vfs, '_backgroundfilecloser', None):
+ raise error.Abort('can only have 1 active background file closer')
+
+ with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
+ try:
+ vfs._backgroundfilecloser = bfc
+ yield bfc
+ finally:
+ vfs._backgroundfilecloser = None
+
class vfs(abstractvfs):
'''Operate files relative to a base directory
This class is used to hide the details of COW semantics and
@@ -477,14 +502,27 @@ class vfs(abstractvfs):
return
os.chmod(name, self.createmode & 0o666)
def __call__(self, path, mode="r", text=False, atomictemp=False,
- notindexed=False):
+ notindexed=False, backgroundclose=False):
'''Open ``path`` file, which is relative to vfs root.
Newly created directories are marked as "not to be indexed by
the content indexing service", if ``notindexed`` is specified
for "write" mode access.
+
+ If ``backgroundclose`` is passed, the file may be closed asynchronously.
+ It can only be used if the ``self.backgroundclosing()`` context manager
+ is active. This should only be specified if the following criteria hold:
+
+ 1. There is a potential for writing thousands of files. Unless you
+ are writing thousands of files, the performance benefits of
+ asynchronously closing files are not realized.
+ 2. Files are opened exactly once for the ``backgroundclosing``
+ active duration and are therefore free of race conditions between
+ closing a file on a background thread and reopening it. (If the
+ file were opened multiple times, there could be unflushed data
+ because the original file handle hasn't been flushed/closed yet.)
'''
if self._audit:
r = util.checkosfilename(path)
if r:
@@ -527,8 +565,16 @@ class vfs(abstractvfs):
util.rename(util.mktempcopy(f), f)
fp = util.posixfile(f, mode)
if nlink == 0:
self._fixfilemode(f)
+
+ if backgroundclose:
+ if not self._backgroundfilecloser:
+ raise error.Abort('backgroundclose can only be used when a '
+ 'backgroundclosing context manager is active')
+
+ fp = delayclosedfile(fp, self._backgroundfilecloser)
+
return fp
def symlink(self, src, dst):
self.audit(dst)
@@ -1213,4 +1259,124 @@ def gddeltaconfig(ui):
"""helper function to know if incoming delta should be optimised
"""
# experimental config: format.generaldelta
return ui.configbool('format', 'generaldelta', False)
+
+class delayclosedfile(object):
+ """Proxy for a file object whose close is delayed.
+
+ Do not instantiate outside of the vfs layer.
+ """
+
+ def __init__(self, fh, closer):
+ object.__setattr__(self, '_origfh', fh)
+ object.__setattr__(self, '_closer', closer)
+
+ def __getattr__(self, attr):
+ return getattr(self._origfh, attr)
+
+ def __setattr__(self, attr, value):
+ return setattr(self._origfh, attr, value)
+
+ def __delattr__(self, attr):
+ return delattr(self._origfh, attr)
+
+ def __enter__(self):
+ return self._origfh.__enter__()
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self._closer.close(self._origfh)
+
+ def close(self):
+ self._closer.close(self._origfh)
+
+class backgroundfilecloser(object):
+ """Coordinates background closing of file handles on multiple threads."""
+ def __init__(self, ui, expectedcount=-1):
+ self._running = False
+ self._entered = False
+ self._threads = []
+ self._threadexception = None
+
+ # Only Windows/NTFS has slow file closing. So only enable by default
+ # on that platform. But allow to be enabled elsewhere for testing.
+ defaultenabled = os.name == 'nt'
+ enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
+
+ if not enabled:
+ return
+
+ # There is overhead to starting and stopping the background threads.
+ # Don't do background processing unless the file count is large enough
+ # to justify it.
+ minfilecount = ui.configint('worker', 'backgroundcloseminfilecount',
+ 2048)
+ # FUTURE dynamically start background threads after minfilecount closes.
+ # (We don't currently have any callers that don't know their file count)
+ if expectedcount > 0 and expectedcount < minfilecount:
+ return
+
+ # Windows defaults to a limit of 512 open files. A buffer of 128
+ # should give us enough headway.
+ maxqueue = ui.configint('worker', 'backgroundclosemaxqueue', 384)
+ threadcount = ui.configint('worker', 'backgroundclosethreadcount', 4)
+
+ ui.debug('starting %d threads for background file closing\n' %
+ threadcount)
+
+ self._queue = Queue.Queue(maxsize=maxqueue)
+ self._running = True
+
+ for i in range(threadcount):
+ t = threading.Thread(target=self._worker, name='backgroundcloser')
+ self._threads.append(t)
+ t.start()
+
+ def __enter__(self):
+ self._entered = True
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self._running = False
+
+ # Wait for threads to finish closing so open files don't linger for
+ # longer than lifetime of context manager.
+ for t in self._threads:
+ t.join()
+
+ def _worker(self):
+ """Main routine for worker thread."""
+ while True:
+ try:
+ fh = self._queue.get(block=True, timeout=0.100)
+ # Need to catch or the thread will terminate and
+ # we could orphan file descriptors.
+ try:
+ fh.close()
+ except Exception as e:
+ # Stash so can re-raise from main thread later.
+ self._threadexception = e
+ except Queue.Empty:
+ if not self._running:
+ break
+
+ def close(self, fh):
+ """Schedule a file for closing."""
+ if not self._entered:
+ raise error.Abort('can only call close() when context manager '
+ 'active')
+
+ # If a background thread encountered an exception, raise now so we fail
+ # fast. Otherwise we may potentially go on for minutes until the error
+ # is acted on.
+ if self._threadexception:
+ e = self._threadexception
+ self._threadexception = None
+ raise e
+
+ # If we're not actively running, close synchronously.
+ if not self._running:
+ fh.close()
+ return
+
+ self._queue.put(fh, block=True, timeout=None)
+
More information about the Mercurial-devel
mailing list