[PATCH 1 of 3 RFC] scmutil: support background file closing

Gregory Szorc gregory.szorc at gmail.com
Sun Jan 3 03:15:51 UTC 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1451790907 28800
#      Sat Jan 02 19:15:07 2016 -0800
# Node ID db897fc9ce7714b016a5a87e239d2e4ff95839bd
# Parent  ca35bce424bebe46e53c3e701322719a959043b8
scmutil: support background file closing

Closing files that have been appended to is relatively slow on
Windows/NTFS. This makes several Mercurial operations slower on
Windows.

The workaround to this issue is conceptually simple: use multiple
threads for I/O. Unfortunately, Python doesn't scale well to multiple
threads because of the GIL. And, refactoring our code to use threads
everywhere would be a huge undertaking. So, we decide to tackle this
problem by starting small: establishing a thread pool for closing
files.

This patch establishes a mechanism for closing file handles on separate
threads. The coordinator object is basically a queue of file handles to
operate on and a thread pool consuming from the queue.

When files are opened through the VFS layer, the caller can specify
a file closer to use for closing file handles. If defined, it will
be used to close files instead of directly calling .close().

A proxy class for file handles has been added. We must use a proxy
because it isn't possible to modify __class__ on built-in types. This
adds some overhead. But as future patches will show, this overhead
is cancelled out by the benefit of closing file handles on background
threads.

diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -2,23 +2,25 @@
 #
 #  Copyright Matt Mackall <mpm at selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
+import Queue
 import errno
 import glob
 import os
 import re
 import shutil
 import stat
 import tempfile
+import threading
 
 from .i18n import _
 from .node import wdirrev
 from . import (
     encoding,
     error,
     match as matchmod,
     osutil,
@@ -473,22 +475,26 @@ class vfs(abstractvfs):
         return util.checkexec(self.base)
 
     def _fixfilemode(self, name):
         if self.createmode is None or not self._chmod:
             return
         os.chmod(name, self.createmode & 0o666)
 
     def __call__(self, path, mode="r", text=False, atomictemp=False,
-                 notindexed=False):
+                 notindexed=False, filecloser=None):
         '''Open ``path`` file, which is relative to vfs root.
 
         Newly created directories are marked as "not to be indexed by
         the content indexing service", if ``notindexed`` is specified
         for "write" mode access.
+
+        If ``filecloser`` is passed, it will be used to close the
+        opened file. ``filecloser`` only works if the file is being
+        used as a context manager.
         '''
         if self._audit:
             r = util.checkosfilename(path)
             if r:
                 raise error.Abort("%s: %r" % (r, path))
         self.audit(path)
         f = self.join(path)
 
@@ -523,16 +529,20 @@ class vfs(abstractvfs):
                 if nlink > 0:
                     if self._trustnlink is None:
                         self._trustnlink = nlink > 1 or util.checknlink(f)
                     if nlink > 1 or not self._trustnlink:
                         util.rename(util.mktempcopy(f), f)
         fp = util.posixfile(f, mode)
         if nlink == 0:
             self._fixfilemode(f)
+
+        if filecloser and os.name == 'nt':
+            fp = delayclosedfile(fp, filecloser)
+
         return fp
 
     def symlink(self, src, dst):
         self.audit(dst)
         linkname = self.join(dst)
         try:
             os.unlink(linkname)
         except OSError:
@@ -1189,8 +1199,82 @@ def gdinitconfig(ui):
     return (ui.configbool('format', 'generaldelta', False)
             or ui.configbool('format', 'usegeneraldelta', True))
 
 def gddeltaconfig(ui):
     """helper function to know if incoming delta should be optimised
     """
     # experimental config: format.generaldelta
     return ui.configbool('format', 'generaldelta', False)
+
+class delayclosedfile(object):
+    def __init__(self, fh, closer):
+        object.__setattr__(self, '_origfh', fh)
+        object.__setattr__(self, '_closer', closer)
+
+    def __getattr__(self, attr):
+        return getattr(self._origfh, attr)
+
+    def __setattr__(self, attr, value):
+        return setattr(self._origfh, attr, value)
+
+    def __delattr__(self, attr):
+        return delattr(self._origfh, attr)
+
+    def __enter__(self):
+        return self._origfh.__enter__()
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self._closer.close(self._origfh)
+
+class backgroundfilecloser(object):
+    """Coordinates background closing of file handles on multiple threads."""
+    def __init__(self):
+        self._running = False
+        self._threads = []
+
+        # We currently only do things on Windows.
+        if os.name != 'nt':
+            return
+
+        # Windows defaults to a limit of 512 open files. A buffer of 128
+        # should give us enough headway.
+        self._queue = Queue.Queue(384)
+        self._running = True
+
+        for i in range(4):
+            t = threading.Thread(target=self._worker, name='backgroundcloser')
+            t.start()
+            self._threads.append(t)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self.finalize()
+
+    def _worker(self):
+        """Main routine for worker thread."""
+        while True:
+            try:
+                fh = self._queue.get(block=True, timeout=0.100)
+                try:
+                    fh.close()
+                except Exception:
+                    # Need to catch or the thread will terminate and
+                    # we could orphan file descriptors.
+                    pass
+            except Queue.Empty:
+                if not self._running:
+                    break
+
+    def close(self, fh):
+        """Schedule a file for closing."""
+        # If we're not actively running, close synchronously.
+        if not self._running:
+            fh.close()
+            return
+
+        self._queue.put(fh, block=True, timeout=None)
+
+    def finalize(self):
+        """Finish performing background tasks."""
+        self._running = False


More information about the Mercurial-devel mailing list