[PATCH 1 of 5] platform: create a file closing utility

Gregory Szorc gregory.szorc at gmail.com
Wed Sep 30 05:36:27 UTC 2015


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1443551935 25200
#      Tue Sep 29 11:38:55 2015 -0700
# Node ID f578fedbeb3ce63fff0e3fdbe7b2424e9cdd399a
# Parent  46af0adb5c375cc51ce0d29cbdcd8ba843a33425
platform: create a file closing utility

Closing file handles that have been written to on Windows is much slower
than compared to other platforms. So slow, in fact, that waiting for
file closing adds significant wall time to operations (such as
unbundling) that write thousands or even hundreds of thousands of files
(as experienced when cloning large repositories like mozilla-central).

This patch adds a platform-specific interface for performing bulk file
close operations. The implementation on POSIX is essentially a giant
wrapper around `fh.close()` because file closing on POISX platforms is
efficient. On Windows, we start a background thread that closes file
handles as they are added to a queue.

Since there are no consumers of this new class, there is no change of
behavior in this patch.

diff --git a/mercurial/posix.py b/mercurial/posix.py
--- a/mercurial/posix.py
+++ b/mercurial/posix.py
@@ -646,4 +646,42 @@ def readpipe(pipe):
 
         return ''.join(chunks)
     finally:
         fcntl.fcntl(pipe, fcntl.F_SETFL, oldflags)
+
+class filecloser(object):
+    """Manages closing of open file handles.
+
+    This is intended to be used when writing to several files in rapid
+    succession. It is not needed when opening files for reading.
+
+    This doesn't do anything on POSIX because it isn't necessary. See the
+    Windows implementation. It is, however, a good reference for the API.
+    """
+    def __init__(self, ui):
+        pass
+
+    def close(self, fh):
+        """Schedule a file handle for closing.
+
+        Performs synchronous close on POSIX.
+        """
+        fh.close()
+
+    def verifystate(self):
+        """Ensures the instance is in a good state.
+
+        This should be called before opening files that will be closed
+        by this class to ensure we don't overwhelm system resources.
+
+        It can also raise if something is wrong.
+
+        It does nothing on POSIX because we close immediately.
+        """
+
+    def cleanup(self):
+        """Wait for all operations to complete.
+
+        This must be called for every instance that is created.
+
+        It does nothing on POSIX because there is nothing to clean up.
+        """
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -35,8 +35,9 @@ checklink = platform.checklink
 copymode = platform.copymode
 executablepath = platform.executablepath
 expandglobs = platform.expandglobs
 explainexit = platform.explainexit
+filecloser = platform.filecloser
 findexe = platform.findexe
 gethgcmd = platform.gethgcmd
 getuser = platform.getuser
 groupmembers = platform.groupmembers
diff --git a/mercurial/windows.py b/mercurial/windows.py
--- a/mercurial/windows.py
+++ b/mercurial/windows.py
@@ -7,8 +7,10 @@
 
 from i18n import _
 import osutil, encoding
 import errno, msvcrt, os, re, stat, sys, _winreg
+import time
+import threading
 
 import win32
 executablepath = win32.executablepath
 getuser = win32.getuser
@@ -458,4 +460,66 @@ def readpipe(pipe):
             break
         chunks.append(s)
 
     return ''.join(chunks)
+
+class filecloser(object):
+    """Closes file handles efficiently on Windows.
+
+    Closing file handles that have been written to is relatively slow on
+    Windows compared to other I/O operations. Measurements revealed most
+    I/O operations (including writes) to be ~1us. Closes, by contrast, take
+    ~1ms - ~1000x slower.
+
+    This class performs file closing on a separate Python thread so file
+    closing doesn't block the main thread.
+    """
+    def __init__(self, ui):
+        self._queue = []
+        self._error = None
+        self._alive = True
+        self._ev = threading.Event()
+        self._t = threading.Thread(target=self._processcloses,
+                                   name='filecloser')
+        self._t.start()
+
+        # Windows defaults to max of 512 open file descriptors. A buffer of
+        # 128 should be more than sufficient.
+        # experimental config: worker.closefileslimit
+        self._queuelimit = ui.configint('worker', 'closefilelimit', 384)
+
+    def close(self, fh):
+        self._queue.append(fh)
+        self._ev.set()
+
+    def verifystate(self):
+        # Raise an error encountered on background thread.
+        if self._error:
+            raise self._error
+
+        # Don't allow the queue to grow too long.
+        while len(self._queue) >= self._queuelimit:
+            # This will surrender the GIL. 100ms is a bit long. However,
+            # if we have lots of queued operations, they likely won't finish
+            # in 100ms since it takes ~1ms to close a file.
+            time.sleep(0.100)
+
+    def cleanup(self):
+        self._alive = False
+        # This arguably isn't necessary. But it should shave a few ms.
+        self._ev.set()
+        self._t.join()
+
+    def _processcloses(self):
+        """Main function of closing thread."""
+        while self._alive or self._queue:
+            if not self._queue:
+                self._ev.wait(0.100)
+                self._ev.clear()
+
+            while self._queue:
+                fh = self._queue.pop(0)
+                try:
+                    fh.close()
+                except Exception as e:
+                    self._error = e
+                    # Don't raise otherwise we'll leak handles.


More information about the Mercurial-devel mailing list