[PATCH 1 of 3 V2] dirstate: disable gc while parsing the dirstate

Siddharth Agarwal sid0 at fb.com
Sun Feb 10 10:57:34 CST 2013

# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1360513394 0
# Node ID f068418a1e6c8ece794a6f0ef7c1c1008eca7a44
# Parent  6fc7952d6819acbc212cdde688edc256c9bd4b2e
dirstate: disable gc while parsing the dirstate

This prevents a performance regression that an upcoming patch would otherwise
introduce, because that patch indirectly delays parsing the dirstate a bit.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -9,7 +9,7 @@ import errno
 from node import nullid
 from i18n import _
 import scmutil, util, ignore, osutil, parsers, encoding
-import os, stat, errno
+import os, stat, errno, gc
 propertycache = util.propertycache
 filecache = scmutil.filecache
@@ -285,7 +285,23 @@ class dirstate(object):
         if not st:
-        p = parsers.parse_dirstate(self._map, self._copymap, st)
+        # Python's garbage collector triggers a GC each time a certain number
+        # of container objects (the number being defined by
+        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
+        # for each file in the dirstate. The C version then immediately marks
+        # them as not to be tracked by the collector. However, this has no
+        # effect on when GCs are triggered, only on what objects the GC looks
+        # into. This means that O(number of files) GCs are unavoidable.
+        # Depending on when in the process's lifetime the dirstate is parsed,
+        # this can get very expensive. As a workaround, disable GC while
+        # parsing the dirstate.
+        gcenabled = gc.isenabled()
+        gc.disable()
+        try:
+            p = parsers.parse_dirstate(self._map, self._copymap, st)
+        finally:
+            if gcenabled:
+                gc.enable()
         if not self._dirtypl:
             self._pl = p

More information about the Mercurial-devel mailing list