[PATCH V3] dirstate: track otherparent files same as nonnormal

Durham Goode durham at fb.com
Thu Mar 9 01:36:47 UTC 2017


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1489023320 28800
#      Wed Mar 08 17:35:20 2017 -0800
# Node ID c1f6f1b0b0d5ca351761f82973860256ad63b16f
# Parent  150cd51257221fad5ccba5794e7a21837afba479
dirstate: track otherparent files same as nonnormal

Calling dirstate.setparents() is expensive in a large repo because it iterates
over every file in the dirstate. It does so to undo any merge state or
otherparent state files. Merge state files are already covered by
dirstate._nonnormalset, so we just need to track otherparent files in a similar
manner to avoid the full iteration here.

Fixing this shaves 20-25% off histedit in large repos.

I tested this by adding temporary debug logic to verify that the old files
processed in the loop matched the new files processed in the loop and running
the test suite.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -55,10 +55,16 @@ def _getfsnow(vfs):
 def nonnormalentries(dmap):
     '''Compute the nonnormal dirstate entries from the dmap'''
     try:
-        return parsers.nonnormalentries(dmap)
+        return parsers.nonnormalotherparententries(dmap)
     except AttributeError:
-        return set(fname for fname, e in dmap.iteritems()
-                   if e[0] != 'n' or e[3] == -1)
+        nonnorm = set()
+        otherparent = set()
+        for fname, e in dmap.iteritems():
+            if e[0] != 'n' or e[3] == -1:
+                nonnorm.add(fname)
+            if e[0] == 'n' and e[2] == -2:
+                otherparent.add(fname)
+        return nonnorm, otherparent
 
 class dirstate(object):
 
@@ -130,7 +136,15 @@ class dirstate(object):
 
     @propertycache
     def _nonnormalset(self):
-        return nonnormalentries(self._map)
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._otherparentset = otherparents
+        return nonnorm
+
+    @propertycache
+    def _otherparentset(self):
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._nonnormalset = nonnorm
+        return otherparents
 
     @propertycache
     def _filefoldmap(self):
@@ -340,7 +354,12 @@ class dirstate(object):
         self._pl = p1, p2
         copies = {}
         if oldp2 != nullid and p2 == nullid:
-            for f, s in self._map.iteritems():
+            candidatefiles = self._nonnormalset.union(self._otherparentset)
+            for f in candidatefiles:
+                s = self._map.get(f)
+                if s is None:
+                    continue
+
                 # Discard 'm' markers when moving away from a merge state
                 if s[0] == 'm':
                     if f in self._copymap:
@@ -426,7 +445,8 @@ class dirstate(object):
 
     def invalidate(self):
         for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
-                  "_pl", "_dirs", "_ignore", "_nonnormalset"):
+                  "_pl", "_dirs", "_ignore", "_nonnormalset",
+                  "_otherparentset"):
             if a in self.__dict__:
                 delattr(self, a)
         self._lastnormaltime = 0
@@ -478,6 +498,8 @@ class dirstate(object):
         self._map[f] = dirstatetuple(state, mode, size, mtime)
         if state != 'n' or mtime == -1:
             self._nonnormalset.add(f)
+        if size == -2:
+            self._otherparentset.add(f)
 
     def normal(self, f):
         '''Mark a file normal and clean.'''
@@ -552,6 +574,7 @@ class dirstate(object):
                 size = -1
             elif entry[0] == 'n' and entry[2] == -2: # other parent
                 size = -2
+                self._otherparentset.add(f)
         self._map[f] = dirstatetuple('r', 0, size, 0)
         self._nonnormalset.add(f)
         if size == 0 and f in self._copymap:
@@ -651,6 +674,7 @@ class dirstate(object):
     def clear(self):
         self._map = {}
         self._nonnormalset = set()
+        self._otherparentset = set()
         if "_dirs" in self.__dict__:
             delattr(self, "_dirs")
         self._copymap = {}
@@ -747,7 +771,7 @@ class dirstate(object):
                     break
 
         st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
-        self._nonnormalset = nonnormalentries(self._map)
+        self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
         st.close()
         self._lastnormaltime = 0
         self._dirty = self._dirtypl = False
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -560,11 +560,11 @@ quit:
 }
 
 /*
- * Build a set of non-normal entries from the dirstate dmap
+ * Build a set of non-normal and other parent entries from the dirstate dmap
 */
-static PyObject *nonnormalentries(PyObject *self, PyObject *args)
-{
-	PyObject *dmap, *nonnset = NULL, *fname, *v;
+static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
+	PyObject *dmap, *fname, *v;
+	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
 	Py_ssize_t pos;
 
 	if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
@@ -575,6 +575,10 @@ static PyObject *nonnormalentries(PyObje
 	if (nonnset == NULL)
 		goto bail;
 
+	otherpset = PySet_New(NULL);
+	if (otherpset == NULL)
+		goto bail;
+
 	pos = 0;
 	while (PyDict_Next(dmap, &pos, &fname, &v)) {
 		dirstateTupleObject *t;
@@ -585,19 +589,53 @@ static PyObject *nonnormalentries(PyObje
 		}
 		t = (dirstateTupleObject *)v;
 
+		if (t->state == 'n' && t->size == -2) {
+			if (PySet_Add(otherpset, fname) == -1) {
+				goto bail;
+			}
+		}
+
 		if (t->state == 'n' && t->mtime != -1)
 			continue;
 		if (PySet_Add(nonnset, fname) == -1)
 			goto bail;
 	}
 
-	return nonnset;
+	result = Py_BuildValue("(OO)", nonnset, otherpset);
+	if (result == NULL)
+		goto bail;
+	return result;
 bail:
 	Py_XDECREF(nonnset);
+	Py_XDECREF(otherpset);
+	Py_XDECREF(result);
 	return NULL;
 }
 
 /*
+ * Build a set of non-normal entries from the dirstate dmap
+*/
+static PyObject *nonnormalentries(PyObject *self, PyObject *args)
+{
+	PyObject *nonnset = NULL, *combined = NULL;
+
+	combined = nonnormalotherparententries(self, args);
+	if (!combined) {
+		return NULL;
+	}
+
+	nonnset = PyTuple_GetItem(combined, 0);
+	if (!nonnset) {
+		Py_DECREF(combined);
+		return NULL;
+	}
+
+	Py_INCREF(nonnset);
+	Py_DECREF(combined);
+	return nonnset;
+}
+
+/*
  * Efficiently pack a dirstate object into its on-disk format.
  */
 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
@@ -2816,6 +2854,9 @@ static PyMethodDef methods[] = {
 	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
 	{"nonnormalentries", nonnormalentries, METH_VARARGS,
 	"create a set containing non-normal entries of given dirstate\n"},
+	{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
+	"create a set containing non-normal and other parent entries of given "
+	"dirstate\n"},
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},


More information about the Mercurial-devel mailing list