[PATCH] dirstate: track non-normal files separately

Laurent Charignon lcharignon at fb.com
Wed Dec 9 23:56:32 UTC 2015


# HG changeset patch
# User Laurent Charignon <lcharignon at fb.com>
# Date 1449705375 28800
#      Wed Dec 09 15:56:15 2015 -0800
# Branch stable
# Node ID 04249d53c997f653ae3488992a49f4df09c6a8b6
# Parent  f5e8cb813a4d5c0665c7e144d96810b4763c42d1
dirstate: track non-normal files separately

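This patch introduces a new dirstate attribute, "_nonnormalmap" (exposed through
the "nonnormalmap" property). It is a dict containing the non-normal files, i.e.
those whose state is not 'n' or whose mtime is -1, populated when we parse the
dirstate.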

Thanks to this property we can shorten the time it takes to run `hg status`,
as we no longer need to walk through all the files to find the non-normal ones.
With another patch on hgwatchman to use that property, on our big repos (where
the dirstate takes about 100MB on disk), `hg status` goes from 0.9s to 0.6s
(10-run average) with this patch.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -114,6 +114,11 @@ class dirstate(object):
         return self._map
 
     @propertycache
+    def nonnormalmap(self):
+        self._read()
+        return self._nonnormalmap
+
+    @propertycache
     def _copymap(self):
         self._read()
         return self._copymap
@@ -373,6 +378,7 @@ class dirstate(object):
     def _read(self):
         self._map = {}
         self._copymap = {}
+        self._nonnormalmap = {}
         try:
             fp = self._opendirstatefile()
             try:
@@ -412,7 +418,7 @@ class dirstate(object):
         #
         # (we cannot decorate the function directly since it is in a C module)
         parse_dirstate = util.nogc(parsers.parse_dirstate)
-        p = parse_dirstate(self._map, self._copymap, st)
+        p = parse_dirstate(self._map, self._copymap, self._nonnormalmap, st)
         if not self._dirtypl:
             self._pl = p
 
@@ -467,6 +473,8 @@ class dirstate(object):
             self._dirs.addpath(f)
         self._dirty = True
         self._map[f] = dirstatetuple(state, mode, size, mtime)
+        if state != 'n' or mtime == - 1:
+            self._nonnormalmap[f] = dirstatetuple(state, mode, size, mtime)
 
     def normal(self, f):
         '''Mark a file normal and clean.'''
@@ -503,6 +511,8 @@ class dirstate(object):
         self._addpath(f, 'n', 0, -1, -1)
         if f in self._copymap:
             del self._copymap[f]
+        if f in self._nonnormalmap:
+            del self._nonnormalmap[f]
 
     def otherparent(self, f):
         '''Mark as coming from the other parent, always dirty.'''
@@ -538,6 +548,7 @@ class dirstate(object):
             elif entry[0] == 'n' and entry[2] == -2: # other parent
                 size = -2
         self._map[f] = dirstatetuple('r', 0, size, 0)
+        self._nonnormalmap[f] = dirstatetuple('r', 0, size, 0)
         if size == 0 and f in self._copymap:
             del self._copymap[f]
 
@@ -553,6 +564,8 @@ class dirstate(object):
             self._dirty = True
             self._droppath(f)
             del self._map[f]
+        if f in self._nonnormalmap:
+            del self._nonnormalmap[f]
 
     def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
         if exists is None:
@@ -633,6 +646,7 @@ class dirstate(object):
         if "_dirs" in self.__dict__:
             delattr(self, "_dirs")
         self._copymap = {}
+        self._nonnormalmap = {}
         self._pl = [nullid, nullid]
         self._lastnormaltime = 0
         self._dirty = True
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -465,18 +465,26 @@ PyTypeObject dirstateTupleType = {
 
 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
 {
-	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
-	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
+	PyObject *dmap, *cmap, *nonnmap, *parents = NULL, *ret = NULL;
+	PyObject *fname = NULL, *cname = NULL, *entry = NULL, *statestring = NULL;
 	char state, *cur, *str, *cpos;
 	int mode, size, mtime;
 	unsigned int flen, len, pos = 40;
 	int readlen;
 
-	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
+	if (!PyArg_ParseTuple(args, "O!O!O!s#:parse_dirstate",
 			      &PyDict_Type, &dmap,
 			      &PyDict_Type, &cmap,
-			      &str, &readlen))
-		goto quit;
+			      &PyDict_Type, &nonnmap,
+			      &str, &readlen)) {
+		/* It might be the old call signature */
+		nonnmap = NULL;
+		if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
+				      &PyDict_Type, &dmap,
+				      &PyDict_Type, &cmap,
+				      &str, &readlen))
+			goto quit;
+	}
 
 	len = readlen;
 
@@ -514,11 +522,19 @@ static PyObject *parse_dirstate(PyObject
 
 		entry = (PyObject *)make_dirstate_tuple(state, mode, size,
 							mtime);
+		statestring = PyString_FromFormat("%c", state);
 		cpos = memchr(cur, 0, flen);
 		if (cpos) {
 			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
 			cname = PyBytes_FromStringAndSize(cpos + 1,
 							   flen - (cpos - cur) - 1);
+			if (fname && nonnmap) {
+			  if (state != 'n' && PyDict_SetItem(nonnmap, fname, statestring) == -1)
+						goto quit;
+			  if (state == 'n' && mtime == -1
+						&& PyDict_SetItem(nonnmap, fname, statestring) == -1)
+						goto quit;
+			}
 			if (!fname || !cname ||
 			    PyDict_SetItem(cmap, fname, cname) == -1 ||
 			    PyDict_SetItem(dmap, fname, entry) == -1)
@@ -526,13 +542,20 @@ static PyObject *parse_dirstate(PyObject
 			Py_DECREF(cname);
 		} else {
 			fname = PyBytes_FromStringAndSize(cur, flen);
-			if (!fname ||
-			    PyDict_SetItem(dmap, fname, entry) == -1)
+			if (fname && nonnmap) {
+			  if (state != 'n' && PyDict_SetItem(nonnmap, fname, statestring) == -1)
+						goto quit;
+			  if (state == 'n' && mtime == -1
+						&& PyDict_SetItem(nonnmap, fname, statestring) == -1)
+						goto quit;
+			}
+			if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
 				goto quit;
 		}
 		Py_DECREF(fname);
 		Py_DECREF(entry);
-		fname = cname = entry = NULL;
+		Py_DECREF(statestring);
+		fname = cname = entry = statestring = NULL;
 		pos += flen;
 	}
 
@@ -542,6 +565,7 @@ quit:
 	Py_XDECREF(fname);
 	Py_XDECREF(cname);
 	Py_XDECREF(entry);
+	Py_XDECREF(statestring);
 	Py_XDECREF(parents);
 	return ret;
 }
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -63,7 +63,7 @@ def parse_index2(data, inline):
 
     return index, cache
 
-def parse_dirstate(dmap, copymap, st):
+def parse_dirstate(dmap, copymap, nonnormalmap, st):
     parents = [st[:20], st[20: 40]]
     # dereference fields so they will be local in loop
     format = ">cllll"
@@ -81,6 +81,8 @@ def parse_dirstate(dmap, copymap, st):
             f, c = f.split('\0')
             copymap[f] = c
         dmap[f] = e[:4]
+        if e[0] != 'n' or e[3] == -1:
+            nonnormalmap[f] = e[0]
     return parents
 
 def pack_dirstate(dmap, copymap, pl, now):


More information about the Mercurial-devel mailing list