[PATCH V3] dirstate: track otherparent files same as nonnormal

Augie Fackler raf at durin42.com
Thu Mar 9 19:10:40 EST 2017


On Wed, Mar 08, 2017 at 05:36:47PM -0800, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham at fb.com>
> # Date 1489023320 28800
> #      Wed Mar 08 17:35:20 2017 -0800
> # Node ID c1f6f1b0b0d5ca351761f82973860256ad63b16f
> # Parent  150cd51257221fad5ccba5794e7a21837afba479
> dirstate: track otherparent files same as nonnormal

Queued, thanks.

>
> Calling dirstate.setparents() is expensive in a large repo because it iterates
> over every file in the dirstate. It does so to undo the merge state or
> otherparent state of any affected files. Merge state files are already covered
> by dirstate._nonnormalset, so we just need to track otherparent files in a
> similar manner to avoid the full iteration here.
>
> Fixing this shaves 20-25% off histedit in large repos.
>
> I tested this in two ways: by adding temporary debug logic to verify that the
> files processed by the old loop matched the files processed by the new loop,
> and by running the test suite.
>
> diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
> --- a/mercurial/dirstate.py
> +++ b/mercurial/dirstate.py
> @@ -55,10 +55,16 @@ def _getfsnow(vfs):
>  def nonnormalentries(dmap):
>      '''Compute the nonnormal dirstate entries from the dmap'''
>      try:
> -        return parsers.nonnormalentries(dmap)
> +        return parsers.nonnormalotherparententries(dmap)
>      except AttributeError:
> -        return set(fname for fname, e in dmap.iteritems()
> -                   if e[0] != 'n' or e[3] == -1)
> +        nonnorm = set()
> +        otherparent = set()
> +        for fname, e in dmap.iteritems():
> +            if e[0] != 'n' or e[3] == -1:
> +                nonnorm.add(fname)
> +            if e[0] == 'n' and e[2] == -2:
> +                otherparent.add(fname)
> +        return nonnorm, otherparent
>
>  class dirstate(object):
>
> @@ -130,7 +136,15 @@ class dirstate(object):
>
>      @propertycache
>      def _nonnormalset(self):
> -        return nonnormalentries(self._map)
> +        nonnorm, otherparents = nonnormalentries(self._map)
> +        self._otherparentset = otherparents
> +        return nonnorm
> +
> +    @propertycache
> +    def _otherparentset(self):
> +        nonnorm, otherparents = nonnormalentries(self._map)
> +        self._nonnormalset = nonnorm
> +        return otherparents
>
>      @propertycache
>      def _filefoldmap(self):
> @@ -340,7 +354,12 @@ class dirstate(object):
>          self._pl = p1, p2
>          copies = {}
>          if oldp2 != nullid and p2 == nullid:
> -            for f, s in self._map.iteritems():
> +            candidatefiles = self._nonnormalset.union(self._otherparentset)
> +            for f in candidatefiles:
> +                s = self._map.get(f)
> +                if s is None:
> +                    continue
> +
>                  # Discard 'm' markers when moving away from a merge state
>                  if s[0] == 'm':
>                      if f in self._copymap:
> @@ -426,7 +445,8 @@ class dirstate(object):
>
>      def invalidate(self):
>          for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
> -                  "_pl", "_dirs", "_ignore", "_nonnormalset"):
> +                  "_pl", "_dirs", "_ignore", "_nonnormalset",
> +                  "_otherparentset"):
>              if a in self.__dict__:
>                  delattr(self, a)
>          self._lastnormaltime = 0
> @@ -478,6 +498,8 @@ class dirstate(object):
>          self._map[f] = dirstatetuple(state, mode, size, mtime)
>          if state != 'n' or mtime == -1:
>              self._nonnormalset.add(f)
> +        if size == -2:
> +            self._otherparentset.add(f)
>
>      def normal(self, f):
>          '''Mark a file normal and clean.'''
> @@ -552,6 +574,7 @@ class dirstate(object):
>                  size = -1
>              elif entry[0] == 'n' and entry[2] == -2: # other parent
>                  size = -2
> +                self._otherparentset.add(f)
>          self._map[f] = dirstatetuple('r', 0, size, 0)
>          self._nonnormalset.add(f)
>          if size == 0 and f in self._copymap:
> @@ -651,6 +674,7 @@ class dirstate(object):
>      def clear(self):
>          self._map = {}
>          self._nonnormalset = set()
> +        self._otherparentset = set()
>          if "_dirs" in self.__dict__:
>              delattr(self, "_dirs")
>          self._copymap = {}
> @@ -747,7 +771,7 @@ class dirstate(object):
>                      break
>
>          st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
> -        self._nonnormalset = nonnormalentries(self._map)
> +        self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
>          st.close()
>          self._lastnormaltime = 0
>          self._dirty = self._dirtypl = False
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -560,11 +560,11 @@ quit:
>  }
>
>  /*
> - * Build a set of non-normal entries from the dirstate dmap
> + * Build a set of non-normal and other parent entries from the dirstate dmap
>  */
> -static PyObject *nonnormalentries(PyObject *self, PyObject *args)
> -{
> -	PyObject *dmap, *nonnset = NULL, *fname, *v;
> +static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
> +	PyObject *dmap, *fname, *v;
> +	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
>       Py_ssize_t pos;
>
>       if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
> @@ -575,6 +575,10 @@ static PyObject *nonnormalentries(PyObje
>       if (nonnset == NULL)
>               goto bail;
>
> +	otherpset = PySet_New(NULL);
> +	if (otherpset == NULL)
> +		goto bail;
> +
>       pos = 0;
>       while (PyDict_Next(dmap, &pos, &fname, &v)) {
>               dirstateTupleObject *t;
> @@ -585,19 +589,53 @@ static PyObject *nonnormalentries(PyObje
>               }
>               t = (dirstateTupleObject *)v;
>
> +		if (t->state == 'n' && t->size == -2) {
> +			if (PySet_Add(otherpset, fname) == -1) {
> +				goto bail;
> +			}
> +		}
> +
>               if (t->state == 'n' && t->mtime != -1)
>                       continue;
>               if (PySet_Add(nonnset, fname) == -1)
>                       goto bail;
>       }
>
> -	return nonnset;
> +	result = Py_BuildValue("(OO)", nonnset, otherpset);
> +	if (result == NULL)
> +		goto bail;
> +	return result;
>  bail:
>       Py_XDECREF(nonnset);
> +	Py_XDECREF(otherpset);
> +	Py_XDECREF(result);
>       return NULL;
>  }
>
>  /*
> + * Build a set of non-normal entries from the dirstate dmap
> +*/
> +static PyObject *nonnormalentries(PyObject *self, PyObject *args)
> +{
> +	PyObject *nonnset = NULL, *combined = NULL;
> +
> +	combined = nonnormalotherparententries(self, args);
> +	if (!combined) {
> +		return NULL;
> +	}
> +
> +	nonnset = PyTuple_GetItem(combined, 0);
> +	if (!nonnset) {
> +		Py_DECREF(combined);
> +		return NULL;
> +	}
> +
> +	Py_INCREF(nonnset);
> +	Py_DECREF(combined);
> +	return nonnset;
> +}
> +
> +/*
>   * Efficiently pack a dirstate object into its on-disk format.
>   */
>  static PyObject *pack_dirstate(PyObject *self, PyObject *args)
> @@ -2816,6 +2854,9 @@ static PyMethodDef methods[] = {
>       {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
>       {"nonnormalentries", nonnormalentries, METH_VARARGS,
>       "create a set containing non-normal entries of given dirstate\n"},
> +	{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
> +	"create a set containing non-normal and other parent entries of given "
> +	"dirstate\n"},
>       {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
>       {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
>       {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list