[PATCH V2] dirs: remove mutable string optimization at all

Yuya Nishihara yuya at tcha.org
Mon Oct 14 03:54:04 UTC 2019


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1570964769 -32400
#      Sun Oct 13 20:06:09 2019 +0900
# Node ID dd9f93e8af83be1bfd7082026d28f774917f0593
# Parent  feaea7fe888383bcb1d28e69df325e41ef68ece9
dirs: remove mutable string optimization at all

As far as I can see, the optimization trick has been dead since 42e89b87ca79
"dirs: speed up by storing number of direct children per dir". After
42e89b87ca79, the key variable is cleared to NULL at each iteration.

diff --git a/mercurial/cext/dirs.c b/mercurial/cext/dirs.c
--- a/mercurial/cext/dirs.c
+++ b/mercurial/cext/dirs.c
@@ -26,9 +26,6 @@
  *
  * We modify Python integers for refcounting, but those integers are
  * never visible to Python code.
- *
- * We mutate strings in-place, but leave them immutable once they can
- * be seen by Python code.
  */
 typedef struct {
 	PyObject_HEAD
@@ -63,46 +60,18 @@ static int _addpath(PyObject *dirs, PyOb
 	* "protocol" such as mutating immutable objects. But since we only
 	* mutate objects created in this function or in other well-defined
 	* locations, the references are known so these violations should go
-	* unnoticed. The code for adjusting the length of a PyBytesObject is
-	* essentially a minimal version of _PyBytes_Resize. */
+	* unnoticed. */
 	while ((pos = _finddir(cpath, pos - 1)) != -1) {
 		PyObject *val;
 
-		if (pos < 2) {
-			key = PyBytes_FromStringAndSize(cpath, pos);
-			if (key == NULL)
-				goto bail;
-		} else {
-			/* It's likely that every prefix already has an entry
-			   in our dict. Try to avoid allocating and
-			   deallocating a string for each prefix we check. */
-			if (key != NULL)
-				((PyBytesObject *)key)->ob_shash = -1;
-			else {
-				/* We know pos >= 2, so we won't get a small
-				 * shared string. */
-				key = PyBytes_FromStringAndSize(cpath, pos);
-				if (key == NULL)
-					goto bail;
-			}
-			/* Py_SIZE(o) refers to the ob_size member of
-			 * the struct. Yes, assigning to what looks
-			 * like a function seems wrong. */
-			Py_SIZE(key) = pos;
-			((PyBytesObject *)key)->ob_sval[pos] = '\0';
-		}
+		key = PyBytes_FromStringAndSize(cpath, pos);
+		if (key == NULL)
+			goto bail;
 
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PYLONG_VALUE(val) += 1;
-			if (pos < 2) {
-				/* This was a short string, so we
-				 * probably got a small shared string
-				 * we can't mutate on the next loop
-				 * iteration. Clear it.
-				 */
-				Py_CLEAR(key);
-			}
+			Py_CLEAR(key);
 			break;
 		}
 


More information about the Mercurial-devel mailing list