[PATCH V2] parsers: fix buffer overflow by invalid parent revision read from revlog

Yuya Nishihara yuya at tcha.org
Fri Jul 17 15:45:44 UTC 2015


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1437057368 -32400
#      Thu Jul 16 23:36:08 2015 +0900
# Node ID 9ab54b991af71295e6ba10c493d0ba334bc553cb
# Parent  c8c61831756e1f0e5c969ae2fd694ebc1ea4f7c9
parsers: fix buffer overflow by invalid parent revision read from revlog

If revlog file is corrupted, it can have parent pointing to invalid revision.
So we should validate it before updating nothead[], phases[], seen[], etc.
Otherwise it would cause segfault at best.

We could use "rev" instead of "maxrev" as upper bound, but I think the explicit
"maxrev" can clarify that we just want to avoid possible buffer overflow
vulnerability.

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -752,8 +752,8 @@ static const char *index_deref(indexObje
 	return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
 }
 
-static inline void index_get_parents(indexObject *self, Py_ssize_t rev,
-				int *ps)
+static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
+				    int *ps, int maxrev)
 {
 	if (rev >= self->length - 1) {
 		PyObject *tuple = PyList_GET_ITEM(self->added,
@@ -765,6 +765,13 @@ static inline void index_get_parents(ind
 		ps[0] = getbe32(data + 24);
 		ps[1] = getbe32(data + 28);
 	}
+	/* If index file is corrupted, ps[] may point to invalid revisions. So
+	 * there is a risk of buffer overflow to trust them unconditionally. */
+	if (ps[0] > maxrev || ps[1] > maxrev) {
+		PyErr_SetString(PyExc_ValueError, "parent out of range");
+		return -1;
+	}
+	return 0;
 }
 
 
@@ -1149,7 +1156,8 @@ static PyObject *compute_phases_map_sets
 	if (minrevallphases != -1) {
 		int parents[2];
 		for (i = minrevallphases; i < len; i++) {
-			index_get_parents(self, i, parents);
+			if (index_get_parents(self, i, parents, len - 1) < 0)
+				goto release_phasesetlist;
 			set_phase_from_parents(phases, parents[0], parents[1], i);
 		}
 	}
@@ -1248,7 +1256,8 @@ static PyObject *index_headrevs(indexObj
 			continue;
 		}
 
-		index_get_parents(self, i, parents);
+		if (index_get_parents(self, i, parents, len - 1) < 0)
+			goto bail;
 		for (j = 0; j < 2; j++) {
 			if (parents[j] >= 0)
 				nothead[parents[j]] = 1;
@@ -1716,7 +1725,8 @@ static PyObject *find_gca_candidates(ind
 				}
 			}
 		}
-		index_get_parents(self, v, parents);
+		if (index_get_parents(self, v, parents, maxrev) < 0)
+			goto bail;
 
 		for (i = 0; i < 2; i++) {
 			int p = parents[i];
@@ -1813,7 +1823,8 @@ static PyObject *find_deepest(indexObjec
 			continue;
 
 		sv = seen[v];
-		index_get_parents(self, v, parents);
+		if (index_get_parents(self, v, parents, maxrev) < 0)
+			goto bail;
 
 		for (i = 0; i < 2; i++) {
 			int p = parents[i];
diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t
--- a/tests/test-parseindex.t
+++ b/tests/test-parseindex.t
@@ -59,3 +59,62 @@ We approximate that by reducing the read
   26333235a41c
 
   $ cd ..
+
+Test corrupted p1/p2 fields that could cause SEGV at parsers.c:
+
+  $ mkdir invalidparent
+  $ cd invalidparent
+
+  $ hg clone --pull -q --config phases.publish=False ../a limit
+  $ hg clone --pull -q --config phases.publish=False ../a segv
+  $ rm -R limit/.hg/cache segv/.hg/cache
+
+  $ python <<EOF
+  > data = open("limit/.hg/store/00changelog.i", "rb").read()
+  > for n, p in [('limit', '\0\0\0\x02'), ('segv', '\0\x01\0\0')]:
+  >     # corrupt p1 at rev0 and p2 at rev1
+  >     d = data[:24] + p + data[28:127 + 28] + p + data[127 + 32:]
+  >     open(n + "/.hg/store/00changelog.i", "wb").write(d)
+  > EOF
+
+  $ hg debugindex -f1 limit/.hg/store/00changelog.i
+     rev flag   offset   length     size   base   link     p1     p2       nodeid
+       0 0000        0       63       62      0      0      2     -1 7c31755bf9b5
+       1 0000       63       66       65      1      1      0      2 26333235a41c
+  $ hg debugindex -f1 segv/.hg/store/00changelog.i
+     rev flag   offset   length     size   base   link     p1     p2       nodeid
+       0 0000        0       63       62      0      0  65536     -1 7c31755bf9b5
+       1 0000       63       66       65      1      1      0  65536 26333235a41c
+
+  $ cat <<EOF > test.py
+  > import sys
+  > from mercurial import changelog, scmutil
+  > cl = changelog.changelog(scmutil.vfs(sys.argv[1]))
+  > n0, n1 = cl.node(0), cl.node(1)
+  > ops = [
+  >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
+  >     ('index_headrevs', lambda: cl.headrevs()),
+  >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
+  >     ('find_deepest', lambda: cl.ancestor(n0, n1)),
+  >     ]
+  > for l, f in ops:
+  >     print l + ':',
+  >     try:
+  >         f()
+  >         print 'uncaught buffer overflow?'
+  >     except ValueError, inst:
+  >         print inst
+  > EOF
+
+  $ python test.py limit/.hg/store
+  compute_phases_map_sets: parent out of range
+  index_headrevs: parent out of range
+  find_gca_candidates: parent out of range
+  find_deepest: parent out of range
+  $ python test.py segv/.hg/store
+  compute_phases_map_sets: parent out of range
+  index_headrevs: parent out of range
+  find_gca_candidates: parent out of range
+  find_deepest: parent out of range
+
+  $ cd ..


More information about the Mercurial-devel mailing list