[PATCH] run-tests: lock uses of channels to avoid race condition

Fri Jan 22 04:34:41 UTC 2016

# HG changeset patch
# User Bryan O'Sullivan <bos at serpentine.com>
# Date 1453437023 28800
#      Thu Jan 21 20:30:23 2016 -0800
# Branch stable
# Node ID d8816e8a4e4c4956bf5749410612b5bff34bb10e
# Parent  0b752757a0c4f30ac755389014e2af9d2e0b4a3f
run-tests: lock uses of channels to avoid race condition

When running multiple tests at once, there's a huge race condition
that, for me, is 100% reproducible.  Laurent's commit 0b752757a0c4
tries to address the same problem, but doesn't correctly fix it.

1. A KeyboardInterrupt is raised to the main thread.

2. "channels = []" is invoked at the end of "TestSuite.run".

3. The KeyboardInterrupt propagates to each thread running the local
   function "job", and an attempt is made to assign to
   "channels[channel]" based on the now-wrong understanding that
   "channels" is non-empty.

The result is that every thread raises an IndexError.

With this commit, every access to "channels" is protected by a
lock, and in the "job" threads, checked for non-emptiness.

diff --git a/tests/run-tests.py b/tests/run-tests.py
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -1517,39 +1517,48 @@ class TestSuite(unittest.TestSuite):
         running = 0
 
         channels = [""] * self._jobs
+        chanlock = threading.Lock()
 
         def job(test, result):
-            for n, v in enumerate(channels):
-                if not v:
-                    channel = n
-                    break
-            channels[channel] = "=" + test.name[5:].split(".")[0]
+            with chanlock:
+                for n, v in enumerate(channels):
+                    if not v:
+                        channel = n
+                        break
+                channels[channel] = "=" + test.name[5:].split(".")[0]
             try:
                 test(result)
-                channels[channel] = ''
                 done.put(None)
             except KeyboardInterrupt:
-                channels[channel] = ''
+                pass
             except: # re-raises
                 done.put(('!', test, 'run-test raised an error, see traceback'))
                 raise
+            with chanlock:
+                if channels:
+                    channels[channel] = ''
 
         def stat():
             count = 0
-            while channels:
-                d = '\n%03s  ' % count
-                for n, v in enumerate(channels):
-                    if v:
-                        d += v[0]
-                        channels[n] = v[1:] or '.'
-                    else:
+            while True:
+                with chanlock:
+                    if not channels:
+                        break
+                    d = '\n%03s  ' % count
+                    for n, v in enumerate(channels):
+                        if v:
+                            d += v[0]
+                            channels[n] = v[1:] or '.'
+                        else:
+                            d += ' '
                         d += ' '
-                    d += ' '
                 with iolock:
                     sys.stdout.write(d + '  ')
                     sys.stdout.flush()
                 for x in xrange(10):
-                    if channels:
+                    with chanlock:
+                        sleep = len(channels)
+                    if sleep:
                         time.sleep(.1)
                 count += 1
 
@@ -1602,7 +1611,8 @@ class TestSuite(unittest.TestSuite):
             for test in runtests:
                 test.abort()
 
-        channels = []
+        with chanlock:
+            channels = []
 
         return result