[PATCH] issue1610: Add tests for convert's topological sort algorithm

Fri Apr 17 17:20:12 CDT 2009

# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1240006756 14400
# Node ID 7f37d0256d61578cc808c4247c80dc9820fc3993
# Parent  70f0eb571ea4958a2fa54dd103da6a2406543b6e
issue1610: Add tests for convert's topological sort algorithm.

diff --git a/tests/test-convert-toposort b/tests/test-convert-toposort
new file mode 100755
--- /dev/null
+++ b/tests/test-convert-toposort
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+
+# Unit tests for hgext.convert.convcmd.converter.toposort() method.
+#
+# toposort() has a couple of goals to satisfy:
+#   correctness: if it doesn't generate a valid topological sort,
+#     what's the point?
+#   efficiency: the order in which we traverse the source DAG can
+#     have a dramatic effect on the size of the manifest file
+#     in the resulting Mercurial repository.  Generally, toposort()
+#     wants to stick to one linear chain of changesets for as long
+#     as possible, rather than hop from branch to branch.
+#   aesthetic appeal: in a nutshell, we want old branches in the source
+#     repository to appear early in the target repository, and recent
+#     branches to appear late
+#
+# Testing for efficiency is not too hard.  Specifically, assert_chains()
+# lets us test that the topo sort follows linear chains of changesets for as
+# long as reasonable, only breaking at branch points.  Note that
+# assert_chains() is quite dumb: the intelligence comes from the person
+# writing tests and deciding which chains must be followed.
+#
+# Testing for correctness and aesthetic appeal are both done with
+# assert_toposort(), which just takes a list of "valid" sorts.  Obviously
+# we don't list all correct topo sorts for a given DAG.  We don't even
+# bother to list all correct and appealing topo sorts, except for the
+# simpler test cases.  So, it's possible to modify toposort() such that it
+# still generates correct and appealing sorts, but fails a test.  In that
+# case, you need to verify that the resulting sort really is correct and
+# meets the "old branches stay old, recent branches stay recent"
+# requirement and, if so, add it to the list of accepted sorts passed to
+# assert_toposort().
+#
+# Currently, toposort() meets the correctness and efficiency goals, but
+# sometimes fails on aesthetic appeal.  Thus, several test cases pass a
+# correct-but-unappealing sort to assert_toposort() in order that the tests
+# can pass.  Once toposort() is fixed to take aesthetics into account,
+# those sorts (they are commented "yuck" below) can be removed from the
+# list of acceptable sorts.
+
+import sys, os
+sys.path.insert(
+    0, os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
+
+import unittest
+from hgext.convert import convcmd, common
+
+class TestTopoSort(unittest.TestCase):
+    def setUp(self):
+        self.converter = convcmd.converter(
+            dummy_ui(), None, dummy_sink(), None, {})
+
+    def assert_chains(self, chains, parents):
+        actual = " ".join(self.converter.toposort(parents))
+        for chain in chains:
+            #lchain = " ".split(chain)
+            self.assert_(
+                chain in actual,
+                "expected chain %s in sort %s" % (chain, actual))
+
+    def assert_toposort(self, valid_sorts, parents):
+        # valid_sorts should really be the set of correct, efficient,
+        # aesthetically appealing sorts: i.e. we're not just looking for
+        # correctness, we're looking for a topo sort that yields a
+        # space-efficient Mercurial repository by sticking to one branch as
+        # long as it can; and that keeps old branches old, and recent
+        # braches recent.
+        actual = self.converter.toposort(parents)
+        actual = " ".join(actual)
+        self.assert_(actual in valid_sorts,
+                     "expected one of:\n  " +
+                     ("\n  ".join(map(str, valid_sorts))) +
+                     "\nbut got:\n  " + str(actual))
+
+    def test_simple_linear(self):
+        # simple linear chain of changesets (mainly to see if I can unit test
+        # this code)
+        parents = { '1': [], '2': ['1'], '3': ['2'] }
+        self.assert_toposort(('1 2 3',),
+                             parents)
+
+    def test_one_branch(self):
+        # 1 -- 2 -- 4
+        #      \
+        #       +-- 3 -- 5
+        parents = {
+            '1': [], '2': ['1'], '4': ['2'],
+            '3': ['2'], '5': ['3'] }
+        self.assert_chains(['1 2', '3 5', '4'], parents)
+        self.assert_toposort(
+            ('1 2 4 3 5',
+             '1 2 3 5 4',),
+            parents)
+
+    def test_one_merged_branch(self):
+        # 1 -- 2 -- 4 ------- 6
+        #      \             /
+        #       +-- 3 -- 5 -+
+        parents = {
+            '1': [], '2': ['1'], '3': ['2'], '4': ['2'], '5': ['3'],
+            '6': ['4', '5'] }
+        # XXX this is currently broken: toposort() returns 1 2 4 3 5 6,
+        # i.e. it goes as far along the trunk as it can before it has
+        # to backup to include the ancestors of 6
+        #self.assert_chains(
+        #    ['1 2', '4 6', '3 5'],
+        #    parents)
+        self.assert_toposort(
+            ('1 2 4 3 5 6',
+             '1 2 3 5 4 6'),
+            parents)
+
+    def test_two_open_branches(self):
+        # 1 -- 2 -- 4 ------- 6 -- 7
+        #      \              \
+        #       +-- 3 -- 5     +--- 8
+        parents = {
+            '1': [], '2': ['1'], '4': ['2'], '6': ['4'], '7': ['6'],
+            '3': ['2'], '5': ['3'],
+            '8': ['6'] }
+        self.assert_chains(
+            ['1 2', '3 5', '4 6', '8', '7'],
+            parents)
+        self.assert_toposort(
+            ('1 2 4 6 8 7 3 5',  # yuck
+             '1 2 3 5 4 6 7 8',
+             '1 2 3 5 4 6 8 7',),
+            parents)
+
+    def test_many_branches(self):
+        # 1 -- 2 -- 4 -- 6 -- 7 -- 9 -- 11 -- 13 -- 17 -- 19
+        #      \                   \           \
+        #       3 --- 5 --- 8       \           14 -- 16
+        #                            \
+        #                             10 -- 12 -- 15 -- 18 -- 20
+        #                                                 \
+        #                                                  21 -- 22
+        parents = {
+            '1': [], '2': ['1'], '4': ['2'], '6': ['4'], '7': ['6'],
+            '9': ['7'], '11': ['9'], '13': ['11'], '17': ['13'], '19': ['17'],
+            '3': ['2'], '5': ['3'], '8': ['5'],
+            '10': ['9'], '12': ['10'], '15': ['12'], '18': ['15'], '20': ['18'],
+            '14': ['13'], '16': ['14'],
+            '21': ['18'], '22': ['21'] }
+        self.assert_chains(
+            ['1 2', '3 5 8', '4 6 7 9', '10 12 15 18', '21 22', '20',
+             '11 13', '14 16', '17 19'],
+            parents)
+        self.assert_toposort(
+            ('1 2 4 6 7 9 10 12 15 18 21 22 20 11 13 17 19 14 16 3 5 8', # yuck
+             '1 2 3 5 8 4 6 7 9 10 12 15 18 21 22 20 11 13 14 16 17 19',
+             '1 2 3 5 8 4 6 7 9 10 12 15 18 20 11 13 14 16 17 19 21 22'),
+            parents)
+
+    def test_multiple_children(self):
+        # one node has many children
+        # 1 -- 2 -- 9
+        #       \-- 3 -- 5
+        #        \-- 4 -- 7 -- 8
+        #         \-- 6
+        parents = { '1': [], '2': ['1'], '9': ['2'],
+                    '3': ['2'], '5': ['3'],
+                    '4': ['2'], '7': ['4'], '8': ['7'],
+                    '6': ['2'] }
+        expect_chains = ['1 2', '3 5', '4 7 8']
+        self.assert_chains(expect_chains, parents)
+        self.assert_toposort(
+            ('1 2 9 6 4 7 8 3 5',        # yuck
+             '1 2 3 5 4 7 8 6 9',),
+            parents)
+
+        # add some merges to spice things up:
+        # 1 -- 2 -- 9 ----------------- 11
+        #       \-- 3 -- 5            /
+        #        \-- 4 -- 7 -- 8 -- 10
+        #         \-- 6 ------------/
+        parents['10'] = ['8', '6']
+        parents['11'] = ['9', '10']
+        self.assert_chains(expect_chains, parents)
+        self.assert_toposort(
+            ('1 2 9 6 4 7 8 10 11 3 5',     # yuck
+             '1 2 3 5 6 4 7 8 10 9 11',
+             '1 2 6 4 7 8 10 3 5 9 11',
+             '1 2 3 5 4 7 8 6 9 10 11',
+             '1 2 3 5 4 7 8 6 10 9 11',),
+            parents)
+
+    def test_real_life_1(self):
+        # This one came from a real-live CVS repository (stripped down
+        # to bare essentials, obviously).
+        # 741 -- 742 -- 948 -- 949 -- 1023 -- 1025 -- 1260 -- 1261
+        #  \             \                              \
+        #   \             1024                           1698
+        #    \
+        #     1413 -- 1417 -- 1434
+        parents = {
+            '741': [], '742': ['741'],
+            '948': ['742'], '949': ['948'],
+            '1023': ['949'], '1025': ['1023'],
+            '1260': ['1025'], '1261': ['1260'],
+            '1413': ['741'], '1417': ['1413'], '1434': ['1417'],
+            '1024': ['948'],
+            '1698': ['1260'],
+            }
+        self.assert_chains(
+            ['1413 1417 1434',
+             '949 1023 1025 1260'],
+            parents)
+        self.assert_toposort(
+            ('741 742 948 1413 1417 1434 1024 949 1023 1025 1260 1699 1261',
+             '741 1413 1417 1434 742 948 1024 949 1023 1025 1260 1698 1261',),
+            parents)
+
+
+class dummy_ui(object):
+    status = warn = note = debug = lambda self, *msg: None
+
+class dummy_sink(common.converter_sink):
+    def __init__(self):
+        pass
+
+# argh: PyUnit writes to stderr
+os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
+unittest.main()
diff --git a/tests/test-convert-toposort.out b/tests/test-convert-toposort.out
new file mode 100644
--- /dev/null
+++ b/tests/test-convert-toposort.out
@@ -0,0 +1,5 @@
+.......
+----------------------------------------------------------------------
+Ran 7 tests in 0.003s
+
+OK