D5416: rust: translation of missingancestors

gracinet (Georges Racinet) phabricator at mercurial-scm.org
Fri Dec 14 22:20:35 EST 2018


This revision was automatically updated to reflect the committed changes.
gracinet marked an inline comment as done.
Closed by commit rHG5817c3b186a7: rust: translation of missingancestors (authored by gracinet, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5416?vs=12855&id=12869

REVISION DETAIL
  https://phab.mercurial-scm.org/D5416

AFFECTED FILES
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/lib.rs
  tests/test-ancestor.py
  tests/test-ancestor.py.out

CHANGE DETAILS

diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out
--- a/tests/test-ancestor.py.out
+++ b/tests/test-ancestor.py.out
@@ -1,3 +1,17 @@
+% removeancestorsfrom(), example 1
+remaining (sorted): [5, 6, 8, 9]
+% removeancestorsfrom(), example 2
+remaining (sorted): [11, 12, 13, 14]
+% removeancestorsfrom(), example 3
+remaining (sorted): [3, 5]
+% missingancestors(), example 1
+return [3, 7, 11]
+% missingancestors(), example 2
+return [5, 10]
+% missingancestors(), example 3
+return [3, 6, 9, 11]
+% removeancestorsfrom(), bigger graph
+Ok
 % lazy ancestor set for [], stoprev = 0, inclusive = False
 membership: []
 iteration:  []
diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py
--- a/tests/test-ancestor.py
+++ b/tests/test-ancestor.py
@@ -182,6 +182,64 @@
          5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7],
          10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]}
 
+def test_missingancestors_explicit():
+    """A few explicit cases, easier to check for catching errors in refactors.
+
+    The bigger graph at the end has been produced by the random generator
+    above, and we have some evidence that the other tests don't cover it.
+    """
+    for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))),
+                                       ({10}, set({11, 12, 13, 14})),
+                                       ({7}, set({1, 2, 3, 4, 5})),
+                                       )):
+        print("%% removeancestorsfrom(), example %d" % (i + 1))
+        missanc = ancestor.incrementalmissingancestors(graph.get, bases)
+        missanc.removeancestorsfrom(revs)
+        print("remaining (sorted): %s" % sorted(list(revs)))
+
+    for i, (bases, revs) in enumerate((({10}, {11}),
+                                       ({11}, {10}),
+                                       ({7}, {9, 11}),
+                                       )):
+        print("%% missingancestors(), example %d" % (i + 1))
+        missanc = ancestor.incrementalmissingancestors(graph.get, bases)
+        print("return %s" % missanc.missingancestors(revs))
+
+    print("% removeancestorsfrom(), bigger graph")
+    vecgraph = [
+        [-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1],
+        [2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1],
+        [13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1],
+        [19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1],
+        [3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9],
+        [32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1],
+        [38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1],
+        [44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1],
+        [-1, -1], [51, -1], [52, -1], [53, -1], [14, -1],
+        [55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1],
+        [61, 59], [62, -1], [63, -1], [-1, -1], [65, -1],
+        [66, -1], [67, -1], [68, -1], [37, 28], [69, 25],
+        [71, -1], [72, -1], [50, 2], [74, -1], [12, -1],
+        [18, -1], [77, -1], [78, -1], [79, -1], [43, 33],
+        [81, -1], [82, -1], [83, -1], [84, 45], [85, -1],
+        [86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1],
+        [92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1],
+        [-1, -1]]
+    problem_rev = 28
+    problem_base = 70
+    # problem_rev is a parent of problem_base, but a faulty implementation
+    # could forget to remove it.
+    bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6}
+    if problem_rev not in vecgraph[problem_base] or problem_base not in bases:
+        print("Conditions have changed")
+    missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases)
+    revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44}
+    missanc.removeancestorsfrom(revs)
+    if 28 in revs:
+        print("Failed!")
+    else:
+        print("Ok")
+
 def genlazyancestors(revs, stoprev=0, inclusive=False):
     print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
            (revs, stoprev, inclusive)))
@@ -276,6 +334,7 @@
             seed = long(time.time() * 1000)
 
     rng = random.Random(seed)
+    test_missingancestors_explicit()
     test_missingancestors(seed, rng)
     test_lazyancestors()
     test_gca()
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -3,7 +3,7 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 mod ancestors;
-pub use ancestors::AncestorsIterator;
+pub use ancestors::{AncestorsIterator, MissingAncestors};
 
 /// Mercurial revision numbers
 ///
@@ -15,6 +15,9 @@
 
 /// The simplest expression of what we need of Mercurial DAGs.
 pub trait Graph {
+    /// Return the two parents of the given `Revision`.
+    ///
+    /// Each of the parents can be independently `NULL_REVISION`
     fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
 }
 
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -8,6 +8,7 @@
 //! Rust versions of generic DAG ancestors algorithms for Mercurial
 
 use super::{Graph, GraphError, Revision, NULL_REVISION};
+use std::cmp::max;
 use std::collections::{BinaryHeap, HashSet};
 
 /// Iterator over the ancestors of a given list of revisions
@@ -24,6 +25,11 @@
     stoprev: Revision,
 }
 
+pub struct MissingAncestors<G: Graph> {
+    graph: G,
+    bases: HashSet<Revision>,
+}
+
 impl<G: Graph> AncestorsIterator<G> {
     /// Constructor.
     ///
@@ -131,10 +137,187 @@
     }
 }
 
+impl<G: Graph> MissingAncestors<G> {
+    pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
+        let mut bases: HashSet<Revision> = bases.into_iter().collect();
+        if bases.is_empty() {
+            bases.insert(NULL_REVISION);
+        }
+        MissingAncestors { graph, bases }
+    }
+
+    pub fn has_bases(&self) -> bool {
+        self.bases.iter().any(|&b| b != NULL_REVISION)
+    }
+
+    /// Return a reference to current bases.
+    ///
+    /// This is useful in unit tests, but also setdiscovery.py does
+    /// read the bases attribute of a ancestor.missingancestors instance.
+    pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
+        &self.bases
+    }
+
+    pub fn add_bases(
+        &mut self,
+        new_bases: impl IntoIterator<Item = Revision>,
+    ) {
+        self.bases.extend(new_bases);
+    }
+
+    /// Remove all ancestors of self.bases from the revs set (in place)
+    pub fn remove_ancestors_from(
+        &mut self,
+        revs: &mut HashSet<Revision>,
+    ) -> Result<(), GraphError> {
+        revs.retain(|r| !self.bases.contains(r));
+        // the null revision is always an ancestor
+        revs.remove(&NULL_REVISION);
+        if revs.is_empty() {
+            return Ok(());
+        }
+        // anything in revs > start is definitely not an ancestor of bases
+        // revs <= start need to be investigated
+        // TODO optim: if a missingancestors is to be used several times,
+        // we shouldn't need to iterate each time on bases
+        let start = match self.bases.iter().cloned().max() {
+            Some(m) => m,
+            None => {
+                // bases is empty (shouldn't happen, but let's be safe)
+                return Ok(());
+            }
+        };
+        // whatever happens, we'll keep at least keepcount of them
+        // knowing this gives us a earlier stop condition than
+        // going all the way to the root
+        let keepcount = revs.iter().filter(|r| **r > start).count();
+
+        let mut curr = start;
+        while curr != NULL_REVISION && revs.len() > keepcount {
+            if self.bases.contains(&curr) {
+                revs.remove(&curr);
+                self.add_parents(curr)?;
+            }
+            curr -= 1;
+        }
+        Ok(())
+    }
+
+    /// Add rev's parents to self.bases
+    #[inline]
+    fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
+        // No need to bother the set with inserting NULL_REVISION over and
+        // over
+        for p in self.graph.parents(rev)?.iter().cloned() {
+            if p != NULL_REVISION {
+                self.bases.insert(p);
+            }
+        }
+        Ok(())
+    }
+
+    /// Return all the ancestors of revs that are not ancestors of self.bases
+    ///
+    /// This may include elements from revs.
+    ///
+    /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
+    /// revision number order, which is a topological order.
+    pub fn missing_ancestors(
+        &mut self,
+        revs: impl IntoIterator<Item = Revision>,
+    ) -> Result<Vec<Revision>, GraphError> {
+        // just for convenience and comparison with Python version
+        let bases_visit = &mut self.bases;
+        let mut revs: HashSet<Revision> = revs
+            .into_iter()
+            .filter(|r| !bases_visit.contains(r))
+            .collect();
+        let revs_visit = &mut revs;
+        let mut both_visit: HashSet<Revision> =
+            revs_visit.intersection(&bases_visit).cloned().collect();
+        if revs_visit.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let max_bases =
+            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
+        let max_revs =
+            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
+        let start = max(max_bases, max_revs);
+
+        // TODO heuristics for with_capacity()?
+        let mut missing: Vec<Revision> = Vec::new();
+        for curr in (0..=start).map(|i| start - i) {
+            if revs_visit.is_empty() {
+                break;
+            }
+            if both_visit.contains(&curr) {
+                // curr's parents might have made it into revs_visit through
+                // another path
+                // TODO optim: Rust's HashSet.remove returns a boolean telling
+                // if it happened. This will spare us one set lookup
+                both_visit.remove(&curr);
+                for p in self.graph.parents(curr)?.iter().cloned() {
+                    if p == NULL_REVISION {
+                        continue;
+                    }
+                    revs_visit.remove(&p);
+                    bases_visit.insert(p);
+                    both_visit.insert(p);
+                }
+                continue;
+            }
+            // in Rust, one can't just use mutable variables assignation
+            // to be more straightforward. Instead of Python's
+            // thisvisit and othervisit, we'll differentiate with a boolean
+            let this_visit_is_revs = {
+                if revs_visit.remove(&curr) {
+                    missing.push(curr);
+                    true
+                } else if bases_visit.contains(&curr) {
+                    false
+                } else {
+                    // not an ancestor of revs or bases: ignore
+                    continue;
+                }
+            };
+
+            for p in self.graph.parents(curr)?.iter().cloned() {
+                if p == NULL_REVISION {
+                    continue;
+                }
+                let in_other_visit = if this_visit_is_revs {
+                    bases_visit.contains(&p)
+                } else {
+                    revs_visit.contains(&p)
+                };
+                if in_other_visit || both_visit.contains(&p) {
+                    // p is implicitely in this_visit.
+                    // This means p is or should be in bothvisit
+                    // TODO optim: hence if bothvisit, we look up twice
+                    revs_visit.remove(&p);
+                    bases_visit.insert(p);
+                    both_visit.insert(p);
+                } else {
+                    // visit later
+                    if this_visit_is_revs {
+                        revs_visit.insert(p);
+                    } else {
+                        bases_visit.insert(p);
+                    }
+                }
+            }
+        }
+        missing.reverse();
+        Ok(missing)
+    }
+}
+
 #[cfg(test)]
 mod tests {
 
     use super::*;
+    use std::iter::FromIterator;
 
     #[derive(Clone, Debug)]
     struct Stub;
@@ -252,4 +435,211 @@
             AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
         assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
     }
+
+    #[test]
+    /// Test constructor, add/get bases
+    fn test_missing_bases() {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, [5, 3, 1, 3].iter().cloned());
+        let mut as_vec: Vec<Revision> =
+            missing_ancestors.get_bases().iter().cloned().collect();
+        as_vec.sort();
+        assert_eq!(as_vec, [1, 3, 5]);
+
+        missing_ancestors.add_bases([3, 7, 8].iter().cloned());
+        as_vec = missing_ancestors.get_bases().iter().cloned().collect();
+        as_vec.sort();
+        assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+    }
+
+    fn assert_missing_remove(
+        bases: &[Revision],
+        revs: &[Revision],
+        expected: &[Revision],
+    ) {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, bases.iter().cloned());
+        let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
+        missing_ancestors
+            .remove_ancestors_from(&mut revset)
+            .unwrap();
+        let mut as_vec: Vec<Revision> = revset.into_iter().collect();
+        as_vec.sort();
+        assert_eq!(as_vec.as_slice(), expected);
+    }
+
+    #[test]
+    fn test_missing_remove() {
+        assert_missing_remove(
+            &[1, 2, 3, 4, 7],
+            Vec::from_iter(1..10).as_slice(),
+            &[5, 6, 8, 9],
+        );
+        assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
+        assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
+    }
+
+    fn assert_missing_ancestors(
+        bases: &[Revision],
+        revs: &[Revision],
+        expected: &[Revision],
+    ) {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, bases.iter().cloned());
+        let missing = missing_ancestors
+            .missing_ancestors(revs.iter().cloned())
+            .unwrap();
+        assert_eq!(missing.as_slice(), expected);
+    }
+
+    #[test]
+    fn test_missing_ancestors() {
+        // examples taken from test-ancestors.py by having it run
+        // on the same graph (both naive and fast Python algs)
+        assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
+        assert_missing_ancestors(&[11], &[10], &[5, 10]);
+        assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
+    }
+
+    // A Graph represented by a vector whose indices are revisions
+    // and values are parents of the revisions
+    type VecGraph = Vec<[Revision; 2]>;
+
+    impl Graph for VecGraph {
+        fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+            Ok(self[rev as usize])
+        }
+    }
+
+    /// An interesting case found by a random generator similar to
+    /// the one in test-ancestor.py. An early version of Rust MissingAncestors
+    /// failed this, yet none of the integration tests of the whole suite
+    /// catched it.
+    #[test]
+    fn test_remove_ancestors_from_case1() {
+        let graph: VecGraph = vec![
+            [NULL_REVISION, NULL_REVISION],
+            [0, NULL_REVISION],
+            [1, 0],
+            [2, 1],
+            [3, NULL_REVISION],
+            [4, NULL_REVISION],
+            [5, 1],
+            [2, NULL_REVISION],
+            [7, NULL_REVISION],
+            [8, NULL_REVISION],
+            [9, NULL_REVISION],
+            [10, 1],
+            [3, NULL_REVISION],
+            [12, NULL_REVISION],
+            [13, NULL_REVISION],
+            [14, NULL_REVISION],
+            [4, NULL_REVISION],
+            [16, NULL_REVISION],
+            [17, NULL_REVISION],
+            [18, NULL_REVISION],
+            [19, 11],
+            [20, NULL_REVISION],
+            [21, NULL_REVISION],
+            [22, NULL_REVISION],
+            [23, NULL_REVISION],
+            [2, NULL_REVISION],
+            [3, NULL_REVISION],
+            [26, 24],
+            [27, NULL_REVISION],
+            [28, NULL_REVISION],
+            [12, NULL_REVISION],
+            [1, NULL_REVISION],
+            [1, 9],
+            [32, NULL_REVISION],
+            [33, NULL_REVISION],
+            [34, 31],
+            [35, NULL_REVISION],
+            [36, 26],
+            [37, NULL_REVISION],
+            [38, NULL_REVISION],
+            [39, NULL_REVISION],
+            [40, NULL_REVISION],
+            [41, NULL_REVISION],
+            [42, 26],
+            [0, NULL_REVISION],
+            [44, NULL_REVISION],
+            [45, 4],
+            [40, NULL_REVISION],
+            [47, NULL_REVISION],
+            [36, 0],
+            [49, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [51, NULL_REVISION],
+            [52, NULL_REVISION],
+            [53, NULL_REVISION],
+            [14, NULL_REVISION],
+            [55, NULL_REVISION],
+            [15, NULL_REVISION],
+            [23, NULL_REVISION],
+            [58, NULL_REVISION],
+            [59, NULL_REVISION],
+            [2, NULL_REVISION],
+            [61, 59],
+            [62, NULL_REVISION],
+            [63, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [65, NULL_REVISION],
+            [66, NULL_REVISION],
+            [67, NULL_REVISION],
+            [68, NULL_REVISION],
+            [37, 28],
+            [69, 25],
+            [71, NULL_REVISION],
+            [72, NULL_REVISION],
+            [50, 2],
+            [74, NULL_REVISION],
+            [12, NULL_REVISION],
+            [18, NULL_REVISION],
+            [77, NULL_REVISION],
+            [78, NULL_REVISION],
+            [79, NULL_REVISION],
+            [43, 33],
+            [81, NULL_REVISION],
+            [82, NULL_REVISION],
+            [83, NULL_REVISION],
+            [84, 45],
+            [85, NULL_REVISION],
+            [86, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [88, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [76, 83],
+            [44, NULL_REVISION],
+            [92, NULL_REVISION],
+            [93, NULL_REVISION],
+            [9, NULL_REVISION],
+            [95, 67],
+            [96, NULL_REVISION],
+            [97, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+        ];
+        let problem_rev = 28 as Revision;
+        let problem_base = 70 as Revision;
+        // making the problem obvious: problem_rev is a parent of problem_base
+        assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
+
+        let mut missing_ancestors: MissingAncestors<VecGraph> =
+            MissingAncestors::new(
+                graph,
+                [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
+                    .iter()
+                    .cloned(),
+            );
+        assert!(missing_ancestors.bases.contains(&problem_base));
+
+        let mut revs: HashSet<Revision> =
+            [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
+                .iter()
+                .cloned()
+                .collect();
+        missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
+        assert!(!revs.contains(&problem_rev));
+    }
+
 }



To: gracinet, #hg-reviewers
Cc: yuja, durin42, kevincox, mjpieters, mercurial-devel


More information about the Mercurial-devel mailing list