D5945: rust: itering less on MissingAncestors.bases for max()

gracinet (Georges Racinet) phabricator at mercurial-scm.org
Tue Feb 12 12:02:41 UTC 2019


gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Instead of iterating on the whole `self.bases` each time to find
  its max, we keep the latter in a separate member attribute and
  keep it up to date in `add_bases()`
  
  On a perfdiscovery done on PyPy, with repos prepared with
  `contrib/discovery-helper.sh 50 100`, this gives a slight
  improvement (around 0.5% on wall time, but 10% on CPU)
  
  before:
  ! wall 0.172801 comb 0.180000 user 0.180000 sys 0.000000 (median of 541)
  after:
  ! wall 0.171798 comb 0.160000 user 0.160000 sys 0.000000 (median of 551)
  
  (perf command run time upped because of bigger variability during this test).

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

AFFECTED FILES
  contrib/perf.py
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/dagops.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs
--- a/rust/hg-core/src/dagops.rs
+++ b/rust/hg-core/src/dagops.rs
@@ -46,7 +46,9 @@
     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
     heads.remove(&NULL_REVISION);
     for rev in iter_revs {
-        remove_parents(graph, *rev, &mut heads)?;
+        if *rev != NULL_REVISION {
+            remove_parents(graph, *rev, &mut heads)?;
+        }
     }
     Ok(heads)
 }
@@ -71,7 +73,9 @@
     // mutating
     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
     for rev in as_vec {
-        remove_parents(graph, rev, revs)?;
+        if rev != NULL_REVISION {
+            remove_parents(graph, rev, revs)?;
+        }
     }
     Ok(())
 }
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -38,6 +38,7 @@
 pub struct MissingAncestors<G: Graph> {
     graph: G,
     bases: HashSet<Revision>,
+    max_base: Option<Revision>,
 }
 
 impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
 
 impl<G: Graph> MissingAncestors<G> {
     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
-        MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+        let mut created = MissingAncestors {
+            graph: graph,
+            bases: HashSet::new(),
+            max_base: None,
+        };
+        created.add_bases(bases);
+        created
     }
 
     pub fn has_bases(&self) -> bool {
@@ -237,12 +244,26 @@
         Ok(self.bases)
     }
 
+    /// Add some revisions to `self.bases`
+    ///
+    /// Takes care of keeping `self.max_base` up to date.
     pub fn add_bases(
         &mut self,
         new_bases: impl IntoIterator<Item = Revision>,
     ) {
-        self.bases.extend(new_bases);
+        // even if the type of Revision changes,
+        // NULL_REVISION would keep being the smallest
+        let mut max_base = self.max_base.unwrap_or(NULL_REVISION);
+        self.bases.extend(new_bases.into_iter().map(|r| {
+            if r > max_base {
+                max_base = r;
+            }
+            r
+        }));
         self.bases.remove(&NULL_REVISION);
+        if max_base != NULL_REVISION {
+            self.max_base = Some(max_base);
+        }
     }
 
     /// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,14 +282,11 @@
         }
         // anything in revs > start is definitely not an ancestor of bases
         // revs <= start need to be investigated
-        // TODO optim: if a missingancestors is to be used several times,
-        // we shouldn't need to iterate each time on bases
-        let start = match self.bases.iter().cloned().max() {
-            Some(m) => m,
-            None => {  // self.bases is empty
-                return Ok(());
-            }
-        };
+
+        let start = match self.max_base {
+            Some(r) => r,
+            None => {return Ok(());}};
+
         // whatever happens, we'll keep at least keepcount of them
         // knowing this gives us a earlier stop condition than
         // going all the way to the root
@@ -285,12 +303,17 @@
         Ok(())
     }
 
-    /// Add rev's parents to self.bases
+    /// Add the parents of `rev` to `self.bases`
+    ///
+    /// This has no effect on `self.max_base`
     #[inline]
     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
-        // No need to bother the set with inserting NULL_REVISION over and
-        // over
+        if rev == NULL_REVISION {
+            return Ok(());
+        }
         for p in self.graph.parents(rev)?.iter().cloned() {
+            // No need to bother the set with inserting NULL_REVISION over and
+            // over
             if p != NULL_REVISION {
                 self.bases.insert(p);
             }
@@ -320,12 +343,8 @@
         if revs_visit.is_empty() {
             return Ok(Vec::new());
         }
-
-        let max_bases =
-            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let max_revs =
-            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let start = max(max_bases, max_revs);
+        let max_revs = revs_visit.iter().cloned().max().unwrap();
+        let start = max(self.max_base.unwrap_or(max_revs), max_revs);
 
         // TODO heuristics for with_capacity()?
         let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +590,13 @@
             missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5]);
+        assert_eq!(missing_ancestors.max_base, Some(5));
 
         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+        assert_eq!(missing_ancestors.max_base, Some(8));
 
         as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
         as_vec.sort();
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -310,9 +310,9 @@
         count += 1
         results.append(item[0])
         cstop = util.timer()
-        if cstop - begin > 3 and count >= 100:
+        if cstop - begin > 3 and count >= 1000:
             break
-        if cstop - begin > 10 and count >= 3:
+        if cstop - begin > 90 and count >= 300:
             break
 
     formatone(fm, results, title=title, result=r,



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list