D6345: rust-discovery: optionally don't randomize at all, for tests

Mon May 6 15:31:04 UTC 2019

gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  As seen from Python, this is a new `randomize` kwarg in init of the
  discovery object. It replaces random picking by some arbitrary yet
  deterministic strategy.
  
  This is the same as what test-setdiscovery.t does, with the added
  benefit to be usable both in Python and Rust implementations.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6345

AFFECTED FILES
  mercurial/setdiscovery.py
  rust/hg-core/src/discovery.rs
  rust/hg-cpython/src/discovery.rs
  tests/test-rust-discovery.py

CHANGE DETAILS

diff --git a/tests/test-rust-discovery.py b/tests/test-rust-discovery.py
--- a/tests/test-rust-discovery.py
+++ b/tests/test-rust-discovery.py
@@ -106,6 +106,10 @@
         self.assertTrue(disco.iscomplete())
         self.assertEqual(disco.commonheads(), {1})
 
+    def testinitnorandom(self):
+        idx = self.parseindex()
+        PartialDiscovery(idx, [3], randomize=False)
+
 if __name__ == '__main__':
     import silenttestrunner
     silenttestrunner.main(__name__)
diff --git a/rust/hg-cpython/src/discovery.rs b/rust/hg-cpython/src/discovery.rs
--- a/rust/hg-cpython/src/discovery.rs
+++ b/rust/hg-cpython/src/discovery.rs
@@ -30,13 +30,15 @@
     def __new__(
         _cls,
         index: PyObject,
-        targetheads: PyObject
+        targetheads: PyObject,
+        randomize: bool = true
     ) -> PyResult<PartialDiscovery> {
         Self::create_instance(
             py,
             RefCell::new(Box::new(CorePartialDiscovery::new(
                 Index::new(py, index)?,
                 rev_pyiter_collect(py, &targetheads)?,
+                randomize,
             )))
         )
     }
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -29,6 +29,7 @@
     children_cache: Option<HashMap<Revision, Vec<Revision>>>,
     missing: HashSet<Revision>,
     rng: Rng,
+    randomize: bool,
 }
 
 pub struct DiscoveryStats {
@@ -144,16 +145,32 @@
     /// If we want to make the signature more flexible,
     /// we'll have to make it a type argument of `PartialDiscovery` or a trait
     /// object since we'll keep it in the meanwhile
-    pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
+    ///
+    /// The `randomize` boolean affects sampling, and specifically how
+    /// limiting or last-minute expanding is been done:
+    ///
+    /// If `true`, both will perform random picking from `self.undecided`.
+    /// This is currently the best for actual discoveries.
+    ///
+    /// If `false`, a reproductible picking strategy is performed. This is
+    /// useful for integration tests.
+    pub fn new(
+        graph: G,
+        target_heads: Vec<Revision>,
+        randomize: bool,
+    ) -> Self {
         let mut seed: [u8; 16] = [0; 16];
-        thread_rng().fill_bytes(&mut seed);
-        Self::new_with_seed(graph, target_heads, seed)
+        if randomize {
+            thread_rng().fill_bytes(&mut seed);
+        }
+        Self::new_with_seed(graph, target_heads, seed, randomize)
     }
 
     pub fn new_with_seed(
         graph: G,
         target_heads: Vec<Revision>,
         seed: [u8; 16],
+        randomize: bool,
     ) -> Self {
         PartialDiscovery {
             undecided: None,
@@ -163,6 +180,7 @@
             common: MissingAncestors::new(graph, vec![]),
             missing: HashSet::new(),
             rng: Rng::from_seed(seed),
+            randomize: randomize,
         }
     }
 
@@ -173,6 +191,11 @@
         mut sample: Vec<Revision>,
         size: usize,
     ) -> Vec<Revision> {
+        if !self.randomize {
+            sample.sort();
+            sample.truncate(size);
+            return sample
+        }
         let sample_len = sample.len();
         if sample_len <= size {
             return sample;
@@ -413,20 +436,22 @@
 
     /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
     ///
-    /// To avoid actual randomness in tests, we give it a fixed random seed.
+    /// To avoid actual randomness in these tests, we give it a fixed
+    /// random seed, but by default we'll test the random version.
     fn full_disco() -> PartialDiscovery<SampleGraph> {
         PartialDiscovery::new_with_seed(
             SampleGraph,
             vec![10, 11, 12, 13],
             [0; 16],
+            true,
         )
     }
 
     /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
     ///
     /// To avoid actual randomness in tests, we give it a fixed random seed.
     fn disco12() -> PartialDiscovery<SampleGraph> {
-        PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
+        PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true)
     }
 
     fn sorted_undecided(
@@ -511,6 +536,14 @@
     }
 
     #[test]
+    fn test_limit_sample_no_random() {
+        let mut disco = full_disco();
+        disco.randomize = false;
+        assert_eq!(disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
+                   vec![1, 3, 5, 7]);
+    }
+
+    #[test]
     fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
         let mut disco = full_disco();
         disco.undecided = Some((1..=13).collect());
diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py
--- a/mercurial/setdiscovery.py
+++ b/mercurial/setdiscovery.py
@@ -92,11 +92,19 @@
                 dist.setdefault(p, d + 1)
                 visit.append(p)
 
-def _limitsample(sample, desiredlen):
-    """return a random subset of sample of at most desiredlen item"""
-    if len(sample) > desiredlen:
-        sample = set(random.sample(sample, desiredlen))
-    return sample
+def _limitsample(sample, desiredlen, randomize=True):
+    """return a random subset of sample of at most desiredlen item.
+
+    If randomize is False, though, a deterministic subset is returned.
+    This is meant for integration tests.
+    """
+    if len(sample) <= desiredlen:
+        return sample
+    if randomize:
+        return set(random.sample(sample, desiredlen))
+    sample = list(sample)
+    sample.sort()
+    return set(sample[:desiredlen])
 
 class partialdiscovery(object):
     """an object representing ongoing discovery
@@ -110,13 +118,14 @@
     (all tracked revisions are known locally)
     """
 
-    def __init__(self, repo, targetheads):
+    def __init__(self, repo, targetheads, randomize=True):
         self._repo = repo
         self._targetheads = targetheads
         self._common = repo.changelog.incrementalmissingrevs()
         self._undecided = None
         self.missing = set()
         self._childrenmap = None
+        self.randomize = randomize
 
     def addcommons(self, commons):
         """register nodes known as common"""
@@ -221,7 +230,7 @@
         sample = set(self._repo.revs('heads(%ld)', revs))
 
         if len(sample) >= size:
-            return _limitsample(sample, size)
+            return _limitsample(sample, size, randomize=self.randomize)
 
         _updatesample(None, headrevs, sample, self._parentsgetter(),
                       quicksamplesize=size)
@@ -246,10 +255,15 @@
 
         _updatesample(revs, revsroots, sample, childrenrevs)
         assert sample
-        sample = _limitsample(sample, size)
+        sample = _limitsample(sample, size, randomize=self.randomize)
         if len(sample) < size:
             more = size - len(sample)
-            sample.update(random.sample(list(revs - sample), more))
+            takefrom = list(revs - sample)
+            if self.randomize:
+                sample.update(random.sample(takefrom, more))
+            else:
+                takefrom.sort()
+                sample.update(takefrom[:more])
         return sample
 
 def findcommonheads(ui, local, remote,



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel