[PATCH 2 of 9 "] discovery: moved sampling functions inside discovery object

Pierre-Yves David pierre-yves.david at ens-lyon.org
Tue Mar 5 12:39:13 EST 2019


# HG changeset patch
# User Georges Racinet <georges.racinet at octobus.net>
# Date 1551308119 -3600
#      Wed Feb 27 23:55:19 2019 +0100
# Node ID 3a3743e61f7682c7b1b3c8e785e9a69aec9bc07e
# Parent  d0213fa8b2d4fa5f46f4efc79f8562faa9dfd2ef
# EXP-Topic discovery-speedup
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 3a3743e61f76
discovery: moved sampling functions inside discovery object

In this patch, we transform the sampling functions into
methods of the `partialdiscovery` class in the Python case.

This will provide multiple benefit. For example we can keep some cache from one
sampling to another. In addition this will help the Oxidation work as all graph
traversal related logic will be contained in a single object.

diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py
--- a/mercurial/setdiscovery.py
+++ b/mercurial/setdiscovery.py
@@ -92,69 +92,6 @@ def _updatesample(revs, heads, sample, p
                 dist.setdefault(p, d + 1)
                 visit.append(p)
 
-def _takequicksample(repo, headrevs, revs, size):
-    """takes a quick sample of size <size>
-
-    It is meant for initial sampling and focuses on querying heads and close
-    ancestors of heads.
-
-    :dag: a dag object
-    :headrevs: set of head revisions in local DAG to consider
-    :revs: set of revs to discover
-    :size: the maximum size of the sample"""
-    if len(revs) <= size:
-        return list(revs)
-    sample = set(repo.revs('heads(%ld)', revs))
-
-    if len(sample) >= size:
-        return _limitsample(sample, size)
-
-    _updatesample(None, headrevs, sample, repo.changelog.parentrevs,
-                  quicksamplesize=size)
-    return sample
-
-def _takefullsample(repo, headrevs, revs, size):
-    if len(revs) <= size:
-        return list(revs)
-    sample = set(repo.revs('heads(%ld)', revs))
-
-    # update from heads
-    revsheads = set(repo.revs('heads(%ld)', revs))
-    _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
-
-    # update from roots
-    revsroots = set(repo.revs('roots(%ld)', revs))
-
-    # _updatesample() essentially does interaction over revisions to look up
-    # their children. This lookup is expensive and doing it in a loop is
-    # quadratic. We precompute the children for all relevant revisions and
-    # make the lookup in _updatesample() a simple dict lookup.
-    #
-    # Because this function can be called multiple times during discovery, we
-    # may still perform redundant work and there is room to optimize this by
-    # keeping a persistent cache of children across invocations.
-    children = {}
-
-    parentrevs = repo.changelog.parentrevs
-    for rev in repo.changelog.revs(start=min(revsroots)):
-        # Always ensure revision has an entry so we don't need to worry about
-        # missing keys.
-        children.setdefault(rev, [])
-
-        for prev in parentrevs(rev):
-            if prev == nullrev:
-                continue
-
-            children.setdefault(prev, []).append(rev)
-
-    _updatesample(revs, revsroots, sample, children.__getitem__)
-    assert sample
-    sample = _limitsample(sample, size)
-    if len(sample) < size:
-        more = size - len(sample)
-        sample.update(random.sample(list(revs - sample), more))
-    return sample
-
 def _limitsample(sample, desiredlen):
     """return a random subset of sample of at most desiredlen item"""
     if len(sample) > desiredlen:
@@ -228,6 +165,70 @@ class partialdiscovery(object):
         # common.bases and all its ancestors
         return self._common.basesheads()
 
+    def takequicksample(self, headrevs, size):
+        """takes a quick sample of size <size>
+
+        It is meant for initial sampling and focuses on querying heads and close
+        ancestors of heads.
+
+        :headrevs: set of head revisions in local DAG to consider
+        :size: the maximum size of the sample"""
+        revs = self.undecided
+        if len(revs) <= size:
+            return list(revs)
+        sample = set(self._repo.revs('heads(%ld)', revs))
+
+        if len(sample) >= size:
+            return _limitsample(sample, size)
+
+        _updatesample(None, headrevs, sample, self._repo.changelog.parentrevs,
+                      quicksamplesize=size)
+        return sample
+
+    def takefullsample(self, headrevs, size):
+        revs = self.undecided
+        if len(revs) <= size:
+            return list(revs)
+        repo = self._repo
+        sample = set(repo.revs('heads(%ld)', revs))
+
+        # update from heads
+        revsheads = set(repo.revs('heads(%ld)', revs))
+        _updatesample(revs, revsheads, sample, repo.changelog.parentrevs)
+
+        # update from roots
+        revsroots = set(repo.revs('roots(%ld)', revs))
+
+        # _updatesample() essentially does interaction over revisions to look
+        # up their children. This lookup is expensive and doing it in a loop is
+        # quadratic. We precompute the children for all relevant revisions and
+        # make the lookup in _updatesample() a simple dict lookup.
+        #
+        # Because this function can be called multiple times during discovery,
+        # we may still perform redundant work and there is room to optimize
+        # this by keeping a persistent cache of children across invocations.
+        children = {}
+
+        parentrevs = repo.changelog.parentrevs
+        for rev in repo.changelog.revs(start=min(revsroots)):
+            # Always ensure revision has an entry so we don't need to worry
+            # about missing keys.
+            children.setdefault(rev, [])
+
+            for prev in parentrevs(rev):
+                if prev == nullrev:
+                    continue
+
+                children.setdefault(prev, []).append(rev)
+
+        _updatesample(revs, revsroots, sample, children.__getitem__)
+        assert sample
+        sample = _limitsample(sample, size)
+        if len(sample) < size:
+            more = size - len(sample)
+            sample.update(random.sample(list(revs - sample), more))
+        return sample
+
 def findcommonheads(ui, local, remote,
                     initialsamplesize=100,
                     fullsamplesize=200,
@@ -309,14 +310,14 @@ def findcommonheads(ui, local, remote,
                 ui.note(_("sampling from both directions\n"))
             else:
                 ui.debug("taking initial sample\n")
-            samplefunc = _takefullsample
+            samplefunc = disco.takefullsample
             targetsize = fullsamplesize
         else:
             # use even cheaper initial sample
             ui.debug("taking quick initial sample\n")
-            samplefunc = _takequicksample
+            samplefunc = disco.takequicksample
             targetsize = initialsamplesize
-        sample = samplefunc(local, ownheads, disco.undecided, targetsize)
+        sample = samplefunc(ownheads, targetsize)
 
         roundtrips += 1
         progress.update(roundtrips)


More information about the Mercurial-devel mailing list