[PATCH 16 of 21 V2] speedy: create a cache of local nodes

Tomasz Kleczek tkleczek at fb.com
Thu Dec 13 20:52:28 CST 2012


# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355360374 28800
# Node ID a8066abf4b9d2a1d751ce9eeeb2b334d86eb18c1
# Parent  2fd469f383645ddd6bdf96b412f5b5f5bc5d6606
speedy: create a cache of local nodes

To compute an answer to a query we have to combine the server
result with the result for local changes. Computing the list of local
changes requires communicating with the remote server, which usually
takes >1s.

To avoid this costly operation we store previously computed list
in a local file and use it next time to compute the superset of
the local nodes which is usually sufficient to answer the query
correctly.

This allows for a ~1.4 second performance improvement on every history
query if communication with the server repository is not necessary
(most of the cases).

diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -9,6 +9,7 @@
 from mercurial import localrepo
 from mercurial.i18n import _
 from mercurial import cmdutil
+from mercurial.node import bin, hex
 import server
 import protocol
 import tcptransport
@@ -20,6 +21,68 @@
 def revstonodes(repo, revs):
     return [repo[r].node() for r in revs]
 
+lncachepath = 'hgext/speedyclient/localnodescache'  # relative to .hg/
+
+def readnodescache(opener):
+    """Return a tuple of (last common node, list of local nodes).
+
+    Return (None, []) if the cache is missing, empty or corrupted.
+    """
+    lines = None
+    try:
+        f = opener(lncachepath, 'r')
+        try:
+            lines = f.read().splitlines()
+        finally:
+            f.close()
+    except (IOError, OSError):
+        # Cache unavailable: we will just query the remote repository.
+        pass
+    if not lines:
+        return None, []
+    try:
+        return bin(lines[0]), [bin(line) for line in lines[1:]]
+    except (TypeError, ValueError):  # corrupted cache: not valid hex
+        return None, []
+
+def writenodescache(opener, lastcommonnode, localnodes):
+    """Write data to local nodes cache; failure to write is silent."""
+    try:
+        f = opener(lncachepath, 'w')
+        try:
+            f.write(hex(lastcommonnode) + '\n')
+            f.write('\n'.join(map(hex, localnodes)))
+        finally:
+            f.close()  # always release the handle, even if a write fails
+    except (OSError, IOError):
+        # We can live without cache.
+        pass
+
+def estimatelocalnodes(repo):
+    """Return (last common node, superset of the local node ids).
+
+    Returns (None, []) if it was impossible to estimate local changes (e.g.
+    the cache could not be read or its last common node was stripped).
+
+    Some of the returned nodes may not exist in the repository anymore.
+
+    Does not communicate with server repository, uses localnodescache instead.
+    """
+    lastcommonnode, cachelocalnodes = readnodescache(repo.opener)
+    if not lastcommonnode:
+        return None, []
+    try:
+        lastcommonrev = repo[lastcommonnode].rev()
+    except LookupError:
+        # Unfortunately the common node has been stripped, so we cannot
+        # be certain about local nodes in this case.
+        return None, []
+
+    # add all subsequent nodes (some of them might be common, but we don't care)
+    newlocalnodes = revstonodes(repo, range(lastcommonrev + 1, len(repo)))
+
+    return lastcommonnode, list(set(cachelocalnodes + newlocalnodes))
+
 def exactlocalnodes(repo, remotepeer):
     """Returns a tuple describing local nodes.
 
@@ -64,9 +127,17 @@
 
     @util.propertycache
     def _localrevs(self):
-        remotepeer = hg.peer(self._repo, {}, self.serverrepopath)
-        lastcommonnode, localnodes = exactlocalnodes(self._repo, remotepeer)
-        localrevs = nodestorevs(self._repo, localnodes)
+        repo = self._repo
+        lastcommonnode, localnodes = estimatelocalnodes(repo)
+        # Query the server (and refresh the cache) only when there is no
+        # estimate or it exceeds maxlocal. The optimal maxlocal is yet to be
+        # determined; it should exceed your expected number of local changes.
+        maxlocal = repo.ui.configint('speedy', 'maxlocal', 500)
+        if lastcommonnode is None or len(localnodes) > maxlocal:
+            remotepeer = hg.peer(repo, {}, self.serverrepopath)
+            lastcommonnode, localnodes = exactlocalnodes(repo, remotepeer)
+            writenodescache(repo.opener, lastcommonnode, localnodes)
+        localrevs = list(nodestorevs(repo, localnodes))
         return localrevs
 
     def localrevs(self):
diff --git a/tests/test-speedy.t b/tests/test-speedy.t
--- a/tests/test-speedy.t
+++ b/tests/test-speedy.t
@@ -277,6 +277,51 @@
   chgpushed
   chg4
 
+Testing local nodes cache
+  $ cat .hg/hgext/speedyclient/localnodescache
+  d66f0d22f3370bb034c30a4fd38fc05f45feb487
+  bb9b586d773f189267f9959bf011b33238326660
+  d887f78da2e99638923378d012787e0aba83c7b5
+  dfd2954cee28a63f6667e43ee6ebb4f23d563de8
+  b81d76c8f1562ac24216af87cd3f4089e9634daf (no-eol)
+
+  $ hg pull
+  pulling from $TESTTMP/serverrepo
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 3 changes to 3 files
+  (run 'hg update' to get a working copy)
+
+  $ cat .hg/hgext/speedyclient/localnodescache
+  d66f0d22f3370bb034c30a4fd38fc05f45feb487
+  bb9b586d773f189267f9959bf011b33238326660
+  d887f78da2e99638923378d012787e0aba83c7b5
+  dfd2954cee28a63f6667e43ee6ebb4f23d563de8
+  b81d76c8f1562ac24216af87cd3f4089e9634daf (no-eol)
+
+  $ cat >> $TESTTMP/localrepo/.hg/hgrc <<EOF_END
+  > [speedy]
+  > maxlocal=1
+  > EOF_END
+
+
+  $ hg log d1
+  chg6
+  chg8
+  chgl6
+  chg2
+  chg1
+  chg0
+
+  $ cat .hg/hgext/speedyclient/localnodescache
+  4d770c18dab6597681e056e8b2656620079e35ef
+  bb9b586d773f189267f9959bf011b33238326660
+  d887f78da2e99638923378d012787e0aba83c7b5
+  dfd2954cee28a63f6667e43ee6ebb4f23d563de8
+  b81d76c8f1562ac24216af87cd3f4089e9634daf (no-eol)
+
   $ cd $TESTTMP/serverrepo
 
   $ kill `cat pidfile`


More information about the Mercurial-devel mailing list