[PATCH 2 of 8] changelog: add cache for abandoned

Martin Geisler mg at lazybytes.net
Wed Jun 1 11:42:48 CDT 2011


# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1306945890 -7200
# Node ID 9095768d8e9fd1278cbf4ed6dfd88f595968926f
# Parent  9b14fa930acae9dc802d7cec87780ba1a3a9aa8b
changelog: add cache for abandoned

  Some issues:

  * The cache is written inside the store (as .hg/store/cache/abandoned)...

  * The delayed-write machinery in changelog (delayopener) was not
    prepared for creating a file with atomictemp=True.

  Perhaps this suggests that localrepo.heads should be the one that
  maintains the cache. Most of the code calls repo.heads and not
  repo.changelog.heads, so this would speed up those calls. However,
  discovery uses repo.changelog, so that might cause problems.

The cache maps each abandoned node to the node that abandoned it.

Changes perfabandoned from

  ! wall 0.095847 comb 0.090000 user 0.090000 sys 0.000000 (best of 100)

to

  ! wall 0.000004 comb 0.000000 user 0.000000 sys 0.000000 (best of 345012)

on the OpenOffice repository with 276,000 changesets.

diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-from node import bin, hex, nullid
+from node import bin, hex, nullid, short
 from i18n import _
 import util, error, revlog, encoding
 
@@ -88,13 +88,13 @@
         self.offset += len(s)
 
 def delayopener(opener, target, divert, buf):
-    def o(name, mode='r'):
+    def o(name, mode='r',  text=False, atomictemp=False):
         if name != target:
-            return opener(name, mode)
+            return opener(name, mode, text, atomictemp)
         if divert:
-            return opener(name + ".a", mode.replace('a', 'w'))
+            return opener(name + ".a", mode.replace('a', 'w'), text, atomictemp)
         # otherwise, divert to memory
-        return appender(opener(name, mode), buf)
+        return appender(opener(name, mode, text, atomictemp), buf)
     return o
 
 class changelog(revlog.revlog):
@@ -107,6 +107,8 @@
         self._realopener = opener
         self._delayed = False
         self._divert = False
+        self._abandonedcache = {}
+        self._abandonedtip = nullid
 
     def delayupdate(self):
         "delay visibility of index updates to other readers"
@@ -241,23 +243,92 @@
         text = "\n".join(l)
         return self.addrevision(text, transaction, len(self), p1, p2)
 
+    def _readabandonedcache(self, tip):
+        import os
+        if 'DEBUG' in os.environ:
+            print ' reading abandoned cache for', short(tip)
+        abandoned = {}
+        try:
+            f = self.opener("cache/abandoned")
+            lasttip = bin(f.readline().strip())
+            if 'DEBUG' in os.environ:
+                print '  our tip: ', short(tip)
+                print '  last tip:', short(lasttip)
+
+            if tip != lasttip:
+                # stale cache
+                f.close()
+                return None
+
+            for line in f:
+                node, abandoner = line.strip().split()[:2]
+                abandoned[bin(node)] = bin(abandoner)
+            f.close()
+            if 'DEBUG' in os.environ:
+                print ' read up-to-date abandoned cache for', short(lasttip)
+                for k, v in abandoned.iteritems():
+                    print '  ', short(k), '->', short(v)
+            return abandoned
+        except (IOError, OSError), inst:
+            if 'DEBUG' in os.environ:
+                print ' error while reading abandoned cache:'
+                print '  ', inst
+            return None
+
+    def _writeabandonedcache(self, lasttip, abandoned):
+        import os
+        if 'DEBUG' in os.environ:
+            print ' writing abandoned cache for', short(lasttip)
+            for k, v in abandoned.iteritems():
+                print '  ', short(k)[:12], '->', short(v)
+
+        try:
+            # TODO: this creates the cache in
+            # .hg/store/cache/abandoned.
+            f = self.opener("cache/abandoned", "w", atomictemp=True)
+            f.write("%s\n" % hex(lasttip))
+            for node, abandoner in abandoned.iteritems():
+                f.write("%s %s\n" % (hex(node), hex(abandoner)))
+            f.rename()
+        except (IOError, OSError), inst:
+            if 'DEBUG' in os.environ:
+                print ' error while writing abandoned cache:', inst
+            pass
+
+    def updateabandonedcache(self):
+        import os
+        tip = self.tip()
+        if tip == self._abandonedtip:
+            if 'DEBUG' in os.environ:
+                print 'no need to update cache'
+            return
+
+        if 'DEBUG' in os.environ:
+            print 'must update cache'
+
+        abandoned = self._readabandonedcache(tip)
+        if abandoned is None:
+            if 'DEBUG' in os.environ:
+                print 'found no cache on disk, rebuilding'
+            # no suitable cache found on disk, recompute
+            abandoned = {}
+            for head in self.heads():
+                extra = self.read(head)[5]
+                if 'abandon' in extra:
+                    for node in extra['abandon'].split(' '):
+                        abandoned[bin(node)] = head
+                    abandoned[head] = head
+
+            for node in abandoned.copy():
+                for head in self.heads():
+                    if (head not in abandoned and self.ancestor(head, node) == node):
+                        del abandoned[node]
+
+            self._writeabandonedcache(tip, abandoned)
+
+        self._abandonedcache = abandoned
+        self._abandonedtip = tip
+
     def abandoned(self, node):
-        # TODO: need cache
-        abandoned = set()
-
-        for head in self.heads():
-            extra = self.read(head)[5]
-            if 'abandon' in extra:
-                abandoned.update(map(bin, extra['abandon'].split(' ')))
-                abandoned.add(head)
-
-        if node in abandoned:
-            for head in self.heads():
-                if head not in abandoned and self.ancestor(head, node) == node:
-                    return None
-
-            return [h for h in self.heads() if
-                    hex(node) in self.read(h)[5].get('abandon', '').split(' ')
-                    or node == h][0]
-
-        return None
+        self.updateabandonedcache()
+        return self._abandonedcache.get(node)
diff --git a/tests/test-abandoned.t b/tests/test-abandoned.t
--- a/tests/test-abandoned.t
+++ b/tests/test-abandoned.t
@@ -39,6 +39,11 @@
   $ hg log
   0 54dbcd775ef0 init
 
+  $ cat .hg/store/cache/abandoned
+  339976ff501061539429787f2d34c191ba2f426d
+  339976ff501061539429787f2d34c191ba2f426d 339976ff501061539429787f2d34c191ba2f426d
+  0b00c28422ee839b7b5b2a8f0408bd804c350df2 339976ff501061539429787f2d34c191ba2f426d
+
   $ hg log --abandoned
   2 339976ff5010 abandoned
   1 0b00c28422ee x


More information about the Mercurial-devel mailing list