[PATCH 5 of 5] changelog: rework the delayupdate mechanism

Pierre-Yves David pierre-yves.david at ens-lyon.org
Sat Oct 18 06:44:57 CDT 2014


# HG changeset patch
# User Pierre-Yves David <pierre-yves.david at fb.com>
# Date 1413619938 25200
#      Sat Oct 18 01:12:18 2014 -0700
# Node ID 54bf25b6826692364859474a9ff5e8e11e5d4913
# Parent  85a27c5441d6fbbc86a8272b46c2f1eb4255389e
changelog: rework the delayupdate mechanism

The current way we use the `delayupdate` mechanism is wrong. We call
`delayupdate` right after the transaction retrieval, then we call `finalize`
right before calling `tr.close()`. The `finalize` call will -always- result in a
flush to disk, making the data available to all readers. But the `tr.close()` may
be a no-op if the transaction is nested. This would result in data:

1) exposed to reader to early,
2) rollbacked by other part of the transaction after such exposition.

So we need to end up in a situation where we call `finalize` a single time when
the transaction actually close. For this purpose we need to be able to call
`delayupdate` and `_writepending` multiple times and `finalize` once. This was
not possible with the previous state of the code.

This changeset re-factor the code to makes this possible. We buffer data in memory
as much as possible and fall-back to writing to a ".a" file after the first call
to `_writepending`.

diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -106,30 +106,36 @@ class appender(object):
 
     def write(self, s):
         self.data.append(str(s))
         self.offset += len(s)
 
-def delayopener(opener, target, divert, buf):
-    def o(name, mode='r'):
+def _divertopener(opener, target):
+    """build an opener that write in `target.a` instead of `target`"""
+    def _divert(name, mode='r'):
         if name != target:
             return opener(name, mode)
-        if divert:
-            return opener(name + ".a", mode.replace('a', 'w'))
-        # otherwise, divert to memory
+        return opener(name + ".a", mode)
+    return _divert
+
+def _delayopener(opener, target, buf):
+    """build an opener that store chunk in `buf` instead of `target`"""
+    def _delay(name, mode='r'):
+        if name != target:
+            return opener(name, mode)
         return appender(opener, name, mode, buf)
-    return o
+    return _delay
 
 class changelog(revlog.revlog):
     def __init__(self, opener):
         revlog.revlog.__init__(self, opener, "00changelog.i")
         if self._initempty:
             # changelogs don't benefit from generaldelta
             self.version &= ~revlog.REVLOGGENERALDELTA
             self._generaldelta = False
         self._realopener = opener
         self._delayed = False
-        self._delaybuf = []
+        self._delaybuf = None
         self._divert = False
         self.filteredrevs = frozenset()
 
     def tip(self):
         """filtered version of revlog.tip"""
@@ -218,31 +224,40 @@ class changelog(revlog.revlog):
             raise error.FilteredIndexError(rev)
         return super(changelog, self).flags(rev)
 
     def delayupdate(self):
         "delay visibility of index updates to other readers"
+
+        if not self._delayed:
+            if len(self) == 0:
+                self._divert = True
+                if self._realopener.exists(self.indexfile + '.a'):
+                    self._realopener.unlink(self.indexfile + '.a')
+                self.opener = _divertopener(self._realopener, self.indexfile)
+            else:
+                self._delaybuf = []
+                self.opener = _delayopener(self._realopener, self.indexfile,
+                                           self._delaybuf)
         self._delayed = True
-        self._divert = (len(self) == 0)
-        self._delaybuf = []
-        self.opener = delayopener(self._realopener, self.indexfile,
-                                  self._divert, self._delaybuf)
 
     def finalize(self, tr):
         "finalize index updates"
         self._delayed = False
         self.opener = self._realopener
         # move redirected index data back into place
         if self._divert:
+            assert not self._delaybuf
             tmpname = self.indexfile + ".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
             self.opener.rename(tmpname, self.indexfile)
         elif self._delaybuf:
             fp = self.opener(self.indexfile, 'a')
             fp.write("".join(self._delaybuf))
             fp.close()
-            self._delaybuf = []
+            self._delaybuf = None
+        self._divert = False
         # split when we're done
         self.checkinlinesize(tr)
 
     def readpending(self, file):
         r = revlog.revlog(self.opener, file)
@@ -260,12 +275,13 @@ class changelog(revlog.revlog):
             fp2.write(fp1.read())
             # add pending data
             fp2.write("".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
-            self._delaybuf = []
+            self._delaybuf = None
             self._divert = True
+            self.opener = _divertopener(self._realopener, self.indexfile)
 
         if self._divert:
             return True
 
         return False


More information about the Mercurial-devel mailing list