[PATCH 3 of 4] revlog: use persistent file handles during addgroup

Gregory Szorc gregory.szorc at gmail.com
Tue Jul 14 15:51:12 CDT 2015


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1436903210 25200
#      Tue Jul 14 12:46:50 2015 -0700
# Node ID 3c74157a105dc99514775809a99d7ba0f950c614
# Parent  bcdf6cd26113b06a489b9a3051bcd20a1e44fc4b
revlog: use persistent file handles during addgroup

revlog.addgroup() calls into revlog.revision() whenever a new chain is created.
Previously, every time revlog.revision() requested chunks that weren't
cached, we would open a new file handle, seek, read data, and close the
file handle. For large repositories, this resulted in several extra
open()/close() system calls (thousands when processing changesets on a
repository with over 100,000 changesets).

By caching the read file handle during addgroup(), we go a long way
towards eliminating extra file open()/close() system calls as part of
applying changegroup data.

This change appears to show no statistically significant change in
performance on my MBP. Yet it does reduce the number of system calls
significantly. That doesn't mean it isn't a good idea, however.

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1489,63 +1489,66 @@ class revlog(object):
             if dfh:
                 dfh.flush()
             ifh.flush()
         try:
-            # loop through our set of deltas
-            chain = None
-            while True:
-                chunkdata = bundle.deltachunk(chain)
-                if not chunkdata:
-                    break
-                node = chunkdata['node']
-                p1 = chunkdata['p1']
-                p2 = chunkdata['p2']
-                cs = chunkdata['cs']
-                deltabase = chunkdata['deltabase']
-                delta = chunkdata['delta']
+            with self.persistentreadhandle() as refreshreadhandles:
+                # loop through our set of deltas
+                chain = None
+                while True:
+                    chunkdata = bundle.deltachunk(chain)
+                    if not chunkdata:
+                        break
+                    node = chunkdata['node']
+                    p1 = chunkdata['p1']
+                    p2 = chunkdata['p2']
+                    cs = chunkdata['cs']
+                    deltabase = chunkdata['deltabase']
+                    delta = chunkdata['delta']
 
-                content.append(node)
+                    content.append(node)
 
-                link = linkmapper(cs)
-                if node in self.nodemap:
-                    # this can happen if two branches make the same change
-                    chain = node
-                    continue
+                    link = linkmapper(cs)
+                    if node in self.nodemap:
+                        # this can happen if two branches make the same change
+                        chain = node
+                        continue
 
-                for p in (p1, p2):
-                    if p not in self.nodemap:
-                        raise LookupError(p, self.indexfile,
-                                          _('unknown parent'))
+                    for p in (p1, p2):
+                        if p not in self.nodemap:
+                            raise LookupError(p, self.indexfile,
+                                              _('unknown parent'))
 
-                if deltabase not in self.nodemap:
-                    raise LookupError(deltabase, self.indexfile,
-                                      _('unknown delta base'))
+                    if deltabase not in self.nodemap:
+                        raise LookupError(deltabase, self.indexfile,
+                                          _('unknown delta base'))
 
-                baserev = self.rev(deltabase)
+                    baserev = self.rev(deltabase)
 
-                if baserev != nullrev and self.iscensored(baserev):
-                    # if base is censored, delta must be full replacement in a
-                    # single patch operation
-                    hlen = struct.calcsize(">lll")
-                    oldlen = self.rawsize(baserev)
-                    newlen = len(delta) - hlen
-                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
-                        raise error.CensoredBaseError(self.indexfile,
-                                                      self.node(baserev))
+                    if baserev != nullrev and self.iscensored(baserev):
+                        # if base is censored, delta must be full replacement
+                        # in a single patch operation
+                        hlen = struct.calcsize(">lll")
+                        oldlen = self.rawsize(baserev)
+                        newlen = len(delta) - hlen
+                        if delta[:hlen] != mdiff.replacediffheader(oldlen,
+                                                                   newlen):
+                            raise error.CensoredBaseError(self.indexfile,
+                                                          self.node(baserev))
 
-                flags = REVIDX_DEFAULT_FLAGS
-                if self._peek_iscensored(baserev, delta, flush):
-                    flags |= REVIDX_ISCENSORED
+                    flags = REVIDX_DEFAULT_FLAGS
+                    if self._peek_iscensored(baserev, delta, flush):
+                        flags |= REVIDX_ISCENSORED
 
-                chain = self._addrevision(node, None, transaction, link,
-                                          p1, p2, flags, (baserev, delta),
-                                          ifh, dfh)
-                if not dfh and not self._inline:
-                    # addrevision switched from inline to conventional
-                    # reopen the index
-                    ifh.close()
-                    dfh = self.opener(self.datafile, "a")
-                    ifh = self.opener(self.indexfile, "a")
+                    chain = self._addrevision(node, None, transaction, link,
+                                              p1, p2, flags, (baserev, delta),
+                                              ifh, dfh)
+                    if not dfh and not self._inline:
+                        # addrevision switched from inline to conventional
+                        # reopen the index
+                        ifh.close()
+                        dfh = self.opener(self.datafile, "a")
+                        ifh = self.opener(self.indexfile, "a")
+                        refreshreadhandles()
         finally:
             if dfh:
                 dfh.close()
             ifh.close()


More information about the Mercurial-devel mailing list