[PATCH] perf: split obtaining chunks from decompression

Gregory Szorc gregory.szorc at gmail.com
Sun Feb 5 18:54:39 UTC 2017


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1486226866 28800
#      Sat Feb 04 08:47:46 2017 -0800
# Node ID bc36af48a787dc1cd09a993163549054f3c78777
# Parent  1f51b4658f21bbb797e922d155c1046eddccf91d
perf: split obtaining chunks from decompression

Previously, the code was similar to what revlog._chunks() was doing,
which took a raw data segment and delta chain, obtained buffers for
the raw revlog chunks within, and decompressed them.

This commit splits the "get raw chunks" action from "decompress." The
goal of this change is to more accurately measure decompression
performance.

On a ~50k deltachain for a manifest in mozilla-central:

! full
! wall 0.430548 comb 0.440000 user 0.410000 sys 0.030000 (best of 24)
! deltachain
! wall 0.016053 comb 0.010000 user 0.010000 sys 0.000000 (best of 181)
! read
! wall 0.008078 comb 0.010000 user 0.000000 sys 0.010000 (best of 362)
! rawchunks
! wall 0.033785 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
! decompress
! wall 0.327126 comb 0.320000 user 0.320000 sys 0.000000 (best of 31)
! patch
! wall 0.032391 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
! hash
! wall 0.012587 comb 0.010000 user 0.010000 sys 0.000000 (best of 233)

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -993,6 +993,26 @@ def perfrevlogrevision(ui, repo, file_, 
     node = r.lookup(rev)
     rev = r.rev(node)
 
+    def getrawchunks(data, chain):
+        start = r.start
+        length = r.length
+        inline = r._inline
+        iosize = r._io.size
+        buffer = util.buffer
+        offset = start(chain[0])
+
+        chunks = []
+        ladd = chunks.append
+
+        for rev in chain:
+            chunkstart = start(rev)
+            if inline:
+                chunkstart += (rev + 1) * iosize
+            chunklength = length(rev)
+            ladd(buffer(data, chunkstart - offset, chunklength))
+
+        return chunks
+
     def dodeltachain(rev):
         if not cache:
             r.clearcaches()
@@ -1003,24 +1023,15 @@ def perfrevlogrevision(ui, repo, file_, 
             r.clearcaches()
         r._chunkraw(chain[0], chain[-1])
 
-    def dodecompress(data, chain):
+    def dorawchunks(data, chain):
         if not cache:
             r.clearcaches()
-
-        start = r.start
-        length = r.length
-        inline = r._inline
-        iosize = r._io.size
-        buffer = util.buffer
-        offset = start(chain[0])
+        getrawchunks(data, chain)
 
-        for rev in chain:
-            chunkstart = start(rev)
-            if inline:
-                chunkstart += (rev + 1) * iosize
-            chunklength = length(rev)
-            b = buffer(data, chunkstart - offset, chunklength)
-            r.decompress(b)
+    def dodecompress(chunks):
+        decomp = r.decompress
+        for chunk in chunks:
+            decomp(chunk)
 
     def dopatch(text, bins):
         if not cache:
@@ -1039,6 +1050,7 @@ def perfrevlogrevision(ui, repo, file_, 
 
     chain = r._deltachain(rev)[0]
     data = r._chunkraw(chain[0], chain[-1])[1]
+    rawchunks = getrawchunks(data, chain)
     bins = r._chunks(chain)
     text = str(bins[0])
     bins = bins[1:]
@@ -1048,7 +1060,8 @@ def perfrevlogrevision(ui, repo, file_, 
         (lambda: dorevision(), 'full'),
         (lambda: dodeltachain(rev), 'deltachain'),
         (lambda: doread(chain), 'read'),
-        (lambda: dodecompress(data, chain), 'decompress'),
+        (lambda: dorawchunks(data, chain), 'rawchunks'),
+        (lambda: dodecompress(rawchunks), 'decompress'),
         (lambda: dopatch(text, bins), 'patch'),
         (lambda: dohash(text), 'hash'),
     ]


More information about the Mercurial-devel mailing list