[PATCH 6 of 6] perf: add perfrevlogrevision
Gregory Szorc
gregory.szorc at gmail.com
Sun Dec 20 22:04:14 CST 2015
# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1450665501 28800
# Sun Dec 20 18:38:21 2015 -0800
# Node ID 85c0fa263fdcbdc0ee19aa3708dce1a854c18bf9
# Parent f43c3343c6888d7f1c960872091ebd86b3a54d3a
perf: add perfrevlogrevision
As part of investigating performance improvements to revlog reading,
I needed a mechanism to measure every part of revlog reading so I knew
where time was spent and how effective optimizations were.
This patch implements a perf command for benchmarking the various
stages of reading a single revlog revision.
When executed against a manifest revision at the end of a 30,000+
long delta chain in mozilla-central, the command demonstrates that
~80% of time is spent in zlib decompression.
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1,18 +1,20 @@
# perf.py - performance test routines
'''helper extension to measure performance'''
from mercurial import cmdutil, scmutil, util, commands, obsolete
-from mercurial import repoview, branchmap, merge, copies, error
+from mercurial import repoview, branchmap, merge, copies, error, revlog
+from mercurial import mdiff
import time, os, sys
import random
import functools
formatteropts = commands.formatteropts
+revlogopts = commands.debugrevlogopts
cmdtable = {}
command = cmdutil.command(cmdtable)
def getlen(ui):
if ui.configbool("perf", "stub"):
return lambda x: 1
return len
@@ -483,16 +485,106 @@ def perfrevlog(ui, repo, file_, **opts):
def d():
r = revlog.revlog(lambda fn: open(fn, 'rb'), file_)
for x in xrange(0, _len(r), dist):
r.revision(r.node(x))
timer(d)
fm.end()
+ at command('perfrevlogrevision', revlogopts + formatteropts +
+ [('', 'cache', False, 'use caches instead of clearing')],
+ '-c|-m|FILE REV')
+def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
+ """Benchmark obtaining a revlog revision.
+
+ Obtaining a revlog revision consists of roughly the following steps:
+
+ 1. Compute the delta chain
+ 2. Obtain the raw chunks for that delta chain
+ 3. Decompress each raw chunk
+ 4. Apply binary patches to obtain fulltext
+ 5. Verify hash of fulltext
+
+ This command measures the time spent in each of these phases.
+ """
+ if opts.get('changelog') or opts.get('manifest'):
+ file_, rev = None, file_
+ elif rev is None:
+ raise error.CommandError('perfrevlogrevision', 'invalid arguments')
+
+ r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
+ node = r.lookup(rev)
+ rev = r.rev(node)
+
+ def dodeltachain(rev):
+ if not cache:
+ r.clearcaches()
+ r._deltachain(rev)
+
+ def doread(chain):
+ if not cache:
+ r.clearcaches()
+ r._chunkraw(chain[0], chain[-1])
+
+ def dodecompress(data, chain):
+ if not cache:
+ r.clearcaches()
+
+ start = r.start
+ length = r.length
+ inline = r._inline
+ iosize = r._io.size
+ buffer = util.buffer
+ offset = start(chain[0])
+
+ for rev in chain:
+ chunkstart = start(rev)
+ if inline:
+ chunkstart += (rev + 1) * iosize
+ chunklength = length(rev)
+ b = buffer(data, chunkstart - offset, chunklength)
+ revlog.decompress(b)
+
+ def dopatch(text, bins):
+ if not cache:
+ r.clearcaches()
+ mdiff.patches(text, bins)
+
+ def dohash(text):
+ if not cache:
+ r.clearcaches()
+ r._checkhash(text, node, rev)
+
+ def dorevision():
+ if not cache:
+ r.clearcaches()
+ r.revision(node)
+
+ chain = r._deltachain(rev)[0]
+ data = r._chunkraw(chain[0], chain[-1])
+ bins = r._chunks(chain)
+ text = str(bins[0])
+ bins = bins[1:]
+ text = mdiff.patches(text, bins)
+
+ benches = [
+ (lambda: dorevision(), 'full'),
+ (lambda: dodeltachain(rev), 'deltachain'),
+ (lambda: doread(chain), 'read'),
+ (lambda: dodecompress(data, chain), 'decompress'),
+ (lambda: dopatch(text, bins), 'patch'),
+ (lambda: dohash(text), 'hash'),
+ ]
+
+ for fn, title in benches:
+ timer, fm = gettimer(ui, opts)
+ timer(fn, title=title)
+ fm.end()
+
@command('perfrevset',
[('C', 'clear', False, 'clear volatile cache between each call.'),
('', 'contexts', False, 'obtain changectx for each revision')]
+ formatteropts, "REVSET")
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
"""benchmark the execution time of a revset
Use the --clean option if need to evaluate the impact of build volatile
diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -86,16 +86,18 @@ perfstatus
perfmoonwalk benchmark walking the changelog backwards
perfnodelookup
(no help text available)
perfparents (no help text available)
perfpathcopies
(no help text available)
perfrawfiles (no help text available)
perfrevlog (no help text available)
+ perfrevlogrevision
+ Benchmark obtaining a revlog revision.
perfrevrange (no help text available)
perfrevset benchmark the execution time of a revset
perfstartup (no help text available)
perfstatus (no help text available)
perftags (no help text available)
perftemplating
(no help text available)
perfvolatilesets
@@ -131,16 +133,17 @@ perfstatus
$ hg perflrucache
$ hg perfmanifest 2
$ hg perfmergecalculate -r 3
$ hg perfmoonwalk
$ hg perfnodelookup 2
$ hg perfpathcopies 1 2
$ hg perfrawfiles 2
$ hg perfrevlog .hg/store/data/a.i
+ $ hg perfrevlogrevision -m 0
$ hg perfrevrange
$ hg perfrevset 'all()'
$ hg perfstartup
$ hg perfstatus
$ hg perftags
$ hg perftemplating
$ hg perfvolatilesets
$ hg perfwalk
More information about the Mercurial-devel
mailing list