[PATCH 2 of 2 V2] perf: benchmark command for revlog indexes
Gregory Szorc
gregory.szorc at gmail.com
Sun May 28 14:13:22 EDT 2017
# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1495995190 25200
# Sun May 28 11:13:10 2017 -0700
# Node ID f21925c52da67931702838d0579f157ce21939bb
# Parent 1ca1ef5d942aa405bec4ae74cbcf3155a5a4033f
perf: benchmark command for revlog indexes
We didn't have explicit microbenchmark coverage for loading revlog
indexes. That seems like a useful thing to have, so let's add it.
We currently measure the low-level nodemap APIs. There is room to
hook in at the actual revlog layer. This could be done as a follow-up.
The hackiest thing about this patch is specifying revlog paths.
Other commands have arguments that allow resolution of changelog,
manifest, and filelog. I needed to hook in at a lower level of
the revlog API than the existing helper functions for resolving
revlogs allow, and I was too lazy to write new APIs. This could
be done as a follow-up easily enough.
Example output for `hg perfrevlogindex 00changelog.i` on my
Firefox repo (404418 revisions):
! revlog constructor
! wall 0.003106 comb 0.000000 user 0.000000 sys 0.000000 (best of 912)
! read
! wall 0.003077 comb 0.000000 user 0.000000 sys 0.000000 (best of 924)
! create index object
! wall 0.000000 comb 0.000000 user 0.000000 sys 0.000000 (best of 1803994)
! retrieve index entry for rev 0
! wall 0.000193 comb 0.000000 user 0.000000 sys 0.000000 (best of 14037)
! look up missing node
! wall 0.003313 comb 0.000000 user 0.000000 sys 0.000000 (best of 865)
! look up node at rev 0
! wall 0.003295 comb 0.010000 user 0.010000 sys 0.000000 (best of 858)
! look up node at 1/4 len
! wall 0.002598 comb 0.010000 user 0.010000 sys 0.000000 (best of 1103)
! look up node at 1/2 len
! wall 0.001909 comb 0.000000 user 0.000000 sys 0.000000 (best of 1507)
! look up node at 3/4 len
! wall 0.001213 comb 0.000000 user 0.000000 sys 0.000000 (best of 2275)
! look up node at tip
! wall 0.000453 comb 0.000000 user 0.000000 sys 0.000000 (best of 5697)
! look up all nodes (forward)
! wall 0.094615 comb 0.100000 user 0.100000 sys 0.000000 (best of 100)
! look up all nodes (reverse)
! wall 0.045889 comb 0.050000 user 0.050000 sys 0.000000 (best of 100)
! retrieve all index entries (forward)
! wall 0.078398 comb 0.080000 user 0.060000 sys 0.020000 (best of 100)
! retrieve all index entries (reverse)
! wall 0.079376 comb 0.080000 user 0.070000 sys 0.010000 (best of 100)
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -23,6 +23,7 @@ import functools
import gc
import os
import random
+import struct
import sys
import time
from mercurial import (
@@ -34,6 +35,7 @@ from mercurial import (
extensions,
mdiff,
merge,
+ revlog,
util,
)
@@ -61,6 +63,10 @@ try:
from mercurial import scmutil # since 1.9 (or 8b252e826c68)
except ImportError:
pass
+try:
+ from mercurial import pycompat # since 3.8 (or 6041fb8f2da8)
+except ImportError:
+ pass
# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
@@ -857,6 +863,122 @@ def perfdiffwd(ui, repo, **opts):
timer(d, title)
fm.end()
+@command('perfrevlogindex', revlogopts + formatteropts,
+ '-c|-m|FILE')
+def perfrevlogindex(ui, repo, file_=None, **opts):
+ """Benchmark operations against a revlog index.
+
+ This tests constructing a revlog instance, reading index data,
+ parsing index data, and performing various operations related to
+ index data.
+ """
+
+ rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)
+
+ opener = getattr(rl, 'opener') # trick linter
+ indexfile = rl.indexfile
+ data = opener.read(indexfile)
+
+ header = struct.unpack('>I', data[0:4])[0]
+ version = header & 0xFFFF
+ if version == 1:
+ revlogio = revlog.revlogio()
+ inline = header & (1 << 16)
+ else:
+ raise error.Abort(_('unsupported revlog version: %d') % version)
+
+ rllen = len(rl)
+
+ node0 = rl.node(0)
+ node25 = rl.node(rllen // 4)
+ node50 = rl.node(rllen // 2)
+ node75 = rl.node(rllen // 4 * 3)
+ node100 = rl.node(rllen - 1)
+
+ allrevs = range(rllen)
+ allrevsrev = list(reversed(allrevs))
+ allnodes = [rl.node(rev) for rev in range(rllen)]
+ allnodesrev = list(reversed(allnodes))
+
+ def constructor():
+ revlog.revlog(opener, indexfile)
+
+ def read():
+ with opener(indexfile) as fh:
+ fh.read()
+
+ def parseindex():
+ revlogio.parseindex(data, inline)
+
+ def getentry(revornode):
+ index = revlogio.parseindex(data, inline)[0]
+ index[revornode]
+
+ def getentries(revs, count=1):
+ index = revlogio.parseindex(data, inline)[0]
+
+ for i in range(count):
+ for rev in revs:
+ index[rev]
+
+ def resolvenode(node):
+ nodemap = revlogio.parseindex(data, inline)[1]
+ # This only works for the C code.
+ if nodemap is None:
+ return
+
+ try:
+ nodemap[node]
+ except error.RevlogError:
+ pass
+
+ def resolvenodes(nodes, count=1):
+ nodemap = revlogio.parseindex(data, inline)[1]
+ if nodemap is None:
+ return
+
+ for i in range(count):
+ for node in nodes:
+ try:
+ nodemap[node]
+ except error.RevlogError:
+ pass
+
+ benches = [
+ (constructor, 'revlog constructor'),
+ (read, 'read'),
+ (parseindex, 'create index object'),
+ (lambda: getentry(0), 'retrieve index entry for rev 0'),
+ (lambda: resolvenode('a' * 20), 'look up missing node'),
+ (lambda: resolvenode(node0), 'look up node at rev 0'),
+ (lambda: resolvenode(node25), 'look up node at 1/4 len'),
+ (lambda: resolvenode(node50), 'look up node at 1/2 len'),
+ (lambda: resolvenode(node75), 'look up node at 3/4 len'),
+ (lambda: resolvenode(node100), 'look up node at tip'),
+ # 2x variation is to measure caching impact.
+ (lambda: resolvenodes(allnodes),
+ 'look up all nodes (forward)'),
+ (lambda: resolvenodes(allnodes, 2),
+ 'look up all nodes 2x (forward)'),
+ (lambda: resolvenodes(allnodesrev),
+ 'look up all nodes (reverse)'),
+ (lambda: resolvenodes(allnodesrev, 2),
+ 'look up all nodes 2x (reverse)'),
+ (lambda: getentries(allrevs),
+ 'retrieve all index entries (forward)'),
+ (lambda: getentries(allrevs, 2),
+ 'retrieve all index entries 2x (forward)'),
+ (lambda: getentries(allrevsrev),
+ 'retrieve all index entries (reverse)'),
+ (lambda: getentries(allrevsrev, 2),
+ 'retrieve all index entries 2x (reverse)'),
+ ]
+
+ for fn, title in benches:
+ timer, fm = gettimer(ui, opts)
+ timer(fn, title=title)
+ fm.end()
+
@command('perfrevlogrevisions', revlogopts + formatteropts +
[('d', 'dist', 100, 'distance between the revisions'),
('s', 'startrev', 0, 'revision to start reading at'),
diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -97,6 +97,8 @@ perfstatus
perfrawfiles (no help text available)
perfrevlogchunks
Benchmark operations on revlog chunks.
+ perfrevlogindex
+ Benchmark operations against a revlog index.
perfrevlogrevision
Benchmark obtaining a revlog revision.
perfrevlogrevisions
@@ -147,6 +149,7 @@ perfstatus
$ hg perfnodelookup 2
$ hg perfpathcopies 1 2
$ hg perfrawfiles 2
+ $ hg perfrevlogindex -c
$ hg perfrevlogrevisions .hg/store/data/a.i
$ hg perfrevlogrevision -m 0
$ hg perfrevlogchunks -c
More information about the Mercurial-devel mailing list