[PATCH 1 of 2 V2] perf: introduce a perfrevlogwrite command
Augie Fackler
raf at durin42.com
Wed Jan 16 09:33:03 UTC 2019
On Fri, Oct 19, 2018 at 02:25:13PM +0200, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld at octobus.net>
> # Date 1538556809 -7200
> # Wed Oct 03 10:53:29 2018 +0200
> # Node ID 78ffc55c0d6034972ea3d49b8c565c563ef27121
> # Parent 824b687ff6af49622a18dfea760cf41a7abe5aa7
> # EXP-Topic revlog-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 78ffc55c0d60
> perf: introduce a perfrevlogwrite command
Unless you can give an /extremely/ compelling reason, I'm going to
punt this to 4.9.
>
> The command records the time taken to add many revisions to a revlog, timing
> each addition individually. The "added revisions" are recreations of the
> original ones.
>
> To time each addition individually, we have to handle the timing and the
> reporting ourselves.
>
> This command is introduced to track the impact of sparse-revlog format on
> delta computations at initial storage time. It starts with the full text, a
> situation similar to the "commit". Additions from an existing delta are better
> timed with bundles.
>
> The complaints from `check-perf-code.py` are not relevant. We are accessing
> a "revlog" opener, not a repository opener.
>
> diff --git a/contrib/perf.py b/contrib/perf.py
> --- a/contrib/perf.py
> +++ b/contrib/perf.py
> @@ -24,8 +24,10 @@ import functools
> import gc
> import os
> import random
> +import shutil
> import struct
> import sys
> +import tempfile
> import threading
> import time
> from mercurial import (
> @@ -1565,6 +1567,149 @@ def perfrevlogrevisions(ui, repo, file_=
> timer(d)
> fm.end()
>
> +@command(b'perfrevlogwrite', revlogopts + formatteropts +
> + [(b's', b'startrev', 1000, b'revision to start writing at'),
> + (b'', b'stoprev', -1, b'last revision to write'),
> + (b'', b'count', 3, b'last revision to write'),
> + ],
> + b'-c|-m|FILE')
> +def perfrevlogwrite(ui, repo, file_=None, startrev=0, stoprev=-1, **opts):
> + """Benchmark writing a series of revisions to a revlog.
> + """
> + opts = _byteskwargs(opts)
> +
> + rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
> + rllen = getlen(ui)(rl)
> + if startrev < 0:
> + startrev = rllen + startrev
> + if stoprev < 0:
> + stoprev = rllen + stoprev
> +
> + ### actually gather results
> + count = opts['count']
> + if count <= 0:
> + raise error.Abort('invalide run count: %d' % count)
typo: invalid (spurious e on the end in yours)
> + allresults = []
> + for c in range(count):
> + allresults.append(_timeonewrite(ui, rl, startrev, stoprev, c + 1))
> +
> + ### consolidate the results in a single list
> + results = []
> + for idx, (rev, t) in enumerate(allresults[0]):
> + ts = [t]
> + for other in allresults[1:]:
> + orev, ot = other[idx]
> + assert orev == rev
> + ts.append(ot)
> + results.append((rev, ts))
> + resultcount = len(results)
> +
> + ### Compute and display relevant statistics
> +
> + # get a formatter
> + fm = ui.formatter(b'perf', opts)
> + displayall = ui.configbool(b"perf", b"all-timing", False)
> +
> + # sorts results by median time
> + results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
> + # list of (name, index) to display)
> + relevants = [
> + ("min", 0),
> + ("10%", resultcount * 10 // 100),
> + ("25%", resultcount * 25 // 100),
> + ("50%", resultcount * 70 // 100),
> + ("75%", resultcount * 75 // 100),
> + ("90%", resultcount * 90 // 100),
> + ("95%", resultcount * 95 // 100),
> + ("99%", resultcount * 99 // 100),
> + ("max", -1),
> + ]
Clarifying what this block does: you sort results by median time
(median of what? I'm a little lost) and then use integer division to
jump into the result set at the right position for emulating the
various %ile latencies, right?
Some sample output in the commit message would be nice.
> + for name, idx in relevants:
> + data = results[idx]
> + title = '%s of %d, rev %d' % (name, resultcount, data[0])
> + formatone(fm, data[1], title=title, displayall=displayall)
> +
> + # XXX summing that many float will not be very precise, we ignore this fact
> + # for now
Make this a TODO, or just a NOTE? It seems extremely unlikely we'll
ever do it, so maybe just make it "note that summing this many floats
will ..." and make peace with that?
> + totaltime = []
> + for item in allresults:
> + totaltime.append((sum(x[1][0] for x in item),
> + sum(x[1][1] for x in item),
> + sum(x[1][2] for x in item),)
> + )
> + formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
> + displayall=displayall)
> + fm.end()
> +
> +def _timeonewrite(ui, orig, startrev, stoprev, runidx=None):
> + timings = []
> + tr = type('faketr', (object,) , {'add': lambda s, x, y, z=None:None})()
> + with _temprevlog(ui, orig, startrev) as dest:
> + revs = list(orig.revs(startrev, stoprev))
> + total = len(revs)
> + topic = 'adding'
> + if runidx is not None:
> + topic += ' (run #%d)' % runidx
> + for idx, rev in enumerate(revs):
> + ui.progress(topic, idx, unit='revs', total=total)
> + text = orig.revision(rev)
> + linkrev = orig.linkrev(rev)
> + node = orig.node(rev)
> + p1, p2 = orig.parents(node)
> + with timeone() as r:
> + dest.addrevision(text, tr, linkrev, p1, p2, node=node)
> + timings.append((rev, r[0]))
> + ui.progress(topic, total, unit='revs', total=total)
> + ui.progress(topic, None, unit='revs', total=total)
> + return timings
> +
> +@contextlib.contextmanager
> +def _temprevlog(ui, orig, truncaterev):
> + from mercurial import vfs as vfsmod
> +
> + if orig._inline:
> + raise error.Abort('not supporting inline revlog (yet)')
> +
> + origindexpath = orig.opener.join(orig.indexfile)
> + origdatapath = orig.opener.join(orig.datafile)
> + indexname = 'revlog.i'
> + dataname = 'revlog.d'
> +
> + tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
> + try:
> + # copy the data file in a temporary directory
> + ui.debug('copying data in %s\n' % tmpdir)
> + destindexpath = os.path.join(tmpdir, 'revlog.i')
> + destdatapath = os.path.join(tmpdir, 'revlog.d')
> + shutil.copyfile(origindexpath, destindexpath)
> + shutil.copyfile(origdatapath, destdatapath)
> +
> + # remove the data we want to add again
> + ui.debug('truncating data to be rewritten\n')
> + with open(destindexpath, 'ab') as index:
> + index.seek(0)
> + index.truncate(truncaterev * orig._io.size)
> + with open(destdatapath, 'ab') as data:
> + data.seek(0)
> + data.truncate(orig.start(truncaterev))
> +
> + # instantiate a new revlog from the temporary copy
> + ui.debug('truncating adding to be rewritten\n')
> + vfs = vfsmod.vfs(tmpdir)
> + vfs.options = getattr(orig.opener, 'options', None)
> +
> + dest = revlog.revlog(vfs,
> + indexfile=indexname,
> + datafile=dataname)
> + if dest._inline:
> + raise error.Abort('not supporting inline revlog (yet)')
> + # make sure internals are initialized
> + dest.revision(len(dest) - 1)
> + yield dest
> + del dest, vfs
> + finally:
> + shutil.rmtree(tmpdir, True)
> +
> @command(b'perfrevlogchunks', revlogopts + formatteropts +
> [(b'e', b'engines', b'', b'compression engines to use'),
> (b's', b'startrev', 0, b'revision to start at')],
> diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
> --- a/tests/test-contrib-perf.t
> +++ b/tests/test-contrib-perf.t
> @@ -114,6 +114,8 @@ perfstatus
> Benchmark obtaining a revlog revision.
> perfrevlogrevisions
> Benchmark reading a series of revisions from a revlog.
> + perfrevlogwrite
> + Benchmark writing a series of revisions to a revlog.
> perfrevrange (no help text available)
> perfrevset benchmark the execution time of a revset
> perfstartup (no help text available)
> @@ -265,4 +267,16 @@ Check perf.py for historical portability
> contrib/perf.py:\d+: (re)
> > from mercurial import (
> import newer module separately in try clause for early Mercurial
> + contrib/perf.py:\d+: (re)
> + > origindexpath = orig.opener.join(orig.indexfile)
> + use getvfs()/getsvfs() for early Mercurial
> + contrib/perf.py:\d+: (re)
> + > origdatapath = orig.opener.join(orig.datafile)
> + use getvfs()/getsvfs() for early Mercurial
> + contrib/perf.py:\d+: (re)
> + > vfs = vfsmod.vfs(tmpdir)
> + use getvfs()/getsvfs() for early Mercurial
> + contrib/perf.py:\d+: (re)
> + > vfs.options = getattr(orig.opener, 'options', None)
> + use getvfs()/getsvfs() for early Mercurial
> [1]
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list