[PATCH RFC] perf: add asv benchmarks

Philippe Pepiot philippe.pepiot at logilab.fr
Wed Oct 12 08:35:45 UTC 2016

# HG changeset patch
# User Philippe Pepiot <philippe.pepiot at logilab.fr>
# Date 1475136994 -7200
#      Thu Sep 29 10:16:34 2016 +0200
# Node ID f7847ea1b58780e3508d57376c4fdd63d6aedfcd
# Parent  b85fa6bf298be07804a74d8fdec0d19fdbc6d740
# EXP-Topic hgperf
perf: add asv benchmarks

Airspeed Velocity (ASV) is a Python framework for benchmarking Python packages
over their lifetime. The results are displayed in an interactive web frontend.

Add ASV benchmarks for Mercurial that use the contrib/perf.py extension and can
be run against multiple reference repositories.

The benchmark suite now includes revsets from contrib/base-revsets.txt with
variants, perftags, perfstatus, perfmanifest and perfheads.

Installation requires ASV (not yet released; use the master branch of
https://github.com/spacetelescope/asv), python-hglib and virtualenv.

This is part of PerformanceTrackingSuitePlan

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -49,6 +49,7 @@ mercurial.egg-info
diff --git a/contrib/asv.conf.json b/contrib/asv.conf.json
new file mode 100644
--- /dev/null
+++ b/contrib/asv.conf.json
@@ -0,0 +1,127 @@
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+    // The name of the project being benchmarked
+    "project": "mercurial",
+    // The project's homepage
+    "project_url": "http://mercurial-scm.org/",
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    // "branches": ["master"], // for git
+    // "branches": ["default"],    // for mercurial
+    "branches": ["default", "stable"],
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    // "dvcs": "git",
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://www.selenic.com/hg/rev/",
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.3"],
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list or empty string indicates to just test against the default
+    // (latest) version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed via
+    // pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // "matrix": {
+    //     "numpy": ["1.6", "1.7"],
+    //     "six": ["", null],        // test with and without six installed
+    //     "pip+emcee": [""],   // emcee is only available for install with pip.
+    // },
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "numpy": "1.8"},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+    // ],
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": "../.asv/env",
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": "../.asv/results",
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": "../.asv/html",
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "wheel_cache_size": 0
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // }
diff --git a/contrib/benchmarks/__init__.py b/contrib/benchmarks/__init__.py
new file mode 100644
--- /dev/null
+++ b/contrib/benchmarks/__init__.py
@@ -0,0 +1,106 @@
+"""ASV (http://asv.readthedocs.io) benchmark suite
+Benchmarks are parameterized against reference repositories found in the
+directory pointed to by the REPOS_DIR environment variable.
+Invocation example:
+    $ export REPOS_DIR=~/hgperf/repos
+    # run suite on given revision
+    $ asv --config contrib/asv.conf.json run REV
+    # run suite on new changesets found in stable and default branch
+    $ asv --config contrib/asv.conf.json run NEW
+    # display a comparative result table of benchmark results between two given
+    # revisions
+    $ asv --config contrib/asv.conf.json compare REV1 REV2
+    # compute regression detection and generate ASV static website
+    $ asv --config contrib/asv.conf.json publish
+    # serve the static website
+    $ asv --config contrib/asv.conf.json preview
+"""
+from __future__ import absolute_import
+import contextlib
+import functools
+import io
+import os
+import re
+from mercurial import (
+    extensions,
+    hg,
+    ui as uimod,
+)
+basedir = os.path.abspath(os.path.join(os.path.dirname(__file__),
+                          os.path.pardir, os.path.pardir))
+reposdir = os.environ['REPOS_DIR']
+reposnames = [name for name in os.listdir(reposdir)
+              if os.path.isdir(os.path.join(reposdir, name, ".hg"))]
+outputre = re.compile((r'! wall (\d+.\d+) comb \d+.\d+ user \d+.\d+ sys '
+                      r'\d+.\d+ \(best of \d+\)'))
+ at contextlib.contextmanager
+def redirectuioutput(ui):
+    old = ui.fout, ui.ferr
+    new = io.BytesIO()
+    try:
+        ui.fout = ui.ferr = new
+        yield new
+    finally:
+        ui.fout, ui.ferr = old
+def runperfcommand(reponame, command, *args, **kwargs):
+    os.environ["HGRCPATH"] = ""
+    ui = uimod.ui()
+    repo = hg.repository(ui, os.path.join(reposdir, reponame))
+    perfext = extensions.load(ui, 'perfext',
+                              os.path.join(basedir, 'contrib', 'perf.py'))
+    cmd = getattr(perfext, command)
+    with redirectuioutput(ui) as buf:
+        cmd(ui, repo, *args, **kwargs)
+    buf.seek(0)
+    output = buf.read()
+    match = outputre.search(output)
+    if not match:
+        raise ValueError("Invalid output {0}".format(output))
+    return float(match.group(1))
+def perfbench(repos=reposnames, name=None, params=None):
+    """decorator to declare ASV benchmark based on contrib/perf.py extension
+    An ASV benchmark is a python function with the given attributes:
+    __name__: should start with track_, time_ or mem_ to be collected by ASV
+    params and param_name: parameter matrix to display multiple graphs on the
+    same page.
+    pretty_name: If defined it's displayed in web-ui instead of __name__
+    (useful for revsets)
+    the module name is prepended to the benchmark name and displayed as
+    "category" in webui.
+    Benchmarks are automatically parameterized with repositories found in the
+    REPOS_DIR environment variable.
+    `params` is the param matrix in the form of a list of tuple
+    (param_name, [value0, value1])
+    For example [(x, [a, b]), (y, [c, d])] declare benchmarks for
+    (a, c), (a, d), (b, c) and (b, d).
+    """
+    params = list(params or [])
+    params.insert(0, ("repo", repos))
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapped(repo, *args):
+            def perf(command, *a, **kw):
+                return runperfcommand(repo, command, *a, **kw)
+            return func(perf, *args)
+        wrapped.params = [p[1] for p in params]
+        wrapped.param_names = [p[0] for p in params]
+        wrapped.pretty_name = name
+        return wrapped
+    return decorator
diff --git a/contrib/benchmarks/perf.py b/contrib/benchmarks/perf.py
new file mode 100644
--- /dev/null
+++ b/contrib/benchmarks/perf.py
@@ -0,0 +1,19 @@
+from __future__ import absolute_import
+from . import perfbench
+ at perfbench()
+def track_tags(perf):
+    return perf("perftags")
+ at perfbench()
+def track_status(perf):
+    return perf("perfstatus", unknown=False)
+ at perfbench(params=[('rev', ['1000', '10000', 'tip'])])
+def track_manifest(perf, rev):
+    return perf("perfmanifest", rev)
+ at perfbench()
+def track_heads(perf):
+    return perf("perfheads")
diff --git a/contrib/benchmarks/revset.py b/contrib/benchmarks/revset.py
new file mode 100644
--- /dev/null
+++ b/contrib/benchmarks/revset.py
@@ -0,0 +1,45 @@
+"""ASV revset benchmarks generated from contrib/base-revsets.txt
+Each revset benchmark is parameterized with variants (first, last, sort, ...)
+"""
+from __future__ import absolute_import
+import os
+import string
+import sys
+from . import basedir, perfbench
+def createrevsetbenchmark(baseset, variants=None):
+    if variants is None:
+        # Default variants
+        variants = ["plain", "first", "last", "sort", "sort+first",
+                    "sort+last"]
+    fname = "track_" + "_".join("".join([
+        c if c in string.digits + string.letters else " "
+        for c in baseset
+    ]).split())
+    def wrap(fname, baseset):
+        @perfbench(name=baseset, params=[("variant", variants)])
+        def f(perf, variant):
+            revset = baseset
+            if variant != "plain":
+                for var in variant.split("+"):
+                    revset = "%s(%s)" % (var, revset)
+            return perf("perfrevset", revset)
+        f.__name__ = fname
+        return f
+    return wrap(fname, baseset)
+def initializerevsetbenchmarks():
+    mod = sys.modules[__name__]
+    with open(os.path.join(basedir, 'contrib', 'base-revsets.txt'),
+              'rb') as fh:
+        for line in fh:
+            baseset = line.strip()
+            if baseset and not baseset.startswith('#'):
+                func = createrevsetbenchmark(baseset)
+                setattr(mod, func.__name__, func)

More information about the Mercurial-devel mailing list