[PATCH 2 of 2 V3] perf: add asv benchmarks

Gregory Szorc gregory.szorc at gmail.com
Tue Nov 15 22:56:14 EST 2016


On Tue, Nov 15, 2016 at 7:55 AM, Philippe Pepiot <philippe.pepiot at logilab.fr> wrote:

> # HG changeset patch
> # User Philippe Pepiot <philippe.pepiot at logilab.fr>
> # Date 1475136994 -7200
> #      Thu Sep 29 10:16:34 2016 +0200
> # Node ID 94e48d7dc9630543e0f4179b2ca96f3c66967f6a
> # Parent  ab6e50ddc2c56dcf170991293005be6d6f80a232
> perf: add asv benchmarks
>
> Airspeed velocity (ASV) is a Python framework for benchmarking Python
> packages over their lifetime. The results are displayed in an interactive
> web frontend.
>
> Add ASV benchmarks for Mercurial that use the contrib/perf.py extension and
> can be run against multiple reference repositories.
>
> The benchmark suite now includes revsets from contrib/base-revsets.txt with
> variants, perftags, perfstatus, perfmanifest and perfheads.
>
> Installation requires asv>=0.2, python-hglib and virtualenv.
>
> This is part of the PerformanceTrackingSuitePlan:
> https://www.mercurial-scm.org/wiki/PerformanceTrackingSuitePlan
>
> diff --git a/.hgignore b/.hgignore
> --- a/.hgignore
> +++ b/.hgignore
> @@ -49,6 +49,7 @@ mercurial.egg-info
>  tags
>  cscope.*
>  .idea/*
> +.asv/*
>  i18n/hg.pot
>  locale/*/LC_MESSAGES/hg.mo
>  hgext/__index__.py
> diff --git a/contrib/asv.conf.json b/contrib/asv.conf.json
> new file mode 100644
> --- /dev/null
> +++ b/contrib/asv.conf.json
> @@ -0,0 +1,127 @@
> +{
> +    // The version of the config file format.  Do not change, unless
> +    // you know what you are doing.
> +    "version": 1,
>

I'll pretend that a custom JSON parser that recognizes a non-standard
comment syntax doesn't exist :)
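
For context: asv's config loader accepts these "//" comments even though
standard JSON does not. Purely as an illustration, and not asv's actual
code, a naive comment-stripping wrapper might look like this:

    import json
    import re

    def loadjsonwithcomments(text):
        # Drop everything from "//" to the end of each line, then hand the
        # result to the stock json module. Deliberately naive: it would also
        # mangle "//" inside string values such as the URLs below, and it
        # does not handle trailing commas, so treat it as a sketch only.
        stripped = re.sub(r'//[^\n]*', '', text)
        return json.loads(stripped)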


> +
> +    // The name of the project being benchmarked
> +    "project": "mercurial",
> +
> +    // The project's homepage
> +    "project_url": "http://mercurial-scm.org/",
>

Please use https:// here.


> +
> +    // The URL or local path of the source code repository for the
> +    // project being benchmarked
> +    "repo": "..",
> +
> +    // List of branches to benchmark. If not provided, defaults to "master"
> +    // (for git) or "default" (for mercurial).
> +    // "branches": ["master"], // for git
> +    // "branches": ["default"],    // for mercurial
> +    "branches": ["default", "stable"],
> +
> +    // The DVCS being used.  If not set, it will be automatically
> +    // determined from "repo" by looking at the protocol in the URL
> +    // (if remote), or by looking for special directories, such as
> +    // ".git" (if local).
> +    // "dvcs": "git",
>

This block can likely be deleted.


> +
> +    // The tool to use to create environments.  May be "conda",
> +    // "virtualenv" or other value depending on the plugins in use.
> +    // If missing or the empty string, the tool will be automatically
> +    // determined by looking for tools on the PATH environment
> +    // variable.
> +    "environment_type": "virtualenv",
> +
> +    // the base URL to show a commit for the project.
> +    "show_commit_url": "https://www.mercurial-scm.org/repo/hg/rev/",
> +
> +    // The Pythons you'd like to test against.  If not provided, defaults
> +    // to the current version of Python used to run `asv`.
> +    // "pythons": ["2.7", "3.3"],
> +
> +    // The matrix of dependencies to test.  Each key is the name of a
> +    // package (in PyPI) and the values are version numbers.  An empty
> +    // list or empty string indicates to just test against the default
> +    // (latest) version. null indicates that the package is to not be
> +    // installed. If the package to be tested is only available from
> +    // PyPi, and the 'environment_type' is conda, then you can preface
> +    // the package name by 'pip+', and the package will be installed via
> +    // pip (with all the conda available packages installed first,
> +    // followed by the pip installed packages).
> +    //
> +    // "matrix": {
> +    //     "numpy": ["1.6", "1.7"],
> +    //     "six": ["", null],        // test with and without six
> installed
> +    //     "pip+emcee": [""],   // emcee is only available for install
> with pip.
> +    // },
> +
> +    // Combinations of libraries/python versions can be excluded/included
> +    // from the set to test. Each entry is a dictionary containing additional
> +    // key-value pairs to include/exclude.
> +    //
> +    // An exclude entry excludes entries where all values match. The
> +    // values are regexps that should match the whole string.
> +    //
> +    // An include entry adds an environment. Only the packages listed
> +    // are installed. The 'python' key is required. The exclude rules
> +    // do not apply to includes.
> +    //
> +    // In addition to package names, the following keys are available:
> +    //
> +    // - python
> +    //     Python version, as in the *pythons* variable above.
> +    // - environment_type
> +    //     Environment type, as above.
> +    // - sys_platform
> +    //     Platform, as in sys.platform. Possible values for the common
> +    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
> +    //
> +    // "exclude": [
> +    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on
> windows
> +    //     {"environment_type": "conda", "six": null}, // don't run
> without six on conda
> +    // ],
> +    //
> +    // "include": [
> +    //     // additional env for python2.7
> +    //     {"python": "2.7", "numpy": "1.8"},
> +    //     // additional env if run on windows+conda
> +    //     {"platform": "win32", "environment_type": "conda", "python":
> "2.7", "libpython": ""},
> +    // ],
>

That's a lot of dead code. Can we just remove it?


> +
> +    // The directory (relative to the current directory) that benchmarks are
> +    // stored in.  If not provided, defaults to "benchmarks"
> +    "benchmark_dir": "benchmarks",
> +
> +    // The directory (relative to the current directory) to cache the Python
> +    // environments in.  If not provided, defaults to "env"
> +    "env_dir": "../.asv/env",
> +
> +    // The directory (relative to the current directory) that raw benchmark
> +    // results are stored in.  If not provided, defaults to "results".
> +    "results_dir": "../.asv/results",
> +
> +    // The directory (relative to the current directory) that the html tree
> +    // should be written to.  If not provided, defaults to "html".
> +    "html_dir": "../.asv/html",
> +
> +    // The number of characters to retain in the commit hashes.
> +    // "hash_length": 8,
> +
> +    // `asv` will cache wheels of the recent builds in each
> +    // environment, making them faster to install next time.  This is
> +    // number of builds to keep, per environment.
> +    // "wheel_cache_size": 0
> +
> +    // The commits after which the regression search in `asv publish`
> +    // should start looking for regressions. Dictionary whose keys are
> +    // regexps matching to benchmark names, and values corresponding to
> +    // the commit (exclusive) after which to start looking for
> +    // regressions.  The default is to start from the first commit
> +    // with results. If the commit is `null`, regression detection is
> +    // skipped for the matching benchmark.
> +    //
> +    // "regressions_first_commits": {
> +    //    "some_benchmark": "352cdf",  // Consider regressions only after
> this commit
> +    //    "another_benchmark": null,   // Skip regression detection
> altogether
> +    // }
>

More dead code.


> +}
> diff --git a/contrib/benchmarks/__init__.py b/contrib/benchmarks/__init__.py
> new file mode 100644
> --- /dev/null
> +++ b/contrib/benchmarks/__init__.py
> @@ -0,0 +1,95 @@
> +"""ASV (https://asv.readthedocs.io) benchmark suite
>

This file wants a GPL license header.
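
Something along the lines of the standard Mercurial boilerplate would do.
A sketch only; the description line, copyright holder and year below are
placeholders, not a statement of authorship:

    # __init__.py - ASV benchmark suite
    #
    # Copyright 2016 <copyright holder>
    #
    # This software may be used and distributed according to the terms of the
    # GNU General Public License version 2 or any later version.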


> +Benchmarks are parameterized against reference repositories found in the
> +directory pointed to by the REPOS_DIR environment variable.
> +
> +Invocation example:
> +
> +    $ export REPOS_DIR=~/hgperf/repos
> +    # run suite on given revision
> +    $ asv --config contrib/asv.conf.json run REV
> +    # run suite on new changesets found in stable and default branch
> +    $ asv --config contrib/asv.conf.json run NEW
> +    # display a comparative result table of benchmark results between two given
> +    # revisions
> +    $ asv --config contrib/asv.conf.json compare REV1 REV2
> +    # compute regression detection and generate ASV static website
> +    $ asv --config contrib/asv.conf.json publish
> +    # serve the static website
> +    $ asv --config contrib/asv.conf.json preview
> +"""
> +
> +from __future__ import absolute_import
> +
> +import functools
> +import os
> +import re
> +
> +from mercurial import (
> +    extensions,
> +    hg,
> +    ui as uimod,
> +)
> +
> +basedir = os.path.abspath(os.path.join(os.path.dirname(__file__),
> +                          os.path.pardir, os.path.pardir))
> +reposdir = os.environ['REPOS_DIR']
> +reposnames = [name for name in os.listdir(reposdir)
> +              if os.path.isdir(os.path.join(reposdir, name, ".hg"))]
> +if not reposnames:
> +    raise ValueError("No repositories found in $REPOS_DIR")
> +outputre = re.compile((r'! wall (\d+.\d+) comb \d+.\d+ user \d+.\d+ sys '
> +                       r'\d+.\d+ \(best of \d+\)'))
> +
> +def runperfcommand(reponame, command, *args, **kwargs):
> +    os.environ["HGRCPATH"] = os.environ.get("ASVHGRCPATH", "")
> +    ui = uimod.ui()
> +    repo = hg.repository(ui, os.path.join(reposdir, reponame))
> +    perfext = extensions.load(ui, 'perfext',
> +                              os.path.join(basedir, 'contrib', 'perf.py'))
> +    cmd = getattr(perfext, command)
> +    ui.pushbuffer()
> +    cmd(ui, repo, *args, **kwargs)
> +    output = ui.popbuffer()
> +    match = outputre.search(output)
> +    if not match:
> +        raise ValueError("Invalid output {0}".format(output))
> +    return float(match.group(1))
> +
> +def perfbench(repos=reposnames, name=None, params=None):
> +    """decorator to declare ASV benchmark based on contrib/perf.py
> extension
> +
> +    An ASV benchmark is a python function with the given attributes:
> +
> +    __name__: should start with track_, time_ or mem_ to be collected by ASV
> +    params and param_names: parameter matrix to display multiple graphs on the
> +    same page.
> +    pretty_name: If defined it's displayed in web-ui instead of __name__
> +    (useful for revsets)
> +    the module name is prepended to the benchmark name and displayed as
> +    "category" in webui.
> +
> +    Benchmarks are automatically parameterized with repositories found in
> the
> +    REPOS_DIR environment variable.
> +
> +    `params` is the param matrix in the form of a list of tuples
> +    (param_name, [value0, value1]).
> +
> +    For example [(x, [a, b]), (y, [c, d])] declares benchmarks for
> +    (a, c), (a, d), (b, c) and (b, d).
> +    """
> +    params = list(params or [])
> +    params.insert(0, ("repo", repos))
> +
> +    def decorator(func):
> +        @functools.wraps(func)
> +        def wrapped(repo, *args):
> +            def perf(command, *a, **kw):
> +                return runperfcommand(repo, command, *a, **kw)
> +            return func(perf, *args)
> +
> +        wrapped.params = [p[1] for p in params]
> +        wrapped.param_names = [p[0] for p in params]
> +        wrapped.pretty_name = name
> +        return wrapped
> +    return decorator
> diff --git a/contrib/benchmarks/perf.py b/contrib/benchmarks/perf.py
> new file mode 100644
> --- /dev/null
> +++ b/contrib/benchmarks/perf.py
> @@ -0,0 +1,20 @@
> +"""ASV benchmarks using contrib/perf.py extension"""
> +from __future__ import absolute_import
> +
> +from . import perfbench
> +
> +@perfbench()
> +def track_tags(perf):
> +    return perf("perftags")
> +
> +@perfbench()
> +def track_status(perf):
> +    return perf("perfstatus", unknown=False)
> +
> +@perfbench(params=[('rev', ['1000', '10000', 'tip'])])
> +def track_manifest(perf, rev):
> +    return perf("perfmanifest", rev)
> +
> +@perfbench()
> +def track_heads(perf):
> +    return perf("perfheads")
> diff --git a/contrib/benchmarks/revset.py b/contrib/benchmarks/revset.py
> new file mode 100644
> --- /dev/null
> +++ b/contrib/benchmarks/revset.py
> @@ -0,0 +1,45 @@
> +"""ASV revset benchmarks generated from contrib/base-revsets.txt
> +
> +Each revset benchmark is parameterized with variants (first, last, sort, ...)
> +"""
> +from __future__ import absolute_import
> +
> +import os
> +import string
> +import sys
> +
> +from . import basedir, perfbench
> +
> +def createrevsetbenchmark(baseset, variants=None):
> +    if variants is None:
> +        # Default variants
> +        variants = ["plain", "first", "last", "sort", "sort+first",
> +                    "sort+last"]
> +    fname = "track_" + "_".join("".join([
> +        c if c in string.digits + string.letters else " "
> +        for c in baseset
> +    ]).split())
> +
> +    def wrap(fname, baseset):
> +        @perfbench(name=baseset, params=[("variant", variants)])
> +        def f(perf, variant):
> +            revset = baseset
> +            if variant != "plain":
> +                for var in variant.split("+"):
> +                    revset = "%s(%s)" % (var, revset)
> +            return perf("perfrevset", revset)
> +        f.__name__ = fname
> +        return f
> +    return wrap(fname, baseset)
> +
> +def initializerevsetbenchmarks():
> +    mod = sys.modules[__name__]
> +    with open(os.path.join(basedir, 'contrib', 'base-revsets.txt'),
> +              'rb') as fh:
> +        for line in fh:
> +            baseset = line.strip()
> +            if baseset and not baseset.startswith('#'):
> +                func = createrevsetbenchmark(baseset)
> +                setattr(mod, func.__name__, func)
> +
> +initializerevsetbenchmarks()