# Source code for asv.commands.find

# Licensed under a 3-clause BSD style license - see LICENSE.rst

import math

from .. import util
from ..benchmarks import Benchmarks
from ..console import log
from ..machine import Machine
from ..repo import get_repo
from ..results import Results
from ..runner import run_benchmarks
from . import Command, common_args
from .setup import Setup



def draw_graph(lo, mid, hi, total):
    """
    Render a fixed-width, one-line ASCII picture of a search window.

    The line is 60 characters of ``-``; ``<`` marks `lo`, ``>`` marks
    `hi` and ``O`` marks `mid`, each placed proportionally to its
    position within `total`.  Markers are written in the order
    lo, hi, mid, so when two positions fall in the same column the
    later one wins (``O`` always remains visible).

    Parameters
    ----------
    lo, mid, hi : int or float
        Positions to mark, expected to lie in ``[0, total]``.
    total : int or float
        Size of the full range being drawn.  Must be non-zero.

    Returns
    -------
    str
        The 60-character graph.
    """
    nchars = 60
    scale = float(nchars) / total

    def column(position):
        # A position equal to ``total`` scales to exactly ``nchars``,
        # which is one past the last valid index; clamp into the graph.
        return max(0, min(nchars - 1, int(position * scale)))

    graph = ['-'] * nchars
    graph[column(lo)] = '<'
    graph[column(hi)] = '>'
    graph[column(mid)] = 'O'
    return ''.join(graph)




class Find(Command):
    """
    The ``find`` subcommand: adaptively search a range of commits for
    the one that produces the largest change in a single benchmark.
    """

    @classmethod
    def setup_arguments(cls, subparsers):
        """
        Register the ``find`` subcommand and its arguments.

        Parameters
        ----------
        subparsers
            Object providing ``add_parser`` (as returned by
            ``ArgumentParser.add_subparsers``).

        Returns
        -------
        The parser created for the ``find`` subcommand.
        """
        parser = subparsers.add_parser(
            "find",
            help="Find commits that introduced large regressions",
            description="""Adaptively searches a range of commits for
            one that produces a large regression.  This only works well
            when the regression in the range is mostly monotonic.""",
        )

        parser.add_argument(
            'range',
            type=str,
            metavar=('from..to',),
            help="""Range of commits to search.  For a git
            repository, this is passed as the first argument to ``git
            log``.  See 'specifying ranges' section of the
            `gitrevisions` manpage for more info.""",
        )
        parser.add_argument(
            "bench",
            type=str,
            metavar=('benchmark_name',),
            help="""Name of benchmark to use in search.""",
        )
        parser.add_argument(
            "--invert",
            "-i",
            action="store_true",
            help="""Search for a decrease in the benchmark value,
            rather than an increase.""",
        )
        parser.add_argument(
            "--skip-save",
            action="store_true",
            help="""Do not save intermediate results from the search""",
        )
        common_args.add_parallel(parser)
        common_args.add_show_stderr(parser)
        common_args.add_machine(parser)
        common_args.add_environment(parser)
        common_args.add_launch_method(parser)

        # ``run_from_args`` is not defined in this class; presumably it is
        # inherited from ``Command`` and dispatches to
        # ``run_from_conf_args`` -- verify against the base class.
        parser.set_defaults(func=cls.run_from_args)

        return parser

    @classmethod
    def run_from_conf_args(cls, conf, args, **kwargs):
        """
        Unpack the parsed command-line `args` and forward them to `run`.

        Any extra keyword arguments are passed through to `run`
        unchanged.  Returns whatever `run` returns.
        """
        return cls.run(
            conf,
            args.range,
            args.bench,
            invert=args.invert,
            show_stderr=args.show_stderr,
            parallel=args.parallel,
            machine=args.machine,
            env_spec=args.env_spec,
            launch_method=args.launch_method,
            skip_save=args.skip_save,
            **kwargs,
        )

    @classmethod
    def run(
        cls,
        conf,
        range_spec,
        bench,
        invert=False,
        show_stderr=False,
        parallel=1,
        machine=None,
        env_spec=None,
        _machine_file=None,
        launch_method=None,
        skip_save=False,
    ):
        """
        Search the commits in `range_spec` for the largest change in
        the benchmark `bench` and log the commit found.

        Parameters
        ----------
        conf
            Project configuration; ``conf.results_dir`` and
            ``conf.project`` are read here.
        range_spec : str
            Commit range, handed to ``repo.get_hashes_from_range``.
        bench : str
            Benchmark name, used as a regex for discovery.  If it
            matches several benchmarks, a single exact name match is
            used; otherwise the search is aborted.
        invert : bool, optional
            Look for a decrease of the benchmark value instead of an
            increase.
        show_stderr : bool, optional
            Forwarded to ``run_benchmarks``.
        parallel : int, optional
            Forwarded to ``Setup.run``.
        machine : str, optional
            Machine name, forwarded to ``Machine.load``.
        env_spec : optional
            Environment specification, forwarded to ``Setup.run``.
            Only the first environment returned is benchmarked.
        _machine_file : optional
            Forwarded to ``Machine.load`` as ``_path``.
        launch_method : optional
            Forwarded to ``run_benchmarks``.
        skip_save : bool, optional
            If true, existing results are not loaded from, and new
            results are not saved to, ``conf.results_dir``.  The
            machine parameters are saved there regardless.

        Returns
        -------
        int
            0 on success; 1 if no commits, no environments, or no
            unique benchmark could be selected.

        Raises
        ------
        util.UserError
            If too many commits fail to produce a usable result for
            the search to continue.
        """
        params = {}
        machine_params = Machine.load(machine_name=machine, _path=_machine_file, interactive=True)
        params.update(machine_params.__dict__)
        machine_params.save(conf.results_dir)

        repo = get_repo(conf)
        repo.pull()

        # NOTE(review): the list is reversed, presumably so that index 0
        # is the oldest commit -- confirm against get_hashes_from_range.
        commit_hashes = repo.get_hashes_from_range(range_spec)[::-1]

        if len(commit_hashes) == 0:
            log.error("No commit hashes selected")
            return 1

        environments = Setup.run(conf=conf, env_spec=env_spec, parallel=parallel)
        if len(environments) == 0:
            log.error("No environments selected")
            return 1

        benchmarks = Benchmarks.discover(conf, repo, environments, commit_hashes, regex=bench)
        if len(benchmarks) == 0:
            log.error(f"'{bench}' benchmark not found")
            return 1
        elif len(benchmarks) > 1:
            # The regex matched several benchmarks: keep going only if
            # exactly one of them is named exactly `bench`.
            exact_matches = benchmarks.filter_out([x for x in benchmarks if x != bench])
            if len(exact_matches) == 1:
                log.warning(f"'{bench}' matches more than one benchmark, using exact match")
                benchmarks = exact_matches
            else:
                log.error(f"'{bench}' matches more than one benchmark")
                return 1

        (benchmark_name,) = benchmarks.keys()
        benchmark_type = benchmarks[benchmark_name]["type"]

        # log2 is exact for powers of two, unlike log(n) / log(2), which
        # can land just below the integer and be truncated one too low.
        steps = int(math.log2(len(commit_hashes)))

        log.info(f"Running approximately {steps} benchmarks within {len(commit_hashes)} commits")

        env = environments[0]

        # Per-commit cache of benchmark values, indexed like commit_hashes.
        results = [None] * len(commit_hashes)

        def do_benchmark(i):
            """
            Return the benchmark values for commit index `i`, running
            the benchmark only if no value is cached yet.
            """
            if results[i] is not None:
                return results[i]

            commit_hash = commit_hashes[i]

            commit_name = repo.get_decorated_hash(commit_hash, 8)
            log.info(f"For {conf.project} commit {commit_name}:")

            env.install_project(conf, repo, commit_hash)
            params['python'] = env.python
            params.update(env.requirements)

            result = Results(
                params,
                env.requirements,
                commit_hash,
                repo.get_date(commit_hash),
                env.python,
                env.name,
                env.env_vars,
            )

            if not skip_save:
                result.load_data(conf.results_dir)

            res = run_benchmarks(
                benchmarks,
                env,
                results=result,
                show_stderr=show_stderr,
                launch_method=launch_method,
            )

            if not skip_save:
                res.save(conf.results_dir)

            result = res.get_result_value(benchmark_name, benchmarks[benchmark_name]['params'])

            results[i] = result

            # If we failed due to timeout in a timing benchmark, set
            # runtime as the timeout to prevent falling back to linear
            # search
            errcode = res.errcode[benchmark_name]
            if errcode == util.TIMEOUT_RETCODE and benchmark_type == "time":
                timeout_limit = benchmarks[benchmark_name]['timeout']
                results[i] = [r if r is not None else timeout_limit for r in results[i]]

            return results[i]

        def non_null_results(*results):
            """
            Whether some value is non-null in all result sets
            """
            for values in zip(*results):
                if all(x is not None for x in values):
                    return True
            return False

        def difference_3way(a, b, c):
            """
            Return largest regression (a-b, b-c).

            Only positions where all three result sets have a value
            are compared.  Differences are normalized by the summed
            magnitudes of the three values; with ``invert`` the sign
            is flipped so that decreases count as positive.  Both
            lists are seeded with 0, so neither returned value is
            ever negative.
            """
            results_ab = [0]
            results_bc = [0]
            for va, vb, vc in zip(a, b, c):
                if va is not None and vb is not None and vc is not None:
                    denom = abs(va) + abs(vb) + abs(vc)
                    if denom == 0:
                        denom = 1.0
                    if invert:
                        denom *= -1.0

                    results_ab.append((va - vb) / denom)
                    results_bc.append((vb - vc) / denom)
            return max(results_ab), max(results_bc)

        def do_search(lo, hi):
            """
            Recursively narrow ``[lo, hi]`` to the index of the commit
            with the largest change and return that index.
            """
            if hi - lo <= 1:
                return hi

            mid = lo + (hi - lo) // 2

            log.info(f"Testing {draw_graph(lo, mid, hi, len(commit_hashes))}")

            with log.indent():
                # Move `lo` forward past commits without any usable value.
                lo_result = None
                while lo_result is None:
                    lo_result = do_benchmark(lo)
                    if not non_null_results(lo_result):
                        lo_result = None
                        lo += 1
                        if lo >= mid:
                            raise util.UserError("Too many commits failed")

                # Move `mid` forward until it shares a usable value with `lo`.
                mid_result = None
                while mid_result is None:
                    mid_result = do_benchmark(mid)
                    if not non_null_results(mid_result, lo_result):
                        mid_result = None
                        mid += 1
                        if mid >= hi:
                            raise util.UserError("Too many commits failed")

                # Move `hi` backward until all three share a usable value.
                hi_result = None
                while hi_result is None:
                    hi_result = do_benchmark(hi)
                    if not non_null_results(lo_result, mid_result, hi_result):
                        hi_result = None
                        hi -= 1
                        if hi <= mid:
                            raise util.UserError("Too many commits failed")

            # diff_b: change from mid to hi; diff_a: change from lo to mid.
            diff_b, diff_a = difference_3way(hi_result, mid_result, lo_result)

            # Descend into the half with the larger change; ties go to
            # the lower half.  The (possibly shifted) bounds are reused.
            if diff_a >= diff_b:
                return do_search(lo, mid)
            else:
                return do_search(mid, hi)

        result = do_search(0, len(commit_hashes) - 1)

        commit_name = repo.get_decorated_hash(commit_hashes[result], 8)

        if invert:
            direction = "improvement"
        else:
            direction = "regression"

        log.info(f"Greatest {direction} found: {commit_name}")

        return 0