# Licensed under a 3-clause BSD style license - see LICENSE.rst
import math
from .. import util
from ..benchmarks import Benchmarks
from ..console import log
from ..machine import Machine
from ..repo import get_repo
from ..results import Results
from ..runner import run_benchmarks
from . import Command, common_args
from .setup import Setup
def draw_graph(lo, mid, hi, total):
    """
    Render a one-line ASCII bar showing three positions within a range.

    The bar is 60 characters of ``-``.  Each position is scaled from the
    ``0..total`` range onto the bar and marked: ``<`` for *lo*, ``>`` for
    *hi* and ``O`` for *mid*.  Markers are written in that order, so when
    two positions land on the same column the later marker wins (``O``
    over ``>`` over ``<``).

    Parameters
    ----------
    lo, mid, hi : int or float
        Positions to mark.  Values that fall outside ``0..total`` are drawn
        on the nearest edge of the bar instead of raising ``IndexError`` or
        wrapping around to the other end.
    total : int or float
        Size of the range that the bar represents.  When it is not
        positive there is nothing to scale against and an unmarked bar is
        returned.

    Returns
    -------
    str
        The 60-character bar.
    """
    nchars = 60
    graph = ['-'] * nchars

    # An empty (or nonsensical) range cannot be scaled; avoid dividing by 0.
    if total <= 0:
        return ''.join(graph)

    scale = float(nchars) / total

    def column(position):
        # position == total scales to exactly nchars, one past the last
        # valid index, and negative positions would index from the end of
        # the list -- clamp both onto the bar.
        return min(max(int(position * scale), 0), nchars - 1)

    graph[column(lo)] = '<'
    graph[column(hi)] = '>'
    graph[column(mid)] = 'O'
    return ''.join(graph)
class Find(Command):
    """
    The ``find`` sub-command: search a commit range for the commit at which
    a single benchmark's result changes the most.
    """

    @classmethod
    def setup_arguments(cls, subparsers):
        """
        Create the ``find`` sub-parser on *subparsers* and declare its
        arguments.

        The parser's ``func`` default is set to ``cls.run_from_args``.
        Returns the newly created parser.
        """
        find_parser = subparsers.add_parser(
            "find",
            help="Find commits that introduced large regressions",
            description=(
                "Adaptively searches a range of commits for\n"
                "one that produces a large regression. This only works well\n"
                "when the regression in the range is mostly monotonic."
            ),
        )

        # Positional arguments.
        find_parser.add_argument(
            'range',
            metavar=('from..to',),
            type=str,
            help=(
                "Range of commits to search. For a git\n"
                "repository, this is passed as the first argument to ``git\n"
                "log``. See 'specifying ranges' section of the\n"
                "`gitrevisions` manpage for more info."
            ),
        )
        find_parser.add_argument(
            "bench",
            metavar=('benchmark_name',),
            type=str,
            help="Name of benchmark to use in search.",
        )

        # Flags specific to this command.
        find_parser.add_argument(
            "--invert",
            "-i",
            action="store_true",
            help=(
                "Search for a decrease in the benchmark value,\n"
                "rather than an increase."
            ),
        )
        find_parser.add_argument(
            "--skip-save",
            action="store_true",
            help="Do not save intermediate results from the search",
        )

        # Options shared with other commands, registered in a fixed order.
        shared_option_adders = (
            common_args.add_parallel,
            common_args.add_show_stderr,
            common_args.add_machine,
            common_args.add_environment,
            common_args.add_launch_method,
        )
        for add_shared_option in shared_option_adders:
            add_shared_option(find_parser)

        find_parser.set_defaults(func=cls.run_from_args)
        return find_parser

    @classmethod
    def run_from_conf_args(cls, conf, args, **kwargs):
        """
        Translate the parsed command-line *args* into a call to `run`.

        Any extra keyword arguments are forwarded to `run` unchanged.
        Returns whatever `run` returns.
        """
        options = {
            "invert": args.invert,
            "show_stderr": args.show_stderr,
            "parallel": args.parallel,
            "machine": args.machine,
            "env_spec": args.env_spec,
            "launch_method": args.launch_method,
            "skip_save": args.skip_save,
        }
        return cls.run(conf, args.range, args.bench, **options, **kwargs)

    @classmethod
    def run(
        cls,
        conf,
        range_spec,
        bench,
        invert=False,
        show_stderr=False,
        parallel=1,
        machine=None,
        env_spec=None,
        _machine_file=None,
        launch_method=None,
        skip_save=False,
    ):
        """
        Search the commits selected by *range_spec* for the one where the
        benchmark matching *bench* changes the most.

        Parameters
        ----------
        conf
            Project configuration; ``results_dir`` and ``project`` are read
            from it here, and it is passed on to the repo, environment and
            benchmark helpers.
        range_spec
            Commit range handed to ``repo.get_hashes_from_range``.
        bench
            Regular expression used to discover the benchmark.  When it
            matches several benchmarks, one whose name equals *bench*
            exactly is used if there is exactly one such benchmark.
        invert
            When true, the sign of every computed difference is flipped
            and the final message says "improvement" rather than
            "regression".
        show_stderr, launch_method
            Forwarded to ``run_benchmarks``.
        parallel, env_spec
            Forwarded to ``Setup.run``.
        machine, _machine_file
            Forwarded to ``Machine.load``.
        skip_save
            When true, previously stored results are not loaded and new
            results are not written to ``conf.results_dir``.

        Returns
        -------
        int
            ``1`` when no commits, no environments, or no single benchmark
            could be selected; ``0`` once the search has finished.

        Raises
        ------
        util.UserError
            When the search cannot find usable results among neighbouring
            commits ("Too many commits failed").
        """
        machine_params = Machine.load(machine_name=machine, _path=_machine_file, interactive=True)
        params = dict(machine_params.__dict__)
        machine_params.save(conf.results_dir)

        repo = get_repo(conf)
        repo.pull()

        # The search walks the list from index 0 upwards, so the order
        # returned by the repo is reversed first.
        commit_hashes = repo.get_hashes_from_range(range_spec)[::-1]
        n_commits = len(commit_hashes)
        if n_commits == 0:
            log.error("No commit hashes selected")
            return 1

        environments = Setup.run(conf=conf, env_spec=env_spec, parallel=parallel)
        if len(environments) == 0:
            log.error("No environments selected")
            return 1

        benchmarks = Benchmarks.discover(conf, repo, environments, commit_hashes, regex=bench)
        n_matched = len(benchmarks)
        if n_matched == 0:
            log.error(f"'{bench}' benchmark not found")
            return 1
        if n_matched > 1:
            # Several matches: fall back to the one named exactly *bench*.
            inexact_names = [name for name in benchmarks if name != bench]
            exact_matches = benchmarks.filter_out(inexact_names)
            if len(exact_matches) != 1:
                log.error(f"'{bench}' matches more than one benchmark")
                return 1
            log.warning(f"'{bench}' matches more than one benchmark, using exact match")
            benchmarks = exact_matches

        [benchmark_name] = benchmarks.keys()
        benchmark_type = benchmarks[benchmark_name]["type"]

        steps = int(math.log(n_commits) / math.log(2))
        log.info(f"Running approximately {steps} benchmarks within {n_commits} commits")

        # Only the first environment is used for the search.
        env = environments[0]

        # Per-commit cache of result values, indexed like commit_hashes.
        results = [None for _ in commit_hashes]

        def do_benchmark(index):
            """
            Return the result values for commit *index*, running the
            benchmark only if they are not cached yet.
            """
            cached = results[index]
            if cached is not None:
                return cached

            commit_hash = commit_hashes[index]
            commit_name = repo.get_decorated_hash(commit_hash, 8)
            log.info(f"For {conf.project} commit {commit_name}:")

            env.install_project(conf, repo, commit_hash)

            params['python'] = env.python
            params.update(env.requirements)
            stored = Results(
                params,
                env.requirements,
                commit_hash,
                repo.get_date(commit_hash),
                env.python,
                env.name,
                env.env_vars,
            )
            if not skip_save:
                stored.load_data(conf.results_dir)

            res = run_benchmarks(
                benchmarks,
                env,
                results=stored,
                show_stderr=show_stderr,
                launch_method=launch_method,
            )
            if not skip_save:
                res.save(conf.results_dir)

            bench_params = benchmarks[benchmark_name]['params']
            results[index] = res.get_result_value(benchmark_name, bench_params)

            # A timing benchmark that timed out is recorded as having run
            # for the timeout itself, so the commit still yields values
            # and the search does not fall back to a linear scan.
            errcode = res.errcode[benchmark_name]
            if errcode == util.TIMEOUT_RETCODE and benchmark_type == "time":
                timeout_limit = benchmarks[benchmark_name]['timeout']
                results[index] = [
                    timeout_limit if value is None else value for value in results[index]
                ]

            return results[index]

        def non_null_results(*result_sets):
            """
            Tell whether at least one position holds a non-null value in
            every one of the given result sets.
            """
            return any(
                all(value is not None for value in values) for values in zip(*result_sets)
            )

        def difference_3way(a, b, c):
            """
            Return the largest normalised differences ``(a - b, b - c)``.

            Positions where any of the three values is missing are
            skipped; both maxima have a floor of 0.
            """
            diffs_ab = [0]
            diffs_bc = [0]
            for va, vb, vc in zip(a, b, c):
                if va is None or vb is None or vc is None:
                    continue
                denom = abs(va) + abs(vb) + abs(vc)
                if denom == 0:
                    denom = 1.0
                if invert:
                    # Flip the sign so that decreases rank as the largest.
                    denom *= -1.0
                diffs_ab.append((va - vb) / denom)
                diffs_bc.append((vb - vc) / denom)
            return max(diffs_ab), max(diffs_bc)

        def do_search(lo, hi):
            """
            Narrow ``[lo, hi]`` one half at a time and return the index of
            the commit where the largest change was located.
            """
            while hi - lo > 1:
                mid = lo + (hi - lo) // 2

                log.info(f"Testing {draw_graph(lo, mid, hi, n_commits)}")

                with log.indent():
                    # Move each probe past commits without usable results,
                    # giving up when one probe runs into the next.
                    while True:
                        lo_result = do_benchmark(lo)
                        if non_null_results(lo_result):
                            break
                        lo += 1
                        if lo >= mid:
                            raise util.UserError("Too many commits failed")

                    while True:
                        mid_result = do_benchmark(mid)
                        if non_null_results(mid_result, lo_result):
                            break
                        mid += 1
                        if mid >= hi:
                            raise util.UserError("Too many commits failed")

                    while True:
                        hi_result = do_benchmark(hi)
                        if non_null_results(lo_result, mid_result, hi_result):
                            break
                        hi -= 1
                        if hi <= mid:
                            raise util.UserError("Too many commits failed")

                diff_b, diff_a = difference_3way(hi_result, mid_result, lo_result)

                # Continue in the half that shows the larger change; ties
                # go to the lower half.
                if diff_a >= diff_b:
                    hi = mid
                else:
                    lo = mid

            return hi

        found = do_search(0, n_commits - 1)

        commit_name = repo.get_decorated_hash(commit_hashes[found], 8)
        direction = "improvement" if invert else "regression"
        log.info(f"Greatest {direction} found: {commit_name}")

        return 0