Source code for asv._stats

# Licensed under a 3-clause BSD style license - see LICENSE.rst

# Author: Pauli Virtanen, 2016

import math
from operator import index


[docs] def get_weight(stats): """ Return a data point weight for the result. """ if stats is None or 'ci_99_a' not in stats or 'ci_99_b' not in stats: return None try: a = stats['ci_99_a'] b = stats['ci_99_b'] if math.isinf(a) or math.isinf(b): # Infinite interval is due to too few samples --- consider # weight as missing return None return 2 / abs(b - a) except ZeroDivisionError: return None
[docs] def is_different(samples_a, samples_b, stats_a, stats_b, p_threshold=0.002): """Check whether the samples are statistically different. If sample data is not provided, or the sample is too small, falls back to a pessimistic CI-based check. If it returns True, then the difference is statistically significant. If it returns False, it might or might not be statistically significant. Parameters ---------- samples_a, samples_b Input samples stats_a, stats_b Input stats data """ if samples_a is not None and samples_b is not None: # Raw data present: Mann-Whitney U test, but only if there's # enough data so that the test can return True a = [x for x in samples_a if not math.isnan(x)] b = [x for x in samples_b if not math.isnan(x)] p_min = 1 / binom(len(a) + len(b), min(len(a), len(b))) if p_min < p_threshold: _, p = mann_whitney_u(a, b) return p < p_threshold # If confidence intervals overlap, reject. # Corresponds to a test with ill-specified threshold p-value, # which generally can be significantly smaller than p <= 0.01 # depending on the actual data. For normal test (known variance), # 0.00027 <= p <= 0.01. ci_a = (stats_a['ci_99_a'], stats_a['ci_99_b']) ci_b = (stats_b['ci_99_a'], stats_b['ci_99_b']) if ci_a[1] >= ci_b[0] and ci_a[0] <= ci_b[1]: return False return True
[docs] _mann_whitney_u_memo = {}
[docs] def mann_whitney_u(x, y, method='auto'): """ Mann-Whitney U test Ties are handled conservatively, returning the least significant tie breaking. :cite:empty:`mwu-mannTestWhetherOne1947,mwu-gibbonsNonparametricStatisticalInference2010`. Parameters ---------- x, y : list of float Samples to test method : {'auto', 'exact', 'normal'} Whether to compute p-value exactly of via normal approximation. The option 'auto' switches to approximation for sample size > 20. Returns ------- u : int U-statistic p : float p-value for two-sided alternative References ---------- .. bibliography:: :filter: docname in docnames :labelprefix: MWU_ :keyprefix: mwu- """ memo = _mann_whitney_u_memo if len(memo) > 100000: memo.clear() m = len(x) n = len(y) if method == 'auto': if max(m, n) > 20: method = 'normal' else: method = 'exact' u, ties = mann_whitney_u_u(x, y) # Conservative tie breaking if u <= m * n // 2 and u + ties >= m * n // 2: ties = m * n // 2 - u ux1 = min(u, m * n - u) ux2 = min(u + ties, m * n - (u + ties)) if ux1 >= ux2: ux = ux1 else: u = u + ties ux = ux2 # Get p-value if method == 'exact': p1 = mann_whitney_u_cdf(m, n, ux, memo) p2 = 1.0 - mann_whitney_u_cdf(m, n, max(m * n // 2, m * n - ux - 1), memo) p = p1 + p2 elif method == 'normal': N = m + n var = m * n * (N + 1) / 12 z = (ux - m * n / 2) / math.sqrt(var) cdf = 0.5 * math.erfc(-z / math.sqrt(2)) p = 2 * cdf else: raise ValueError(f"Unknown method {method!r}") return u, p
[docs] def mann_whitney_u_u(x, y): u = 0 ties = 0 for xx in x: for yy in y: if xx > yy: u += 1 elif xx == yy: ties += 1 return u, ties
[docs] def mann_whitney_u_cdf(m, n, u, memo=None): if memo is None: memo = {} cdf = 0 for uu in range(u + 1): cdf += mann_whitney_u_pmf(m, n, uu, memo) return cdf
[docs] def mann_whitney_u_pmf(m, n, u, memo=None): if memo is None: memo = {} return mann_whitney_u_r(m, n, u, memo) / binom(m + n, m)
[docs] def mann_whitney_u_r(m, n, u, memo=None): """ Number of orderings in Mann-Whitney U test. The PMF of U for samples of sizes (m, n) is given by :cite:empty:`mwur-mannTestWhetherOne1947` .. code-block:: p(u) = r(m, n, u) / binom(m + n, m). References ---------- .. bibliography:: :filter: docname in docnames :labelprefix: MWUR_ :keyprefix: mwur- """ if u < 0: value = 0 elif m == 0 or n == 0: value = 1 if u == 0 else 0 else: # Don't bother figuring out table construction, memoization # sorts it out if memo is None: memo = {} key = (m, n, u) value = memo.get(key) if value is not None: return value value = mann_whitney_u_r(m, n - 1, u, memo) + mann_whitney_u_r(m - 1, n, u - n, memo) memo[key] = value return value
[docs] def binom(n, k): """ Binomial coefficient (n over k) """ n = index(n) k = index(k) if not 0 <= k <= n: return 0 m = n + 1 num = 1 den = 1 for j in range(1, min(k, n - k) + 1): num *= m - j den *= j return num // den