# Source code for scorio.eval.avg

r"""Average accuracy metric with Bayesian uncertainty calibration.

Let :math:`R \in \{0,\ldots,C\}^{M \times N}` be outcomes and
:math:`w \in \mathbb{R}^{C+1}` be optional category weights. The weighted
average maps each entry :math:`R_{\alpha i}` to :math:`w_{R_{\alpha i}}` and
averages across questions and trials.

"""

import numpy as np

from .bayes import bayes
from .utils import (
    _as_2d_int_matrix,
    _validate_binary,
    _validate_matrix_range,
    normal_credible_interval,
)


def _avg(
    R: np.ndarray,
    w: np.ndarray | None = None,
) -> float:
    r"""
    Mean score over every cell of the result matrix, optionally weighted.

    Without **w**, *R* is required to be binary and the plain arithmetic
    mean of its entries is returned.  With **w**, every entry
    :math:`R_{\alpha i}` is first replaced by its category score
    :math:`w_{R_{\alpha i}}` and the mean of those scores is returned.

    Args:
        R: :math:`M \times N` result matrix whose entries lie in
           :math:`\{0, \ldots, C\}`; row :math:`\alpha` holds the *N*
           outcomes recorded for question :math:`\alpha`.
        w: optional weight vector :math:`(w_0, \ldots, w_C)` of length
           :math:`C+1`, where :math:`w_k` is the score assigned to
           category *k*.  When *None*, *R* must be binary, which is
           equivalent to using :math:`w = (0, 1)`.

    Returns:
        float: The (weighted) arithmetic mean over all :math:`M \cdot N`
        entries.

    Notation:
        :math:`R_{\alpha i}` denotes the outcome of question
        :math:`\alpha` on trial :math:`i`.

    Formula:
        .. math::

            \text{avg} = \frac{1}{M \cdot N}
                \sum_{\alpha=1}^{M} \sum_{i=1}^{N} w_{R_{\alpha i}}

        For :math:`w = (0, 1)` this is the ordinary binary average.

    Examples:
        Binary (no weights):

        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> round(_avg(R), 6)
        0.7

        Weighted:

        >>> R = np.array([[0, 1, 2, 2, 1],
        ...               [1, 1, 0, 2, 2]])
        >>> w = np.array([0.0, 0.5, 1.0])
        >>> round(_avg(R, w), 6)
        0.6
    """
    outcomes = _as_2d_int_matrix(R)

    if w is not None:
        weights = np.asarray(w, dtype=float)
        # The highest admissible category index is fixed by the number of
        # weights supplied.
        top_category = weights.size - 1
        _validate_matrix_range(outcomes, 0, top_category, "R")
        # Integer-array indexing swaps each category label for its score.
        scores = weights[outcomes]
    else:
        # No weights: the raw 0/1 entries are already the scores.
        _validate_binary(outcomes)
        scores = outcomes

    return float(np.mean(scores))


def avg(
    R: np.ndarray,
    w: np.ndarray | None = None,
) -> tuple[float, float]:
    r"""
    Avg@N plus a Bayesian uncertainty estimate (uniform prior, no R0).

    Under a uniform Dirichlet prior (:math:`D = 0`), the Bayesian posterior
    mean :math:`\mu` is an affine transform of the naive (weighted) average
    *a*, and the standard deviations are related by Eq. 20 of the Bayes@N
    paper:

    .. math::

        \sigma_{\text{avg}} = \frac{T}{N}\,\sigma_{\text{Bayes}}

    This lets you report the familiar **avg@N** while using the Bayesian
    framework of ``scorio`` to compute uncertainty on the same scale,
    without relying on CLT/Wald intervals or bootstrap resampling.

    Args:
        R: :math:`M \times N` int matrix with entries in
           :math:`\{0, \ldots, C\}`.
           Row :math:`\alpha` contains the *N* outcomes for question
           :math:`\alpha`.
        w: optional length :math:`(C+1)` weight vector
           :math:`(w_0, \ldots, w_C)`.  If *None*, *R* must be binary and
           :math:`w = (0, 1)` is used.

    Returns:
        tuple[float, float]: :math:`(a,\; \sigma_a)` where *a* is the
        (weighted) average and :math:`\sigma_a` is the Bayesian
        uncertainty rescaled to the avg@N scale.

    Raises:
        ValueError: If *R* has no columns (:math:`N < 1`).

    Formula:
        Let :math:`T = 1 + C + N` (uniform prior, :math:`D = 0`).

        .. math::

            a &= \text{avg}(R, w)

            \sigma_a &= \frac{T}{N}\,\sigma_{\text{Bayes}}(R, w)

    Examples:
        Binary (no weights):

        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> a, sigma = avg(R)
        >>> round(a, 6), round(sigma, 6)
        (0.7, 0.165831)

        Weighted:

        >>> R = np.array([[0, 1, 2, 2, 1],
        ...               [1, 1, 0, 2, 2]])
        >>> w = np.array([0.0, 0.5, 1.0])
        >>> a, sigma = avg(R, w)
        >>> round(a, 6), round(sigma, 6)
        (0.6, 0.147196)
    """
    Rm = _as_2d_int_matrix(R)
    if w is None:
        _validate_binary(Rm)
        # Binary outcomes are scored with the implicit weights (0, 1).
        wv = np.array([0.0, 1.0], dtype=float)
    else:
        wv = np.asarray(w, dtype=float)

    _, N = Rm.shape
    C = wv.size - 1
    # Guard the division by N in the rescaling below.
    if N <= 0:
        raise ValueError("R must have at least one column (N>=1)")

    # Bayesian sigma under the uniform prior (D=0); the posterior mean
    # returned alongside it is not needed here.
    _, sigma_bayes = bayes(Rm, wv, R0=None)
    T = 1 + C + N  # D=0
    # Rescale the posterior std-dev onto the avg@N scale.
    sigma_avg = (T / N) * sigma_bayes
    return _avg(Rm, wv), float(sigma_avg)


def avg_ci(
    R: np.ndarray,
    w: np.ndarray | None = None,
    confidence: float = 0.95,
    bounds: tuple[float, float] | None = None,
) -> tuple[float, float, float, float]:
    r"""
    Avg@N with Bayesian :math:`\sigma` and a normal-approximation credible
    interval (CrI).

    Combines :func:`avg` with a symmetric normal credible interval clipped
    to optional ``bounds``.

    Args:
        R: :math:`M \times N` int matrix with entries in
           :math:`\{0, \ldots, C\}`.
           Row :math:`\alpha` contains the *N* outcomes for question
           :math:`\alpha`.
        w: optional length :math:`(C+1)` weight vector
           :math:`(w_0, \ldots, w_C)`.  If *None*, *R* must be binary and
           :math:`w = (0, 1)` is used.
        confidence: credibility level of the interval (default 0.95).
        bounds: optional ``(lo, hi)`` clipping bounds for the interval.

    Returns:
        tuple[float, float, float, float]:
        :math:`(a,\; \sigma_a,\; \text{lo},\; \text{hi})`

    Formula:
        .. math::

            \text{lo},\; \text{hi} = a \pm z_{(1+\gamma)/2}\,\sigma_a

        where :math:`\gamma` is the requested ``confidence`` level and the
        interval is clipped to ``bounds`` when provided.

    Examples:
        Binary (no weights):

        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> a, sigma, lo, hi = avg_ci(R, bounds=(0.0, 1.0))
        >>> round(a, 4), round(sigma, 4), round(lo, 4), round(hi, 4)
        (0.7, 0.1658, 0.375, 1.0)

        Weighted:

        >>> R = np.array([[0, 1, 2, 2, 1],
        ...               [1, 1, 0, 2, 2]])
        >>> w = np.array([0.0, 0.5, 1.0])
        >>> a, sigma, lo, hi = avg_ci(R, w, confidence=0.95)
        >>> round(a, 4), round(sigma, 4), round(lo, 4), round(hi, 4)
        (0.6, 0.1472, 0.3115, 0.8885)
    """
    # Point estimate and its std-dev on the avg@N scale.
    a, sigma = avg(R, w)
    # Interval construction (and optional clipping) is delegated to the
    # shared helper; ``confidence`` is passed as its ``credibility`` level.
    lo, hi = normal_credible_interval(
        a, sigma, credibility=confidence, two_sided=True, bounds=bounds
    )
    return float(a), float(sigma), float(lo), float(hi)


# Names exported by ``from ... import *``; ``_avg`` is intentionally left out.
__all__ = ["avg", "avg_ci"]