# Source code for scorio.eval.maj

r"""Majority-vote metrics and uncertainty estimators for binary outcomes.

This module evaluates whether a sampled subset contains a strict majority of
correct traces. It is a binary-outcome wrapper around the generalized
threshold-pass family.

Methods
-------
- ``maj_at_k``: probability that more than half of the ``k`` selected traces
  are correct.

Each metric has a companion ``*_ci`` function that returns
``(mu, sigma, lo, hi)`` under the Bayesian uncertainty model used here.
"""

import numpy as np

from .gpass import g_pass_at_k_tau, g_pass_at_k_tau_ci
from .utils import _as_2d_int_matrix


def _majority_tau(k: int) -> float:
    """Return the fraction ``(k // 2 + 1) / k`` used as the majority threshold.

    ``k // 2 + 1`` is the smallest integer count strictly greater than
    ``k / 2``, so the returned τ is the strict-majority count expressed as a
    share of ``k``.

    Args:
        k: Number of sampled traces. Callers in this module validate
            ``k >= 1`` before calling; ``k == 0`` raises ``ZeroDivisionError``.

    Returns:
        float: The threshold τ.
    """
    strict_majority_count = k // 2 + 1
    return strict_majority_count / k



# [docs]  (documentation-page link residue; not part of the module)
def maj_at_k(R: np.ndarray, k: int) -> float:
    r"""
    Maj@k: probability that a ``k``-subset has a strict majority of correct traces.

    For each prompt, a subset of ``k`` of the ``N`` observed traces is drawn
    uniformly at random; the subset "succeeds" when strictly more than half
    of its traces are correct. The result is averaged over prompts. Because
    only correctness labels are used, this serves as a binary-outcome proxy
    for the majority-vote metrics commonly reported in reasoning papers.

    Args:
        R: :math:`M \times N` binary outcome matrix.
        k: Number of sampled traces, with ``1 <= k <= N``.

    Returns:
        float: Average strict-majority success probability across prompts.

    Raises:
        ValueError: If ``k`` is outside ``1 <= k <= N``.

    Formula:
        Let :math:`\nu_\alpha = \sum_i R_{\alpha i}` and
        :math:`j_0 = \lfloor k/2 \rfloor + 1`. Then

        .. math::

            \mathrm{Maj@k}_\alpha = \sum_{j=j_0}^{k}
            \frac{\binom{\nu_\alpha}{j}\binom{N-\nu_\alpha}{k-j}}
                 {\binom{N}{k}}.

        Equivalently, this is :math:`\mathrm{G\text{-}Pass@k}_{\tau}` with
        :math:`\tau = j_0 / k`.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> round(maj_at_k(R, 1), 6)
        0.7
        >>> round(maj_at_k(R, 2), 6)
        0.45
        >>> round(maj_at_k(R, 3), 6)
        0.85
    """
    outcomes = _as_2d_int_matrix(R)
    _num_prompts, num_samples = outcomes.shape

    k_in_range = 1 <= k <= num_samples
    if not k_in_range:
        raise ValueError(
            f"k must satisfy 1 <= k <= N (N={num_samples}); got k={k}"
        )

    # Delegate to the generalized threshold-pass metric at the
    # strict-majority threshold.
    majority_threshold = _majority_tau(k)
    return g_pass_at_k_tau(outcomes, k, tau=majority_threshold)




# [docs]  (documentation-page link residue; not part of the module)
def maj_at_k_ci(
    R: np.ndarray,
    k: int,
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread, and interval for :func:`maj_at_k`.

    The computation is delegated to the generalized threshold-pass posterior,
    evaluated at the strict-majority threshold
    :math:`j_0 = \lfloor k/2 \rfloor + 1`.

    Args:
        R: :math:`M \times N` binary outcome matrix.
        k: Number of sampled traces, with ``1 <= k <= N``.
        confidence: Credibility level for the normal-approximation interval.
        bounds: ``(lo, hi)`` clipping bounds for the interval.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`

    Raises:
        ValueError: If ``k`` is outside ``1 <= k <= N``.

    Formula:
        This is exactly :func:`~scorio.eval.g_pass_at_k_tau_ci` evaluated at

        .. math::

            \tau = \frac{\lfloor k/2 \rfloor + 1}{k}.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> mu, sigma, lo, hi = maj_at_k_ci(R, 2)
        >>> round(mu, 6), round(sigma, 6), round(lo, 4), round(hi, 4)
        (0.446429, 0.146167, 0.1599, 0.7329)
        >>> mu, sigma, lo, hi = maj_at_k_ci(R, 3)
        >>> round(mu, 6), round(sigma, 6), round(lo, 4), round(hi, 4)
        (0.684524, 0.151958, 0.3867, 0.9824)
    """
    outcomes = _as_2d_int_matrix(R)
    _num_prompts, num_samples = outcomes.shape

    k_in_range = 1 <= k <= num_samples
    if not k_in_range:
        raise ValueError(
            f"k must satisfy 1 <= k <= N (N={num_samples}); got k={k}"
        )

    # Same strict-majority threshold as maj_at_k; all interval and prior
    # settings are forwarded unchanged.
    majority_threshold = _majority_tau(k)
    return g_pass_at_k_tau_ci(
        outcomes,
        k,
        tau=majority_threshold,
        confidence=confidence,
        bounds=bounds,
        alpha0=alpha0,
        beta0=beta0,
    )



# Public API of this module: the point estimate and its interval companion.
__all__ = ["maj_at_k", "maj_at_k_ci"]