Source code for scorio.eval.geom

r"""Geometric pass/spectrum metrics for binary outcomes.

This module implements finite-bank geometric and threshold-spectrum metrics
together with approximate Beta-Bernoulli posterior summaries for latent
resampling quantities. The paper ``Geom@k: Fast to Converge, Slow to Drift``
defines a dataset-level endpoint blend; ``scorio`` also exposes a
questionwise Geom@k variant as the primary ``geom_at_k`` metric.

Notation
--------
For a binary matrix :math:`R \in \{0,1\}^{M \times N}`, fixed budget
:math:`k`, and threshold weights :math:`w = (w_1, \ldots, w_k)` with
non-negative entries and :math:`\sum_r w_r \le 1`, define the
threshold-spectrum summary

.. math::

    S_{w,k}(R) = \sum_{r=1}^k w_r T_{r,k}(R),

where :math:`T_{r,k}(R)` is the dataset-level probability that a uniformly
sampled subset of size :math:`k` without replacement contains at least
:math:`r` correct trials.

The GeoSpectrum family is then

.. math::

    \mathrm{GeoSpectrum}_{\lambda,w}@k(R)
    = P_k(R)^\lambda \, S_{w,k}(R)^{1-\lambda},

where :math:`P_k(R)` is dataset-level Pass@k. The endpoint conventions are
:math:`\lambda = 0 \to S_{w,k}` and :math:`\lambda = 1 \to P_k`. The named
operating points are:

- ``geom_ds_at_k``: dataset-level endpoint blend with
  :math:`\lambda = 1/2` and :math:`w_r = 1\{r = k\}`.
- ``geom_at_k``: questionwise endpoint blend, computed before averaging
  across questions.
- ``GeoSpectrum*@k``: :math:`\lambda = 1/2` with upper-half weights
  :math:`w_r = (2/k)\,1\{r \ge \lceil k/2 \rceil + 1\}`.

The ``*_ci`` functions implement the approximate posterior
credible intervals for the corresponding latent i.i.d. quantities under a
Beta-Bernoulli model.

Available API
-------------
- ``geom_at_k`` and ``geom_at_k_ci`` for the questionwise Pass/Unanimous
  geometric blend.
- ``geom_ds_at_k`` and ``geom_ds_at_k_ci`` for the dataset-level
  Pass/Unanimous blend.
- ``geo_spectrum_at_k`` and ``geo_spectrum_at_k_ci`` for
  :math:`\mathrm{GeoSpectrum}_{\lambda,w}@k`.
- ``geo_spectrum_star_at_k`` and ``geo_spectrum_star_at_k_ci`` for the default
  upper-half operating point.
- ``threshold_spectrum_at_k`` and ``threshold_spectrum_at_k_ci`` for
  :math:`S_{w,k}`.
"""

import math

import numpy as np
from scipy.special import comb

from .pass_at_k import (
    _beta_ratio,
    _binary_beta_posterior_params,
)
from .pass_at_k import (
    pass_at_k as _pass_at_k,
)
from .pass_at_k import (
    pass_hat_k as _pass_hat_k,
)
from .utils import _as_2d_int_matrix, _validate_binary, normal_credible_interval


def _weighted_geometric_mean(
    x: float, y: float, x_weight: float, y_weight: float
) -> float:
    if x_weight == 0.0 and y_weight == 0.0:
        raise ValueError("at least one power must be non-zero")

    if x == 0.0 and x_weight < 0.0:
        if y == 0.0 and y_weight > 0.0:
            return 0.0
        raise ValueError(
            f"x_power must be non-negative when x is zero; got x_power={x_weight}"
        )

    if y == 0.0 and y_weight < 0.0:
        if x == 0.0 and x_weight > 0.0:
            return 0.0
        raise ValueError(
            f"y_power must be non-negative when y is zero; got y_power={y_weight}"
        )

    return float((x**x_weight) * (y**y_weight))


def _validate_beta_prior(alpha0: float, beta0: float) -> None:
    if alpha0 <= 0.0 or beta0 <= 0.0:
        raise ValueError(
            f"alpha0 and beta0 must both be > 0 for a Beta prior; got {alpha0}, {beta0}"
        )


def _validate_finite_bank_k(N: int, k: int) -> None:
    if not (1 <= k <= N):
        raise ValueError(f"k must satisfy 1 <= k <= N (N={N}); got k={k}")


def _validate_latent_k(k: int) -> None:
    if k < 1:
        raise ValueError(f"k must be >= 1; got k={k}")


def _resolve_lambda(lam: float, lambda_: float | None = None) -> float:
    if lambda_ is not None:
        if lam != 0.5:
            raise TypeError("Specify at most one of 'lam' and 'lambda_'.")
        lam = lambda_
    if not (0.0 <= lam <= 1.0):
        raise ValueError(f"lam must be in [0, 1]; got {lam}")
    return float(lam)


def _unanimous_spectrum_weights(k: int) -> np.ndarray:
    r"""Build the endpoint weights :math:`w_r = 1\{r = k\}`.

    Args:
        k: Budget; validated with ``_validate_latent_k``.

    Returns:
        np.ndarray: Length-``k`` float vector that is zero everywhere except
        for a ``1.0`` in its last entry.
    """
    _validate_latent_k(k)
    endpoint = np.zeros(k, dtype=float)
    endpoint[k - 1] = 1.0  # only the threshold r = k carries weight
    return endpoint


def _mg_spectrum_weights(k: int) -> np.ndarray:
    r"""Build the upper-half weights behind ``GeoSpectrum*@k``.

    The weight vector is

    .. math::

        w^{mG}_{r,k} = \frac{2}{k} 1\{r \ge \lceil k/2 \rceil + 1\}.

    Args:
        k: Budget; validated with ``_validate_latent_k``.

    Returns:
        np.ndarray: Length-``k`` float vector whose entry at 0-based position
        ``r - 1`` holds :math:`w^{mG}_{r,k}`.
    """
    _validate_latent_k(k)
    # 0-based position ceil(k/2) corresponds to threshold r = ceil(k/2) + 1.
    first_active = int(math.ceil(k / 2.0))
    positions = np.arange(k)
    return np.where(positions >= first_active, 2.0 / k, 0.0)


def _validate_spectrum_weights(
    weights: np.ndarray | list[float] | tuple[float, ...],
    k: int,
) -> np.ndarray:
    w = np.asarray(weights, dtype=float)
    if w.ndim != 1 or w.shape[0] != k:
        raise ValueError(f"weights must be a length-{k} 1D array; got shape {w.shape}")
    if not np.all(np.isfinite(w)):
        raise ValueError("weights must be finite")
    if np.any(w < 0.0):
        raise ValueError("weights must be non-negative")
    weight_sum = float(np.sum(w))
    if weight_sum > 1.0 + 1e-12:
        raise ValueError(
            f"weights must satisfy sum(weights) <= 1; got sum={weight_sum}"
        )
    return w


def _event_score_levels(weights: np.ndarray) -> np.ndarray:
    r"""Return :math:`A_j = \sum_{r \le j} w_r` with :math:`A_0 = 0`.

    :math:`A_j` is the credit assigned to a sampled subset of size :math:`k`
    that contains exactly :math:`j` correct trials.
    """
    return np.concatenate(([0.0], np.cumsum(weights, dtype=float)))



[docs]
def threshold_spectrum_at_k(
    R: np.ndarray,
    k: int,
    weights: np.ndarray | list[float] | tuple[float, ...],
) -> float:
    r"""Compute the finite-bank threshold spectrum :math:`S_{w,k}(R)`.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Sampling budget with :math:`1 \le k \le N`.
        weights: Non-negative length-:math:`k` weights with
            :math:`\sum_r w_r \le 1`.

    Returns:
        float: :math:`S_{w,k}(R)`, the mean of the per-question scores.

    Raises:
        ValueError: If ``k`` is outside ``[1, N]`` or ``weights`` fails
            validation.

    Notes:
        The target quantity is

        .. math::

            S_{w,k}(R) = \sum_{r=1}^k w_r T_{r,k}(R).

        It is evaluated through the equivalent event-score form (Appendix
        C.4 of the paper): a size-:math:`k` subset with exactly :math:`j`
        correct trials earns credit :math:`A_j = \sum_{r \le j} w_r`, and
        each credit is weighted by the hypergeometric probability of drawing
        exactly :math:`j` correct trials.
    """
    bank = _as_2d_int_matrix(R)
    _validate_binary(bank)
    _, n_trials = bank.shape
    _validate_finite_bank_k(n_trials, k)
    checked = _validate_spectrum_weights(weights, k)

    successes = np.sum(bank, axis=1)
    failures = n_trials - successes
    credits = _event_score_levels(checked)
    n_subsets = float(comb(n_trials, k))

    per_question = np.zeros_like(successes, dtype=float)
    for hits in range(1, k + 1):
        credit = float(credits[hits])
        if credit != 0.0:
            # credit * P(exactly `hits` correct trials in a size-k subset)
            per_question += (
                credit * comb(successes, hits) * comb(failures, k - hits) / n_subsets
            )
    return float(np.mean(per_question))




[docs]
def geom_ds_at_k(
    R: np.ndarray, k: int, pass_power: float = 0.5, unanimous_power: float = 0.5
) -> float:
    r"""
    Geometric blend of dataset-level Pass@k and Unanimous@k.

    Pass@k and Unanimous@k (equivalently Pass^k) are first averaged across
    questions and only then combined, which is the endpoint GeoSpectrum
    operating point described in the paper. :func:`geom_at_k` is the
    questionwise counterpart that blends before averaging.

    With the default exponents the result is the geometric mean of the two
    dataset-level scores; other exponents give nearby operating points.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
           :math:`R_{\alpha i} = 1` if trial :math:`i` for question
           :math:`\alpha` passed, 0 otherwise.
        k: Sampling budget with :math:`1 \le k \le N`.
        pass_power: Exponent :math:`a` applied to ``Pass@k``.
        unanimous_power: Exponent :math:`b` applied to ``Unanimous@k``.

    Returns:
        float: The dataset-level endpoint score.

    Formula:
        .. math::

            G_{\mathrm{ds},k}(R; a, b)
            = \mathrm{Pass}@k(R)^a\,
              \mathrm{Unanimous}@k(R)^b

        where the default operating point is :math:`a = b = 1/2`.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> round(geom_ds_at_k(R, 2), 6)
        0.653835
    """
    return _weighted_geometric_mean(
        _pass_at_k(R, k),
        _pass_hat_k(R, k),
        pass_power,
        unanimous_power,
    )




[docs]
def geom_at_k(
    R: np.ndarray, k: int, pass_power: float = 0.5, unanimous_power: float = 0.5
) -> float:
    r"""
    Mean over questions of the per-question Geom@k blend.

    This is ``scorio``'s primary Geom@k metric. :func:`geom_ds_at_k` blends
    the dataset-level Pass@k and Unanimous@k; here the blend is applied to
    each question first. With :math:`\nu_\alpha` correct trials out of
    :math:`N` for question :math:`\alpha`,

    .. math::

        P_{\alpha,k} =
        1 - \frac{\binom{N - \nu_\alpha}{k}}{\binom{N}{k}}

    .. math::

        U_{\alpha,k} =
        \frac{\binom{\nu_\alpha}{k}}{\binom{N}{k}}

    .. math::

        G_{\alpha,k} = P_{\alpha,k}^{a}\,U_{\alpha,k}^{b},

    and the returned score is the average of :math:`G_{\alpha,k}`.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Sampling budget with :math:`1 \le k \le N`.
        pass_power: Exponent :math:`a` on the per-question ``Pass@k``.
        unanimous_power: Exponent :math:`b` on the per-question
            ``Unanimous@k``.

    Returns:
        float: The average questionwise ``Geom@k`` score.

    Raises:
        ValueError: If ``k`` is outside ``[1, N]``.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> round(geom_at_k(R, 2), 6)
        0.647106
    """
    bank = _as_2d_int_matrix(R)
    _validate_binary(bank)
    n_questions, n_trials = bank.shape
    _validate_finite_bank_k(n_trials, k)

    successes = np.sum(bank, axis=1)
    n_subsets = float(comb(n_trials, k))
    pass_per_question = 1.0 - comb(n_trials - successes, k) / n_subsets
    unanimous_per_question = comb(successes, k) / n_subsets

    blended = np.empty(n_questions, dtype=float)
    # Blend one question at a time so the zero-base rules of
    # _weighted_geometric_mean apply to each question individually.
    for idx, (p_score, u_score) in enumerate(
        zip(pass_per_question, unanimous_per_question)
    ):
        blended[idx] = _weighted_geometric_mean(
            float(p_score), float(u_score), pass_power, unanimous_power
        )

    return float(np.mean(blended))




[docs]
def geo_spectrum_at_k(
    R: np.ndarray,
    k: int,
    lam: float = 0.5,
    weights: np.ndarray | list[float] | tuple[float, ...] | None = None,
    lambda_: float | None = None,
) -> float:
    r"""
    Evaluate :math:`\mathrm{GeoSpectrum}_{\lambda,w}@k` on the observed bank.

    Leaving ``weights`` as ``None`` selects the upper-half ``mG`` weights, so
    the two-argument call ``geo_spectrum_at_k(R, k)`` is the special case

    .. math::

        \mathrm{GeoSpectrum}^*@k(R)
        = \sqrt{\mathrm{Pass}@k(R)\,\mathrm{mG\text{-}Pass}@k(R)}.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Sampling budget with :math:`1 \le k \le N`.
        lam: The coupling parameter :math:`\lambda` in :math:`[0,1]`.
        weights: Spectrum weights :math:`w`. If omitted, the built-in
            upper-half mG weights are used. Custom weights must be
            length-:math:`k`, non-negative, finite, and satisfy
            :math:`\sum_r w_r \le 1`. They are not inspected when
            :math:`\lambda = 1`.
        lambda_: Keyword alias for ``lam``, named after the mathematical
            symbol. It may only be supplied while ``lam`` is left at its
            default.

    Returns:
        float: :math:`\mathrm{GeoSpectrum}_{\lambda,w}@k(R)`.

    Formula:
        .. math::

            \mathrm{GeoSpectrum}_{\lambda,w}@k(R)
            = \mathrm{Pass}@k(R)^\lambda \, S_{w,k}(R)^{1-\lambda}

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> round(geo_spectrum_at_k(R, 3), 6)
        0.408248
        >>> round(geo_spectrum_at_k(R, 3, lam=1.0), 6)
        1.0
    """
    coupling = _resolve_lambda(lam, lambda_)
    pass_score = _pass_at_k(R, k)
    if coupling == 1.0:
        # Pure Pass@k endpoint: the spectrum term has exponent zero.
        return pass_score

    if weights is None:
        spectrum_weights = _mg_spectrum_weights(k)
    else:
        spectrum_weights = _validate_spectrum_weights(weights, k)

    spectrum_score = threshold_spectrum_at_k(R, k, spectrum_weights)
    return _weighted_geometric_mean(
        pass_score, spectrum_score, coupling, 1.0 - coupling
    )



def _pass_and_spectrum_row_posterior_moments(
    R: np.ndarray,
    k: int,
    weights: np.ndarray,
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    r"""Posterior moments of latent Pass@k and spectrum scores, per question.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget, :math:`k \ge 1`.
        weights: Length-:math:`k` spectrum weights.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        ``(mean_pass, var_pass, mean_spectrum, var_spectrum, cov_pass_spectrum)``
        arrays with one entry per question. Both variances are clipped below
        at zero; the covariance is not clipped.

    Notes:
        These latent quantities are defined for any integer :math:`k \ge 1`,
        so, unlike the observed finite-bank metrics, :math:`k` is *not*
        bounded by the observed trial count :math:`N`.

        Writing :math:`q = 1 - p`, latent Pass@k is :math:`1 - q^k` and the
        latent spectrum is :math:`\sum_j c_j p^j q^{k-j}` with
        :math:`c_j = A_j \binom{k}{j}`. Every moment below is a linear
        combination of ``_beta_ratio(a, b, i, j)`` values, which are assumed
        here to be the posterior expectations :math:`E[p^i q^j]` (confirm
        against ``pass_at_k._beta_ratio``).
    """
    _validate_latent_k(k)
    _validate_beta_prior(alpha0, beta0)

    bank = _as_2d_int_matrix(R)
    _validate_binary(bank)
    n_questions, _ = bank.shape
    checked = _validate_spectrum_weights(weights, k)

    post_alpha, post_beta = _binary_beta_posterior_params(
        bank, alpha0=alpha0, beta0=beta0
    )
    credits = _event_score_levels(checked)
    coeff = np.zeros(k + 1, dtype=float)
    for j in range(1, k + 1):
        coeff[j] = float(credits[j] * comb(k, j))
    # Only terms with a non-zero coefficient contribute to any moment.
    active = [(j, float(coeff[j])) for j in range(1, k + 1) if coeff[j] != 0.0]

    mean_pass = np.empty(n_questions, dtype=float)
    var_pass = np.empty(n_questions, dtype=float)
    mean_spec = np.empty(n_questions, dtype=float)
    var_spec = np.empty(n_questions, dtype=float)
    cov_ps = np.empty(n_questions, dtype=float)

    for row in range(n_questions):
        a_post = float(post_alpha[row])
        b_post = float(post_beta[row])

        fail_k = _beta_ratio(a_post, b_post, 0, k)
        fail_2k = _beta_ratio(a_post, b_post, 0, 2 * k)
        pass_mean = 1.0 - fail_k

        spec_mean = 0.0  # E[S]
        spec_second = 0.0  # E[S^2]
        cross = 0.0  # E[Pass * S]

        for j, c_j in active:
            first_moment = _beta_ratio(a_post, b_post, j, k - j)
            spec_mean += c_j * first_moment
            cross += c_j * (first_moment - _beta_ratio(a_post, b_post, j, 2 * k - j))
            for m, c_m in active:
                spec_second += (
                    c_j * c_m * _beta_ratio(a_post, b_post, j + m, 2 * k - (j + m))
                )

        mean_pass[row] = pass_mean
        var_pass[row] = max(0.0, fail_2k - fail_k * fail_k)
        mean_spec[row] = spec_mean
        var_spec[row] = max(0.0, spec_second - spec_mean * spec_mean)
        cov_ps[row] = cross - pass_mean * spec_mean

    return mean_pass, var_pass, mean_spec, var_spec, cov_ps


def _pass_and_spectrum_posterior_moments(
    R: np.ndarray,
    k: int,
    weights: np.ndarray,
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float, float]:
    r"""Aggregate per-question posterior moments to the dataset level.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget.
        weights: Length-:math:`k` spectrum weights.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        ``(mu_pass, var_pass, mu_spectrum, var_spectrum, cov_pass_spectrum)``
        for the question averages: means are averaged over the :math:`M`
        questions, while variances and covariances are summed and divided by
        :math:`M^2`.
    """
    row_moments = _pass_and_spectrum_row_posterior_moments(
        R,
        k,
        weights,
        alpha0=alpha0,
        beta0=beta0,
    )
    mean_pass, var_pass, mean_spec, var_spec, cov_ps = row_moments

    m_squared = mean_pass.size**2
    mu_pass = float(np.mean(mean_pass))
    mu_spec = float(np.mean(mean_spec))
    return (
        mu_pass,
        float(np.sum(var_pass) / m_squared),
        mu_spec,
        float(np.sum(var_spec) / m_squared),
        float(np.sum(cov_ps) / m_squared),
    )


def _geo_spectrum_at_k_bayes(
    R: np.ndarray,
    k: int,
    lam: float,
    weights: np.ndarray,
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float]:
    r"""Approximate posterior mean/std of latent :math:`\mathrm{GeoSpectrum}_{\lambda,w}@k`.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget.
        lam: Coupling parameter :math:`\lambda` in :math:`[0, 1]`.
        weights: Length-:math:`k` spectrum weights.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        ``(mu, sigma)``. At :math:`\lambda = 0` and :math:`\lambda = 1` these
        are the spectrum and Pass@k moments respectively. Otherwise ``mu`` is
        the blend of the posterior means and ``sigma`` comes from first-order
        delta propagation through :math:`g(x, y) = x^\lambda y^{1-\lambda}`.
    """
    coupling = _resolve_lambda(lam)
    moments = _pass_and_spectrum_posterior_moments(
        R,
        k,
        weights,
        alpha0=alpha0,
        beta0=beta0,
    )
    mu_pass, var_pass, mu_spec, var_spec, cov_ps = moments

    # At either endpoint only one factor remains, so no propagation is needed.
    if coupling == 0.0:
        return mu_spec, float(math.sqrt(max(0.0, var_spec)))
    if coupling == 1.0:
        return mu_pass, float(math.sqrt(max(0.0, var_pass)))

    complement = 1.0 - coupling
    blend = _weighted_geometric_mean(mu_pass, mu_spec, coupling, complement)
    if blend == 0.0:
        # A zero mean would put a zero base under a negative exponent below.
        return 0.0, 0.0

    d_pass = coupling * (mu_pass ** (coupling - 1.0)) * (mu_spec ** (1.0 - coupling))
    d_spec = (1.0 - coupling) * (mu_pass**coupling) * (mu_spec ** (-coupling))
    variance = (
        (d_pass**2) * var_pass
        + (d_spec**2) * var_spec
        + 2.0 * d_pass * d_spec * cov_ps
    )
    return float(blend), float(math.sqrt(max(0.0, variance)))


def _geom_at_k_bayes(
    R: np.ndarray,
    k: int,
    pass_power: float = 0.5,
    unanimous_power: float = 0.5,
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float]:
    r"""Approximate posterior mean/std of the latent questionwise Geom@k.

    Each question's blend :math:`x^a y^b` is evaluated at the posterior means
    of its latent Pass@k (:math:`x`) and Unanimous@k (:math:`y`), and its
    variance is obtained by first-order delta propagation.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget.
        pass_power: Exponent :math:`a` on latent Pass@k.
        unanimous_power: Exponent :math:`b` on latent Unanimous@k.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        ``(mu, sigma)`` where ``mu`` is the mean of the per-question blends
        and ``sigma`` is the square root of the summed per-question variances
        divided by the number of questions.
    """
    row_moments = _pass_and_spectrum_row_posterior_moments(
        R,
        k,
        _unanimous_spectrum_weights(k),
        alpha0=alpha0,
        beta0=beta0,
    )
    mean_pass, var_pass, mean_unanimous, var_unanimous, cov_pu = row_moments

    n_questions = mean_pass.size
    means = np.empty_like(mean_pass, dtype=float)
    variances = np.empty_like(mean_pass, dtype=float)

    for q in range(n_questions):
        p_mean = float(mean_pass[q])
        u_mean = float(mean_unanimous[q])
        blend = _weighted_geometric_mean(
            p_mean, u_mean, pass_power, unanimous_power
        )
        means[q] = blend
        if blend == 0.0:
            variances[q] = 0.0
            continue

        # A zero exponent gives a zero partial derivative; skipping the
        # power expression also avoids a zero base under a negative exponent.
        d_pass = (
            pass_power * (p_mean ** (pass_power - 1.0)) * (u_mean**unanimous_power)
            if pass_power != 0.0
            else 0.0
        )
        d_unanimous = (
            unanimous_power
            * (p_mean**pass_power)
            * (u_mean ** (unanimous_power - 1.0))
            if unanimous_power != 0.0
            else 0.0
        )

        propagated = (
            (d_pass**2) * float(var_pass[q])
            + (d_unanimous**2) * float(var_unanimous[q])
            + 2.0 * d_pass * d_unanimous * float(cov_pu[q])
        )
        variances[q] = max(0.0, propagated)

    mu = float(np.mean(means))
    sigma = float(math.sqrt(float(np.sum(variances))) / n_questions)
    return mu, sigma


def _geom_ds_at_k_bayes(
    R: np.ndarray,
    k: int,
    pass_power: float = 0.5,
    unanimous_power: float = 0.5,
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float]:
    r"""Approximate posterior mean/std of the latent dataset-level Geom@k.

    The blend :math:`x^a y^b` is evaluated at the dataset-level posterior
    means of latent Pass@k (:math:`x`) and Unanimous@k (:math:`y`); the
    standard deviation follows from first-order delta propagation.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget.
        pass_power: Exponent :math:`a` on latent Pass@k.
        unanimous_power: Exponent :math:`b` on latent Unanimous@k.
        alpha0: Beta prior parameter :math:`\alpha_0`.
        beta0: Beta prior parameter :math:`\beta_0`.

    Returns:
        ``(mu, sigma)``; ``(0.0, 0.0)`` when the blended mean is zero.
    """
    moments = _pass_and_spectrum_posterior_moments(
        R,
        k,
        _unanimous_spectrum_weights(k),
        alpha0=alpha0,
        beta0=beta0,
    )
    mu_pass, var_pass, mu_unanimous, var_unanimous, cov_pu = moments

    blend = _weighted_geometric_mean(
        mu_pass, mu_unanimous, pass_power, unanimous_power
    )
    if blend == 0.0:
        return 0.0, 0.0

    # A zero exponent gives a zero partial derivative; skipping the power
    # expression also avoids a zero base under a negative exponent.
    d_pass = (
        pass_power * (mu_pass ** (pass_power - 1.0)) * (mu_unanimous**unanimous_power)
        if pass_power != 0.0
        else 0.0
    )
    d_unanimous = (
        unanimous_power
        * (mu_pass**pass_power)
        * (mu_unanimous ** (unanimous_power - 1.0))
        if unanimous_power != 0.0
        else 0.0
    )

    variance = (
        (d_pass**2) * var_pass
        + (d_unanimous**2) * var_unanimous
        + 2.0 * d_pass * d_unanimous * cov_pu
    )
    return float(blend), float(math.sqrt(max(0.0, variance)))



[docs]
def threshold_spectrum_at_k_ci(
    R: np.ndarray,
    k: int,
    weights: np.ndarray | list[float] | tuple[float, ...],
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread and interval for the latent spectrum :math:`S_{w,k}(p)`.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget. Once the posterior is defined, any integer
           :math:`k \ge 1` is allowed.
        weights: Non-negative length-:math:`k` weights with
            :math:`\sum_r w_r \le 1`.
        confidence: credibility level of the interval (default 0.95).
        bounds: ``(lo, hi)`` clipping bounds for the interval
                (default ``(0, 1)``).
        alpha0: Beta prior parameter :math:`\alpha_0` (default 1).
        beta0: Beta prior parameter :math:`\beta_0` (default 1).

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`

    Notes:
        The target is defined for latent i.i.d. resampling, so unlike
        :func:`threshold_spectrum_at_k` it does not require
        :math:`k \le N`.

    Formula:
        With :math:`A_j = \sum_{r \le j} w_r`, the per-question latent target
        is

        .. math::

            g(p) = \sum_{j=1}^{k} A_j \binom{k}{j} p^j (1-p)^{k-j}.

        The dataset-level summary is

        .. math::

            \mu = \frac{1}{M} \sum_{\alpha=1}^{M} \mathbb{E}[g(p_\alpha)]

        .. math::

            \sigma = \frac{1}{M} \sqrt{
                \sum_{\alpha=1}^{M} \mathrm{Var}[g(p_\alpha)]
            }.
    """
    checked = _validate_spectrum_weights(weights, k)
    _mu_pass, _var_pass, spectrum_mean, spectrum_var, _cov = (
        _pass_and_spectrum_posterior_moments(
            R,
            k,
            checked,
            alpha0=alpha0,
            beta0=beta0,
        )
    )
    sigma = float(math.sqrt(max(0.0, spectrum_var)))
    interval = normal_credible_interval(
        spectrum_mean, sigma, credibility=confidence, two_sided=True, bounds=bounds
    )
    lower, upper = interval
    return float(spectrum_mean), sigma, float(lower), float(upper)




[docs]
def geom_at_k_ci(
    R: np.ndarray,
    k: int,
    pass_power: float = 0.5,
    unanimous_power: float = 0.5,
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread and interval for the questionwise Geom@k target.

    Uncertainty counterpart of :func:`geom_at_k`. A first-order delta method
    is applied to each question's latent Pass@k and Unanimous@k, and the
    question-level geometric blends are then averaged.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget. Once the posterior is defined, any integer
           :math:`k \ge 1` is allowed.
        pass_power: Exponent :math:`a` on each question's latent ``Pass@k``.
        unanimous_power: Exponent :math:`b` on each question's latent
            ``Unanimous@k``.
        confidence: credibility level of the interval (default 0.95).
        bounds: ``(lo, hi)`` clipping bounds for the interval
                (default ``(0, 1)``).
        alpha0: Beta prior parameter :math:`\alpha_0` (default 1).
        beta0: Beta prior parameter :math:`\beta_0` (default 1).

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`

    Formula:
        With :math:`\mu_{P,\alpha}` and :math:`\mu_{U,\alpha}` the posterior
        means of question :math:`\alpha`'s latent Pass@k and Unanimous@k,

        .. math::

            \mu \approx \frac{1}{M}\sum_\alpha
                \mu_{P,\alpha}^{a}\,\mu_{U,\alpha}^{b}

        and :math:`\sigma` follows from per-question first-order delta
        propagation through :math:`g(x, y) = x^a y^b`.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> mu, sigma, lo, hi = geom_at_k_ci(R, 2)
        >>> round(mu, 6), round(sigma, 6), round(lo, 4), round(hi, 4)
        (0.610666, 0.133107, 0.3498, 0.8716)
    """
    posterior_mean, posterior_std = _geom_at_k_bayes(
        R,
        k,
        pass_power=pass_power,
        unanimous_power=unanimous_power,
        alpha0=alpha0,
        beta0=beta0,
    )
    interval = normal_credible_interval(
        posterior_mean,
        posterior_std,
        credibility=confidence,
        two_sided=True,
        bounds=bounds,
    )
    lower, upper = interval
    return float(posterior_mean), float(posterior_std), float(lower), float(upper)




[docs]
def geom_ds_at_k_ci(
    R: np.ndarray,
    k: int,
    pass_power: float = 0.5,
    unanimous_power: float = 0.5,
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread and interval for the dataset-level Geom@k target.

    Uncertainty counterpart of :func:`geom_ds_at_k`. With
    ``pass_power = unanimous_power = 0.5`` it corresponds to the
    dataset-level latent quantity introduced in the paper.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget. Once the posterior is defined, any integer
           :math:`k \ge 1` is allowed.
        pass_power: Exponent :math:`a` on latent dataset-level ``Pass@k``.
        unanimous_power: Exponent :math:`b` on latent dataset-level
            ``Unanimous@k``.
        confidence: credibility level of the interval (default 0.95).
        bounds: ``(lo, hi)`` clipping bounds for the interval
                (default ``(0, 1)``).
        alpha0: Beta prior parameter :math:`\alpha_0` (default 1).
        beta0: Beta prior parameter :math:`\beta_0` (default 1).

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`

    Formula:
        With :math:`\mu_P` and :math:`\mu_U` the posterior means of the
        latent dataset-level Pass@k and Unanimous@k,

        .. math::

            \mu \approx \mu_P^a\,\mu_U^b

        and :math:`\sigma` follows from first-order delta propagation through
        :math:`g(x, y) = x^a y^b`.

    Examples:
        >>> import numpy as np
        >>> R = np.array([[0, 1, 1, 0, 1],
        ...               [1, 1, 0, 1, 1]])
        >>> mu, sigma, lo, hi = geom_ds_at_k_ci(R, 2)
        >>> round(mu, 6), round(sigma, 6), round(lo, 4), round(hi, 4)
        (0.612112, 0.132755, 0.3519, 0.8723)
    """
    posterior_mean, posterior_std = _geom_ds_at_k_bayes(
        R,
        k,
        pass_power=pass_power,
        unanimous_power=unanimous_power,
        alpha0=alpha0,
        beta0=beta0,
    )
    interval = normal_credible_interval(
        posterior_mean,
        posterior_std,
        credibility=confidence,
        two_sided=True,
        bounds=bounds,
    )
    lower, upper = interval
    return float(posterior_mean), float(posterior_std), float(lower), float(upper)




[docs]
def geo_spectrum_at_k_ci(
    R: np.ndarray,
    k: int,
    lam: float = 0.5,
    weights: np.ndarray | list[float] | tuple[float, ...] | None = None,
    lambda_: float | None = None,
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread and interval for latent
    :math:`\mathrm{GeoSpectrum}_{\lambda,w}@k`.

    As with :func:`geo_spectrum_at_k`, leaving ``weights`` unset selects the
    ``GeoSpectrum*@k`` operating point.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget. Once the posterior is defined, any integer
           :math:`k \ge 1` is allowed.
        lam: The coupling parameter :math:`\lambda` in :math:`[0,1]`.
        weights: Spectrum weights :math:`w`. If omitted, the built-in
            upper-half mG weights are used. Custom weights must be
            length-:math:`k`, non-negative, finite, and satisfy
            :math:`\sum_r w_r \le 1`. When :math:`\lambda = 1` the spectrum
            term is irrelevant and ``weights`` is neither used nor validated.
        lambda_: Keyword alias for ``lam``, named after the mathematical
            symbol. It may only be supplied while ``lam`` is left at its
            default.
        confidence: credibility level of the interval (default 0.95).
        bounds: ``(lo, hi)`` clipping bounds for the interval
                (default ``(0, 1)``).
        alpha0: Beta prior parameter :math:`\alpha_0` (default 1).
        beta0: Beta prior parameter :math:`\beta_0` (default 1).

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`

    Formula:
        With :math:`x` the latent Pass@k and :math:`y` the latent spectrum
        :math:`S_{w,k}`, the posterior mean is approximated by

        .. math::

            \mu \approx x^\lambda y^{1-\lambda}

        evaluated at the posterior means of :math:`x` and :math:`y`, and
        :math:`\sigma` follows from first-order delta propagation through
        :math:`g(x, y) = x^\lambda y^{1-\lambda}`.
    """
    coupling = _resolve_lambda(lam, lambda_)
    if coupling == 1.0:
        # GeoSpectrum_{1,w}@k is exactly Pass@k, so any valid placeholder
        # weight vector works here.
        spectrum_weights = _unanimous_spectrum_weights(k)
    elif weights is None:
        spectrum_weights = _mg_spectrum_weights(k)
    else:
        spectrum_weights = _validate_spectrum_weights(weights, k)

    posterior_mean, posterior_std = _geo_spectrum_at_k_bayes(
        R,
        k,
        coupling,
        spectrum_weights,
        alpha0=alpha0,
        beta0=beta0,
    )
    interval = normal_credible_interval(
        posterior_mean,
        posterior_std,
        credibility=confidence,
        two_sided=True,
        bounds=bounds,
    )
    lower, upper = interval
    return float(posterior_mean), float(posterior_std), float(lower), float(upper)




[docs]
def geo_spectrum_star_at_k(R: np.ndarray, k: int) -> float:
    r"""
    Named shortcut for the default ``GeoSpectrum*@k`` operating point.

    Calls :func:`geo_spectrum_at_k` with :math:`\lambda = 1/2` and the
    upper-half ``mG`` spectrum weights passed explicitly.

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Sampling budget with :math:`1 \le k \le N`.

    Returns:
        float: The ``GeoSpectrum*@k`` score.
    """
    upper_half = _mg_spectrum_weights(k)
    return geo_spectrum_at_k(R, k, lam=0.5, weights=upper_half)




[docs]
def geo_spectrum_star_at_k_ci(
    R: np.ndarray,
    k: int,
    confidence: float = 0.95,
    bounds: tuple[float, float] = (0.0, 1.0),
    alpha0: float = 1.0,
    beta0: float = 1.0,
) -> tuple[float, float, float, float]:
    r"""
    Posterior mean, spread and interval for latent ``GeoSpectrum*@k``.

    Delegates to :func:`geo_spectrum_at_k_ci`, leaving ``lam`` and
    ``weights`` at their defaults (the upper-half ``mG`` operating point).

    Args:
        R: :math:`M \times N` binary matrix with entries in :math:`\{0,1\}`.
        k: Latent resampling budget. Once the posterior is defined, any integer
           :math:`k \ge 1` is allowed.
        confidence: credibility level of the interval (default 0.95).
        bounds: ``(lo, hi)`` clipping bounds for the interval
                (default ``(0, 1)``).
        alpha0: Beta prior parameter :math:`\alpha_0` (default 1).
        beta0: Beta prior parameter :math:`\beta_0` (default 1).

    Returns:
        tuple[float, float, float, float]:
            :math:`(\mu,\; \sigma,\; \text{lo},\; \text{hi})`
    """
    summary = geo_spectrum_at_k_ci(
        R,
        k,
        confidence=confidence,
        bounds=bounds,
        alpha0=alpha0,
        beta0=beta0,
    )
    return summary



# Public API of this module: each finite-bank metric is listed next to its
# ``*_ci`` posterior-summary counterpart. Underscore-prefixed helpers are
# deliberately left out.
__all__ = [
    "geom_at_k",
    "geom_at_k_ci",
    "geom_ds_at_k",
    "geom_ds_at_k_ci",
    "geo_spectrum_at_k",
    "geo_spectrum_at_k_ci",
    "geo_spectrum_star_at_k",
    "geo_spectrum_star_at_k_ci",
    "threshold_spectrum_at_k",
    "threshold_spectrum_at_k_ci",
]