Source code for scorio.utils

import math
from collections.abc import Sequence
from typing import Literal

import numpy as np
from scipy.stats import kendalltau, norm, rankdata, spearmanr, weightedtau



[docs]
def rank_scores(
    scores_in_id_order: Sequence[float] | np.ndarray,
    tol: float = 1e-12,
    sigmas_in_id_order: Sequence[float] | np.ndarray | None = None,
    confidence: float = 0.95,
    ci_tie_method: Literal[
        "zscore_adjacent", "ci_overlap_adjacent"
    ] = "zscore_adjacent",
) -> dict[str, np.ndarray]:
    """
    Convert scores to ranks using multiple ranking methods (with optional confidence-aware ties).

    This is a utility that many evaluation scripts already use: it converts a list of
    scores into four standard rank conventions (competition/min, competition/max,
    dense, average/fractional). Higher scores get better (lower) ranks.

    Extension: if `sigmas_in_id_order` is provided, this function ALSO returns a
    second set of ranks that treats two adjacent scores as tied when they are not
    separable at the requested confidence level.

    Args:
        scores_in_id_order: Scores aligned by ID order.
        tol: Tolerance threshold for treating scores as equal (default: 1e-12).
        sigmas_in_id_order: Optional per-score uncertainty aligned by ID order.
            If provided, uncertainty-aware ranks are added under keys suffixed with "_ci".
        confidence: Confidence used for the uncertainty-aware tie rule.
        ci_tie_method: How to decide ties when sigmas are provided:
            - "zscore_adjacent" (paper Sec. 2.9): tie if z < Φ^{-1}(confidence)
            - "ci_overlap_adjacent": tie if confidence-level CIs overlap

    Returns:
        dict with four base ranking methods:
            - "competition": Min-rank competition (1, 2, 2, 4)
            - "competition_max": Max-rank competition (1, 3, 3, 4)
            - "dense": Dense ranking (1, 2, 2, 3)
            - "avg": Fractional/average ranking (1, 2.5, 2.5, 4)

        If `sigmas_in_id_order` is provided, four extra keys are included:
            - "competition_ci"
            - "competition_max_ci"
            - "dense_ci"
            - "avg_ci"

    Examples:
        >>> import numpy as np
        >>> scores = [95.0, 87.5, 87.5, 80.0, 75.0]
        >>> ranks = rank_scores(scores)
        >>> ranks["competition"].tolist() == [1, 2, 2, 4, 5]
        True
        >>> ranks["dense"].tolist() == [1, 2, 2, 3, 4]
        True

        With uncertainty: the top two are close and get tied at 95%:

        >>> sigmas = [1.0, 1.0, 0.1, 0.1, 0.1]
        >>> ranks2 = rank_scores(scores, sigmas_in_id_order=sigmas, confidence=0.95)
        >>> "dense_ci" in ranks2
        True
    """
    scores = np.asarray(scores_in_id_order, dtype=float)
    if scores.ndim != 1:
        raise ValueError("scores_in_id_order must be a 1D sequence.")
    order = np.argsort(-scores)  # descending order
    sorted_scores = scores[order]

    # Group near-equal scores (within tolerance)
    grouped_scores = sorted_scores.copy()
    for i in range(1, len(grouped_scores)):
        if abs(grouped_scores[i] - grouped_scores[i - 1]) <= tol:
            grouped_scores[i] = grouped_scores[i - 1]

    def ranker(method: str, arr_sorted: np.ndarray) -> np.ndarray:
        ranks_sorted = rankdata(-arr_sorted, method=method)
        ranks = np.empty_like(ranks_sorted)
        ranks[order] = ranks_sorted
        return ranks

    out: dict[str, np.ndarray] = {
        "competition": ranker("min", grouped_scores),  # 1,2,2,4,5
        "competition_max": ranker("max", grouped_scores),  # 1,3,3,4,5
        "dense": ranker("dense", grouped_scores),  # 1,2,2,3,4
        "avg": ranker("average", grouped_scores),  # 1.0,2.5,2.5,4.0,5.0
    }

    if sigmas_in_id_order is not None:
        sigmas = np.asarray(sigmas_in_id_order, dtype=float)
        if sigmas.shape != scores.shape:
            raise ValueError("sigmas_in_id_order must have the same length as scores.")
        mus_s = scores[order]
        sig_s = sigmas[order]
        ci_grouped = grouped_scores.copy()

        if ci_tie_method == "zscore_adjacent":
            z_thresh = float(norm.ppf(confidence))  # one-sided
            for i in range(1, len(ci_grouped)):
                # If already equal by tol, keep tied
                if abs(ci_grouped[i] - ci_grouped[i - 1]) <= tol:
                    ci_grouped[i] = ci_grouped[i - 1]
                    continue
                denom = math.sqrt(sig_s[i - 1] ** 2 + sig_s[i] ** 2)
                if denom == 0.0:
                    continue
                z = abs(mus_s[i - 1] - mus_s[i]) / denom
                if z < z_thresh:
                    ci_grouped[i] = ci_grouped[i - 1]
        elif ci_tie_method == "ci_overlap_adjacent":
            z = float(norm.ppf(0.5 + confidence / 2.0))
            for i in range(1, len(ci_grouped)):
                if abs(ci_grouped[i] - ci_grouped[i - 1]) <= tol:
                    ci_grouped[i] = ci_grouped[i - 1]
                    continue
                lo_prev = mus_s[i - 1] - z * sig_s[i - 1]
                hi_cur = mus_s[i] + z * sig_s[i]
                if lo_prev <= hi_cur:
                    ci_grouped[i] = ci_grouped[i - 1]
        else:
            raise ValueError("Unknown ci_tie_method.")

        out.update(
            {
                "competition_ci": ranker("min", ci_grouped),
                "competition_max_ci": ranker("max", ci_grouped),
                "dense_ci": ranker("dense", ci_grouped),
                "avg_ci": ranker("average", ci_grouped),
            }
        )

    return out




[docs]
def compare_rankings(
    ranked_list_a,
    ranked_list_b,
    method="all",
):
    """
    Compare two rankings using multiple correlation metrics.

    Computes Kendall's tau, Spearman's rho, and weighted Kendall's tau
    to measure agreement between two rankings.

    Args:
        ranked_list_a: First ranking (numeric array or list).
        ranked_list_b: Second ranking (numeric array or list).
        method: Which metric to return: "kendall", "spearman",
            "weighted_kendall", or "all" (default).

    Returns:
        If method is not "all", returns a ``(statistic, pvalue)`` tuple.
        If method is "all", returns a dictionary with:
        ``kendalltau``, ``spearmanr``, ``weighted_kendalltau``,
        ``fraction_mismatched``, and ``max_disp``.

    Raises:
        ValueError: If lists have different lengths or contain NaN/inf.
        TypeError: If lists are not numeric.

    Notes:
        ``scipy.stats.weightedtau`` does not compute p-values (pvalue is NaN).
        Rankings are compared element-wise at matching indices.

    Examples:
        >>> import numpy as np
        >>> rank_a = [1, 2, 3, 4, 5]
        >>> rank_b = [1, 3, 2, 4, 5]
        >>> tau, pval = compare_rankings(rank_a, rank_b, method="kendall")
        >>> round(tau, 2)
        0.8
        >>> results = compare_rankings(rank_a, rank_b, method="all")
        >>> round(results["fraction_mismatched"], 2)
        0.4
    """
    allowed_methods = {"kendall", "spearman", "weighted_kendall", "all"}
    if method not in allowed_methods:
        raise ValueError(
            f"method must be one of {sorted(allowed_methods)}; got {method!r}"
        )

    n = len(ranked_list_a) if len(ranked_list_a) == len(ranked_list_b) else 0
    if n == 0:
        raise ValueError("Ranked lists must have the same non-zero length.")

    g = np.asarray(ranked_list_a)
    t = np.asarray(ranked_list_b)

    if not (np.issubdtype(g.dtype, np.number) and np.issubdtype(t.dtype, np.number)):
        raise TypeError("ranked lists must be numeric.")

    if not (np.isfinite(g).all() and np.isfinite(t).all()):
        raise ValueError("ranked lists must not contain NaN or inf.")

    diffs = (t - g).astype(float)
    fraction_mismatched = float(np.sum(diffs != 0) / n)

    max_disp = float(np.max(np.abs(diffs))) / (n - 1) if n > 1 else 0.0

    tau_res = kendalltau(g, t)
    rho_res = spearmanr(g, t)
    wtau_res = weightedtau(g, t)

    results = {
        "kendall": (float(tau_res.statistic), float(tau_res.pvalue)),
        "spearman": (float(rho_res.statistic), float(rho_res.pvalue)),
        # Note: scipy.stats.weightedtau does not compute p-values, so pvalue is always NaN
        "weighted_kendall": (float(wtau_res.statistic), float(wtau_res.pvalue)),
        "all": {
            "kendalltau": (float(tau_res.statistic), float(tau_res.pvalue)),
            "spearmanr": (float(rho_res.statistic), float(rho_res.pvalue)),
            "weighted_kendalltau": (float(wtau_res.statistic), float(wtau_res.pvalue)),
            "fraction_mismatched": fraction_mismatched,
            "max_disp": max_disp,
        },
    }

    return results[method]



# Lehmer code ranking hash functions (for permutations without ties)



[docs]
def lehmer_hash(ranked_list):
    """
    Convert a permutation to its Lehmer code (factorial number system).

    The Lehmer code provides a bijection between permutations and integers
    in the range [0, n!-1], useful for hashing, indexing, or enumerating
    all possible permutations.

    Args:
        ranked_list: Permutation of integers ``0..n-1`` as a list or array.
            Must contain no ties (all values unique).

    Returns:
        int: Unique hash value in range [0, n!-1] where n = len(ranked_list).

    Notes:
        - Time complexity: O(n²) due to inversion counting
        - Space complexity: O(1)
        - This function does NOT handle ties. All elements must be distinct.
        - Pre-computes factorials for efficiency

    Algorithm:
        For each position i, counts inversions (elements to the right that
        are smaller) and encodes them in the factorial number system.

    Examples:
        >>> lehmer_hash([0, 1, 2])
        0
        >>> lehmer_hash([2, 1, 0])
        5
        >>> lehmer_hash([0, 2, 1])
        1
        >>> lehmer_hash([1, 2, 0])
        3

    References:
        Lehmer, D. H. (1960). Teaching combinatorial tricks to a computer.
        In *Combinatorial Analysis* (Proceedings of Symposia in Applied Mathematics,
        Vol. 10, pp. 179–193). American Mathematical Society. MR 0113289.
    """
    perm = list(ranked_list)
    n = len(perm)

    if any(not isinstance(x, (int, np.integer)) for x in perm):
        raise TypeError("ranked_list must be a permutation of integers 0..n-1.")
    if set(perm) != set(range(n)):
        raise ValueError("ranked_list must be a permutation of 0..n-1 with no ties.")

    # Pre-compute factorials to avoid repeated computation
    factorials = [1] * n
    for i in range(1, n):
        factorials[i] = factorials[i - 1] * i

    hash_value = 0
    for i in range(n):
        # Count inversions: elements to the right that are smaller
        inversions = sum(1 for j in range(i + 1, n) if perm[j] < perm[i])
        hash_value += inversions * factorials[n - 1 - i]

    return hash_value




[docs]
def lehmer_unhash(hash_value, n):
    """
    Convert a Lehmer code (hash) back to its permutation.

    Inverse operation of lehmer_hash. Reconstructs the original permutation
    from its integer representation in the factorial number system.

    Args:
        hash_value: Integer in range [0, n!-1] representing a permutation.
        n: Length of the permutation to generate.

    Returns:
        list: Permutation of integers [0, 1, ..., n-1] corresponding to hash_value.

    Raises:
        ValueError: If hash_value >= n! (invalid hash for given n).

    Notes:
        - Time complexity: O(n²) due to element removal tracking
        - Space complexity: O(n)
        - Pre-computes factorials for efficiency

    Examples:
        >>> lehmer_unhash(0, 3)
        [0, 1, 2]
        >>> lehmer_unhash(5, 3)
        [2, 1, 0]
        >>> lehmer_unhash(1, 3)
        [0, 2, 1]
        >>> lehmer_unhash(3, 3)
        [1, 2, 0]

    References:
        Lehmer, D. H. (1960). Teaching combinatorial tricks to a computer.
        In *Combinatorial Analysis* (Proceedings of Symposia in Applied Mathematics,
        Vol. 10, pp. 179–193). American Mathematical Society. MR 0113289.
    """
    # Validate input
    max_hash = math.factorial(n)
    if hash_value < 0 or hash_value >= max_hash:
        raise ValueError(
            f"hash_value must be in range 0..{n}!-1 = {max_hash - 1}; got {hash_value}"
        )

    # Pre-compute factorials to avoid repeated computation
    factorials = [1] * n
    for i in range(1, n):
        factorials[i] = factorials[i - 1] * i

    # Track which elements have been used
    available = list(range(n))
    result = []

    for i in range(n):
        f = factorials[n - 1 - i]
        idx = hash_value // f
        hash_value %= f
        result.append(available.pop(idx))

    return result



def ordered_bell(n: int):
    """
    Compute Fubini numbers (ordered Bell numbers) F[0..n].

    F[n] counts the number of weak orderings (rankings with ties) on n elements.
    """
    F = [0] * (n + 1)
    F[0] = 1
    for m in range(1, n + 1):
        s = 0
        for k in range(1, m + 1):
            s += math.comb(m, k) * F[m - k]
        F[m] = s
    return F


def comb_rank_lex(indices, n, k):
    """
    Rank a k-combination given by sorted indices in lexicographic order.

    Args:
        indices: Sorted list of k integers from 0..n-1
        n: Size of ground set
        k: Size of combination

    Returns:
        Integer rank in [0, C(n,k)-1]
    """
    r = 0
    prev = -1
    for pos in range(k):
        start = prev + 1
        end = indices[pos]
        remaining = k - pos - 1
        for x in range(start, end):
            r += math.comb(n - 1 - x, remaining)
        prev = indices[pos]
    return r


def comb_unrank_lex(r: int, n: int, k: int):
    """
    Unrank the r-th k-combination of {0..n-1} in lexicographic order.

    Args:
        r: Combination rank
        n: Size of ground set
        k: Size of combination

    Returns:
        Sorted list of k integers

    Raises:
        ValueError: If rank is out of range
    """
    if k == 0:
        return []
    if r < 0 or r >= math.comb(n, k):
        raise ValueError("Combination rank out of range.")
    combo = []
    x = 0
    for pos in range(k):
        rem = k - pos - 1
        while True:
            cnt = math.comb(n - 1 - x, rem) if (n - 1 - x) >= rem else 0
            if r < cnt:
                combo.append(x)
                x += 1
                break
            r -= cnt
            x += 1
    return combo


def blocks_from_rank_list(rank_list, tol=1e-12):
    """
    Convert a ranking to canonical ordered tie blocks.

    Args:
        rank_list: List where rank_list[i] is the rank of item i
        tol: Tolerance for comparing float ranks

    Returns:
        List of tie blocks (each block is a sorted list of item IDs)
        ordered from best to worst rank
    """
    r = np.asarray(rank_list, dtype=float)
    n = len(r)
    if n == 0:
        return []
    ids = np.arange(n)

    # Sort by rank ascending (best first), then by ID for determinism
    order = np.lexsort((ids, r))
    r_sorted = r[order]
    ids_sorted = ids[order]

    blocks = []
    cur = [int(ids_sorted[0])]
    for i in range(1, n):
        if abs(r_sorted[i] - r_sorted[i - 1]) <= tol:
            cur.append(int(ids_sorted[i]))
        else:
            blocks.append(sorted(cur))
            cur = [int(ids_sorted[i])]
    blocks.append(sorted(cur))
    return blocks



[docs]
def ranking_hash(rank_list, tol=1e-12):
    """
    Perfect collision-free hash for rankings with ties.

    Encodes any ranking (with or without ties) into a unique integer using
    ordered Bell numbers (Fubini numbers). This is the theoretically optimal
    approach that fully preserves all ranking information.

    Args:
        rank_list: List where rank_list[i] is the rank of item i.
                  Lower values = better ranks. Ties are represented by
                  equal values. Example: [0, 1, 1, 3] means item 0 is first,
                  items 1 and 2 are tied for second, item 3 is fourth.
        tol: Tolerance for treating float ranks as equal (default: 1e-12).

    Returns:
        int: Unique hash value in range [0, F(n)-1] where F(n) is the
             n-th ordered Bell number. F(n) = exact count of all possible
             rankings with ties for n items.

    Notes:
        - Time complexity: O(n²) for encoding
        - Space complexity: O(n) for storing ordered Bell numbers
        - **Collision-free**: Different rankings always get different hashes
        - **Complete encoding**: Hash fully captures both order and tie structure
        - **Optimal**: Uses exactly log₂(F(n)) bits for F(n) possible rankings

    Algorithm:
        Uses ordered Bell (Fubini) numbers to enumerate all weak orderings.
        For each tie group, encodes both the subset selection (which items)
        and the lexicographic ordering within the subset.

    Examples:
        >>> ranking_hash([0, 1, 2])  # No ties
        0
        >>> ranking_hash([0, 1, 1])  # Items 1,2 tied at second place
        2
        >>> ranking_hash([0, 0, 1])  # Items 0,1 tied at first place
        9
        >>> ranking_hash([0, 0, 0])  # All tied
        12
        >>> ranking_hash([1, 0, 1])  # Items 0,2 tied at rank 1, item 1 at rank 0
        5

    Comparison to factorial-based hashing:
        For example, ``ranking_hash([0, 1, 1]) == 2``,
        ``ranking_hash([0, 0, 1]) == 9``, and ``ranking_hash([1, 0, 1]) == 5``.
        These hashes differ, unlike simpler approaches that lose tie information.

    References:
        Ordered Bell numbers (a.k.a. Fubini numbers), OEIS A000670:
        https://en.wikipedia.org/wiki/Ordered_Bell_number
        https://oeis.org/A000670

        Weak orderings / total preorders (rankings with ties):
        https://en.wikipedia.org/wiki/Total_preorder

        Combinatorial number system (ranking/unranking k-subsets):
        https://en.wikipedia.org/wiki/Combinatorial_number_system

    See Also:
        :func:`unhash_ranking`: Inverse operation to reconstruct ranking from hash.
    """
    blocks = blocks_from_rank_list(rank_list, tol=tol)
    n = len(rank_list)
    F = ordered_bell(n)

    remaining = list(range(n))  # Ground set by ID order
    remaining_set = set(remaining)

    h = 0
    for block in blocks:
        m = len(remaining)
        k = len(block)

        # Add contributions from all smaller tie-group sizes
        for s in range(1, k):
            h += math.comb(m, s) * F[m - s]

        # Add contribution from lexicographic position within this size
        pos = {v: i for i, v in enumerate(remaining)}
        idx = [pos[v] for v in block]  # Already sorted
        subset_rank = comb_rank_lex(idx, m, k)
        h += subset_rank * F[m - k]

        # Remove this tie group from remaining items
        remaining_set.difference_update(block)
        remaining = [x for x in remaining if x in remaining_set]

    return h




[docs]
def unhash_ranking(h: int, n: int):
    """
    Reconstruct ranking with ties from its hash value.

    Inverse operation of ranking_hash. Decodes a hash back to the original
    ranking, returning competition ranks (min-rank with gaps).

    Args:
        h: Hash value in range [0, F(n)-1]
        n: Number of items to rank

    Returns:
        list: Ranking in competition format (min-rank with gaps).
              Example: [1, 2, 2, 4] means item 0 is rank 1,
              items 1 and 2 are tied at rank 2, item 3 is rank 4.

    Raises:
        ValueError: If h is out of valid range [0, F(n)-1]

    Notes:
        - Time complexity: O(n²) for decoding
        - Space complexity: O(n) for ordered Bell numbers
        - Returns competition/min-rank format with gaps after ties
        - Fully reconstructs original ranking including tie structure

    Examples:
        >>> unhash_ranking(0, 3)  # First possible ranking
        [1, 2, 3]
        >>> unhash_ranking(2, 3)  # Items 1,2 tied
        [1, 2, 2]
        >>> unhash_ranking(9, 3)  # Items 0,1 tied
        [1, 1, 3]
        >>> unhash_ranking(12, 3)  # All tied
        [1, 1, 1]
        >>> unhash_ranking(5, 3)  # Items 0,2 tied at rank 2
        [2, 1, 2]

    See Also:
        :func:`ranking_hash`: Inverse operation to hash a ranking.
    """
    F = ordered_bell(n)
    if h < 0 or h >= F[n]:
        raise ValueError(f"h out of range for n={n}. Must be 0..{F[n] - 1}.")

    remaining = list(range(n))  # IDs 0..n-1
    rank_list = [0] * n
    cur_rank = 1

    while remaining:
        m = len(remaining)

        # 1) Determine tie-group size k
        offset = 0
        for k in range(1, m + 1):
            cnt = math.comb(m, k) * F[m - k]
            if h < offset + cnt:
                h -= offset
                break
            offset += cnt
        else:
            raise RuntimeError("Unhashing failed.")

        # 2) Determine which subset and suffix hash
        suffix = F[m - k]
        subset_rank = h // suffix
        h = h % suffix

        # 3) Unrank subset, assign competition rank
        idx = comb_unrank_lex(subset_rank, m, k)
        group_ids = [remaining[i] for i in idx]
        for item in group_ids:
            rank_list[item] = cur_rank

        # 4) Remove group, advance rank (with gaps)
        chosen = set(group_ids)
        remaining = [x for x in remaining if x not in chosen]
        cur_rank += k

    return rank_list



__all__ = [
    "rank_scores",
    "compare_rankings",
    "lehmer_hash",
    "lehmer_unhash",
    "ranking_hash",
    "unhash_ranking",
]