Source code for scorio.utils

import math
from collections.abc import Sequence
from typing import Literal

import numpy as np
from scipy.stats import kendalltau, norm, rankdata, spearmanr, weightedtau


[docs] def rank_scores( scores_in_id_order: Sequence[float] | np.ndarray, tol: float = 1e-12, sigmas_in_id_order: Sequence[float] | np.ndarray | None = None, confidence: float = 0.95, ci_tie_method: Literal[ "zscore_adjacent", "ci_overlap_adjacent" ] = "zscore_adjacent", ) -> dict[str, np.ndarray]: """ Convert scores to ranks using multiple ranking methods (with optional confidence-aware ties). This is a utility that many evaluation scripts already use: it converts a list of scores into four standard rank conventions (competition/min, competition/max, dense, average/fractional). Higher scores get better (lower) ranks. Extension: if `sigmas_in_id_order` is provided, this function ALSO returns a second set of ranks that treats two adjacent scores as tied when they are not separable at the requested confidence level. Args: scores_in_id_order: Scores aligned by ID order. tol: Tolerance threshold for treating scores as equal (default: 1e-12). sigmas_in_id_order: Optional per-score uncertainty aligned by ID order. If provided, uncertainty-aware ranks are added under keys suffixed with "_ci". confidence: Confidence used for the uncertainty-aware tie rule. ci_tie_method: How to decide ties when sigmas are provided: - "zscore_adjacent" (paper Sec. 2.9): tie if z < Φ^{-1}(confidence) - "ci_overlap_adjacent": tie if confidence-level CIs overlap Returns: dict with four base ranking methods: - "competition": Min-rank competition (1, 2, 2, 4) - "competition_max": Max-rank competition (1, 3, 3, 4) - "dense": Dense ranking (1, 2, 2, 3) - "avg": Fractional/average ranking (1, 2.5, 2.5, 4) If `sigmas_in_id_order` is provided, four extra keys are included: - "competition_ci" - "competition_max_ci" - "dense_ci" - "avg_ci" Examples: >>> import numpy as np >>> scores = [95.0, 87.5, 87.5, 80.0, 75.0] >>> ranks = rank_scores(scores) >>> ranks["competition"].tolist() == [1, 2, 2, 4, 5] True >>> ranks["dense"].tolist() == [1, 2, 2, 3, 4] True With uncertainty: the top two are close and get tied at 95%: >>> sigmas = [1.0, 1.0, 0.1, 0.1, 0.1] >>> ranks2 = rank_scores(scores, sigmas_in_id_order=sigmas, confidence=0.95) >>> "dense_ci" in ranks2 True """ scores = np.asarray(scores_in_id_order, dtype=float) if scores.ndim != 1: raise ValueError("scores_in_id_order must be a 1D sequence.") order = np.argsort(-scores) # descending order sorted_scores = scores[order] # Group near-equal scores (within tolerance) grouped_scores = sorted_scores.copy() for i in range(1, len(grouped_scores)): if abs(grouped_scores[i] - grouped_scores[i - 1]) <= tol: grouped_scores[i] = grouped_scores[i - 1] def ranker(method: str, arr_sorted: np.ndarray) -> np.ndarray: ranks_sorted = rankdata(-arr_sorted, method=method) ranks = np.empty_like(ranks_sorted) ranks[order] = ranks_sorted return ranks out: dict[str, np.ndarray] = { "competition": ranker("min", grouped_scores), # 1,2,2,4,5 "competition_max": ranker("max", grouped_scores), # 1,3,3,4,5 "dense": ranker("dense", grouped_scores), # 1,2,2,3,4 "avg": ranker("average", grouped_scores), # 1.0,2.5,2.5,4.0,5.0 } if sigmas_in_id_order is not None: sigmas = np.asarray(sigmas_in_id_order, dtype=float) if sigmas.shape != scores.shape: raise ValueError("sigmas_in_id_order must have the same length as scores.") mus_s = scores[order] sig_s = sigmas[order] ci_grouped = grouped_scores.copy() if ci_tie_method == "zscore_adjacent": z_thresh = float(norm.ppf(confidence)) # one-sided for i in range(1, len(ci_grouped)): # If already equal by tol, keep tied if abs(ci_grouped[i] - ci_grouped[i - 1]) <= tol: ci_grouped[i] = ci_grouped[i - 1] continue denom = math.sqrt(sig_s[i - 1] ** 2 + sig_s[i] ** 2) if denom == 0.0: continue z = abs(mus_s[i - 1] - mus_s[i]) / denom if z < z_thresh: ci_grouped[i] = ci_grouped[i - 1] elif ci_tie_method == "ci_overlap_adjacent": z = float(norm.ppf(0.5 + confidence / 2.0)) for i in range(1, len(ci_grouped)): if abs(ci_grouped[i] - ci_grouped[i - 1]) <= tol: ci_grouped[i] = ci_grouped[i - 1] continue lo_prev = mus_s[i - 1] - z * sig_s[i - 1] hi_cur = mus_s[i] + z * sig_s[i] if lo_prev <= hi_cur: ci_grouped[i] = ci_grouped[i - 1] else: raise ValueError("Unknown ci_tie_method.") out.update( { "competition_ci": ranker("min", ci_grouped), "competition_max_ci": ranker("max", ci_grouped), "dense_ci": ranker("dense", ci_grouped), "avg_ci": ranker("average", ci_grouped), } ) return out
[docs] def compare_rankings( ranked_list_a, ranked_list_b, method="all", ): """ Compare two rankings using multiple correlation metrics. Computes Kendall's tau, Spearman's rho, and weighted Kendall's tau to measure agreement between two rankings. Args: ranked_list_a: First ranking (numeric array or list). ranked_list_b: Second ranking (numeric array or list). method: Which metric to return: "kendall", "spearman", "weighted_kendall", or "all" (default). Returns: If method is not "all", returns a ``(statistic, pvalue)`` tuple. If method is "all", returns a dictionary with: ``kendalltau``, ``spearmanr``, ``weighted_kendalltau``, ``fraction_mismatched``, and ``max_disp``. Raises: ValueError: If lists have different lengths or contain NaN/inf. TypeError: If lists are not numeric. Notes: ``scipy.stats.weightedtau`` does not compute p-values (pvalue is NaN). Rankings are compared element-wise at matching indices. Examples: >>> import numpy as np >>> rank_a = [1, 2, 3, 4, 5] >>> rank_b = [1, 3, 2, 4, 5] >>> tau, pval = compare_rankings(rank_a, rank_b, method="kendall") >>> round(tau, 2) 0.8 >>> results = compare_rankings(rank_a, rank_b, method="all") >>> round(results["fraction_mismatched"], 2) 0.4 """ allowed_methods = {"kendall", "spearman", "weighted_kendall", "all"} if method not in allowed_methods: raise ValueError( f"method must be one of {sorted(allowed_methods)}; got {method!r}" ) n = len(ranked_list_a) if len(ranked_list_a) == len(ranked_list_b) else 0 if n == 0: raise ValueError("Ranked lists must have the same non-zero length.") g = np.asarray(ranked_list_a) t = np.asarray(ranked_list_b) if not (np.issubdtype(g.dtype, np.number) and np.issubdtype(t.dtype, np.number)): raise TypeError("ranked lists must be numeric.") if not (np.isfinite(g).all() and np.isfinite(t).all()): raise ValueError("ranked lists must not contain NaN or inf.") diffs = (t - g).astype(float) fraction_mismatched = float(np.sum(diffs != 0) / n) max_disp = float(np.max(np.abs(diffs))) / (n - 1) if n > 1 else 0.0 tau_res = kendalltau(g, t) rho_res = spearmanr(g, t) wtau_res = weightedtau(g, t) results = { "kendall": (float(tau_res.statistic), float(tau_res.pvalue)), "spearman": (float(rho_res.statistic), float(rho_res.pvalue)), # Note: scipy.stats.weightedtau does not compute p-values, so pvalue is always NaN "weighted_kendall": (float(wtau_res.statistic), float(wtau_res.pvalue)), "all": { "kendalltau": (float(tau_res.statistic), float(tau_res.pvalue)), "spearmanr": (float(rho_res.statistic), float(rho_res.pvalue)), "weighted_kendalltau": (float(wtau_res.statistic), float(wtau_res.pvalue)), "fraction_mismatched": fraction_mismatched, "max_disp": max_disp, }, } return results[method]
# Lehmer code ranking hash functions (for permutations without ties)
[docs] def lehmer_hash(ranked_list): """ Convert a permutation to its Lehmer code (factorial number system). The Lehmer code provides a bijection between permutations and integers in the range [0, n!-1], useful for hashing, indexing, or enumerating all possible permutations. Args: ranked_list: Permutation of integers ``0..n-1`` as a list or array. Must contain no ties (all values unique). Returns: int: Unique hash value in range [0, n!-1] where n = len(ranked_list). Notes: - Time complexity: O(n²) due to inversion counting - Space complexity: O(1) - This function does NOT handle ties. All elements must be distinct. - Pre-computes factorials for efficiency Algorithm: For each position i, counts inversions (elements to the right that are smaller) and encodes them in the factorial number system. Examples: >>> lehmer_hash([0, 1, 2]) 0 >>> lehmer_hash([2, 1, 0]) 5 >>> lehmer_hash([0, 2, 1]) 1 >>> lehmer_hash([1, 2, 0]) 3 References: Lehmer, D. H. (1960). Teaching combinatorial tricks to a computer. In *Combinatorial Analysis* (Proceedings of Symposia in Applied Mathematics, Vol. 10, pp. 179–193). American Mathematical Society. MR 0113289. """ perm = list(ranked_list) n = len(perm) if any(not isinstance(x, (int, np.integer)) for x in perm): raise TypeError("ranked_list must be a permutation of integers 0..n-1.") if set(perm) != set(range(n)): raise ValueError("ranked_list must be a permutation of 0..n-1 with no ties.") # Pre-compute factorials to avoid repeated computation factorials = [1] * n for i in range(1, n): factorials[i] = factorials[i - 1] * i hash_value = 0 for i in range(n): # Count inversions: elements to the right that are smaller inversions = sum(1 for j in range(i + 1, n) if perm[j] < perm[i]) hash_value += inversions * factorials[n - 1 - i] return hash_value
[docs] def lehmer_unhash(hash_value, n): """ Convert a Lehmer code (hash) back to its permutation. Inverse operation of lehmer_hash. Reconstructs the original permutation from its integer representation in the factorial number system. Args: hash_value: Integer in range [0, n!-1] representing a permutation. n: Length of the permutation to generate. Returns: list: Permutation of integers [0, 1, ..., n-1] corresponding to hash_value. Raises: ValueError: If hash_value >= n! (invalid hash for given n). Notes: - Time complexity: O(n²) due to element removal tracking - Space complexity: O(n) - Pre-computes factorials for efficiency Examples: >>> lehmer_unhash(0, 3) [0, 1, 2] >>> lehmer_unhash(5, 3) [2, 1, 0] >>> lehmer_unhash(1, 3) [0, 2, 1] >>> lehmer_unhash(3, 3) [1, 2, 0] References: Lehmer, D. H. (1960). Teaching combinatorial tricks to a computer. In *Combinatorial Analysis* (Proceedings of Symposia in Applied Mathematics, Vol. 10, pp. 179–193). American Mathematical Society. MR 0113289. """ # Validate input max_hash = math.factorial(n) if hash_value < 0 or hash_value >= max_hash: raise ValueError( f"hash_value must be in range 0..{n}!-1 = {max_hash - 1}; got {hash_value}" ) # Pre-compute factorials to avoid repeated computation factorials = [1] * n for i in range(1, n): factorials[i] = factorials[i - 1] * i # Track which elements have been used available = list(range(n)) result = [] for i in range(n): f = factorials[n - 1 - i] idx = hash_value // f hash_value %= f result.append(available.pop(idx)) return result
def ordered_bell(n: int): """ Compute Fubini numbers (ordered Bell numbers) F[0..n]. F[n] counts the number of weak orderings (rankings with ties) on n elements. """ F = [0] * (n + 1) F[0] = 1 for m in range(1, n + 1): s = 0 for k in range(1, m + 1): s += math.comb(m, k) * F[m - k] F[m] = s return F def comb_rank_lex(indices, n, k): """ Rank a k-combination given by sorted indices in lexicographic order. Args: indices: Sorted list of k integers from 0..n-1 n: Size of ground set k: Size of combination Returns: Integer rank in [0, C(n,k)-1] """ r = 0 prev = -1 for pos in range(k): start = prev + 1 end = indices[pos] remaining = k - pos - 1 for x in range(start, end): r += math.comb(n - 1 - x, remaining) prev = indices[pos] return r def comb_unrank_lex(r: int, n: int, k: int): """ Unrank the r-th k-combination of {0..n-1} in lexicographic order. Args: r: Combination rank n: Size of ground set k: Size of combination Returns: Sorted list of k integers Raises: ValueError: If rank is out of range """ if k == 0: return [] if r < 0 or r >= math.comb(n, k): raise ValueError("Combination rank out of range.") combo = [] x = 0 for pos in range(k): rem = k - pos - 1 while True: cnt = math.comb(n - 1 - x, rem) if (n - 1 - x) >= rem else 0 if r < cnt: combo.append(x) x += 1 break r -= cnt x += 1 return combo def blocks_from_rank_list(rank_list, tol=1e-12): """ Convert a ranking to canonical ordered tie blocks. Args: rank_list: List where rank_list[i] is the rank of item i tol: Tolerance for comparing float ranks Returns: List of tie blocks (each block is a sorted list of item IDs) ordered from best to worst rank """ r = np.asarray(rank_list, dtype=float) n = len(r) if n == 0: return [] ids = np.arange(n) # Sort by rank ascending (best first), then by ID for determinism order = np.lexsort((ids, r)) r_sorted = r[order] ids_sorted = ids[order] blocks = [] cur = [int(ids_sorted[0])] for i in range(1, n): if abs(r_sorted[i] - r_sorted[i - 1]) <= tol: cur.append(int(ids_sorted[i])) else: blocks.append(sorted(cur)) cur = [int(ids_sorted[i])] blocks.append(sorted(cur)) return blocks
[docs] def ranking_hash(rank_list, tol=1e-12): """ Perfect collision-free hash for rankings with ties. Encodes any ranking (with or without ties) into a unique integer using ordered Bell numbers (Fubini numbers). This is the theoretically optimal approach that fully preserves all ranking information. Args: rank_list: List where rank_list[i] is the rank of item i. Lower values = better ranks. Ties are represented by equal values. Example: [0, 1, 1, 3] means item 0 is first, items 1 and 2 are tied for second, item 3 is fourth. tol: Tolerance for treating float ranks as equal (default: 1e-12). Returns: int: Unique hash value in range [0, F(n)-1] where F(n) is the n-th ordered Bell number. F(n) = exact count of all possible rankings with ties for n items. Notes: - Time complexity: O(n²) for encoding - Space complexity: O(n) for storing ordered Bell numbers - **Collision-free**: Different rankings always get different hashes - **Complete encoding**: Hash fully captures both order and tie structure - **Optimal**: Uses exactly log₂(F(n)) bits for F(n) possible rankings Algorithm: Uses ordered Bell (Fubini) numbers to enumerate all weak orderings. For each tie group, encodes both the subset selection (which items) and the lexicographic ordering within the subset. Examples: >>> ranking_hash([0, 1, 2]) # No ties 0 >>> ranking_hash([0, 1, 1]) # Items 1,2 tied at second place 2 >>> ranking_hash([0, 0, 1]) # Items 0,1 tied at first place 9 >>> ranking_hash([0, 0, 0]) # All tied 12 >>> ranking_hash([1, 0, 1]) # Items 0,2 tied at rank 1, item 1 at rank 0 5 Comparison to factorial-based hashing: For example, ``ranking_hash([0, 1, 1]) == 2``, ``ranking_hash([0, 0, 1]) == 9``, and ``ranking_hash([1, 0, 1]) == 5``. These hashes differ, unlike simpler approaches that lose tie information. References: Ordered Bell numbers (a.k.a. Fubini numbers), OEIS A000670: https://en.wikipedia.org/wiki/Ordered_Bell_number https://oeis.org/A000670 Weak orderings / total preorders (rankings with ties): https://en.wikipedia.org/wiki/Total_preorder Combinatorial number system (ranking/unranking k-subsets): https://en.wikipedia.org/wiki/Combinatorial_number_system See Also: :func:`unhash_ranking`: Inverse operation to reconstruct ranking from hash. """ blocks = blocks_from_rank_list(rank_list, tol=tol) n = len(rank_list) F = ordered_bell(n) remaining = list(range(n)) # Ground set by ID order remaining_set = set(remaining) h = 0 for block in blocks: m = len(remaining) k = len(block) # Add contributions from all smaller tie-group sizes for s in range(1, k): h += math.comb(m, s) * F[m - s] # Add contribution from lexicographic position within this size pos = {v: i for i, v in enumerate(remaining)} idx = [pos[v] for v in block] # Already sorted subset_rank = comb_rank_lex(idx, m, k) h += subset_rank * F[m - k] # Remove this tie group from remaining items remaining_set.difference_update(block) remaining = [x for x in remaining if x in remaining_set] return h
[docs] def unhash_ranking(h: int, n: int): """ Reconstruct ranking with ties from its hash value. Inverse operation of ranking_hash. Decodes a hash back to the original ranking, returning competition ranks (min-rank with gaps). Args: h: Hash value in range [0, F(n)-1] n: Number of items to rank Returns: list: Ranking in competition format (min-rank with gaps). Example: [1, 2, 2, 4] means item 0 is rank 1, items 1 and 2 are tied at rank 2, item 3 is rank 4. Raises: ValueError: If h is out of valid range [0, F(n)-1] Notes: - Time complexity: O(n²) for decoding - Space complexity: O(n) for ordered Bell numbers - Returns competition/min-rank format with gaps after ties - Fully reconstructs original ranking including tie structure Examples: >>> unhash_ranking(0, 3) # First possible ranking [1, 2, 3] >>> unhash_ranking(2, 3) # Items 1,2 tied [1, 2, 2] >>> unhash_ranking(9, 3) # Items 0,1 tied [1, 1, 3] >>> unhash_ranking(12, 3) # All tied [1, 1, 1] >>> unhash_ranking(5, 3) # Items 0,2 tied at rank 2 [2, 1, 2] See Also: :func:`ranking_hash`: Inverse operation to hash a ranking. """ F = ordered_bell(n) if h < 0 or h >= F[n]: raise ValueError(f"h out of range for n={n}. Must be 0..{F[n] - 1}.") remaining = list(range(n)) # IDs 0..n-1 rank_list = [0] * n cur_rank = 1 while remaining: m = len(remaining) # 1) Determine tie-group size k offset = 0 for k in range(1, m + 1): cnt = math.comb(m, k) * F[m - k] if h < offset + cnt: h -= offset break offset += cnt else: raise RuntimeError("Unhashing failed.") # 2) Determine which subset and suffix hash suffix = F[m - k] subset_rank = h // suffix h = h % suffix # 3) Unrank subset, assign competition rank idx = comb_unrank_lex(subset_rank, m, k) group_ids = [remaining[i] for i in idx] for item in group_ids: rank_list[item] = cur_rank # 4) Remove group, advance rank (with gaps) chosen = set(group_ids) remaining = [x for x in remaining if x not in chosen] cur_rank += k return rank_list
__all__ = [ "rank_scores", "compare_rankings", "lehmer_hash", "lehmer_unhash", "ranking_hash", "unhash_ranking", ]