Source code for scorio.rank.serial_rank

r"""
SerialRank: spectral ranking by seriation.

This module implements the SerialRank construction of
Fogel, d'Aspremont, and Vojnovic.

Notation
--------

Let :math:`R \in \{0,1\}^{L \times M \times N}` and build a skew-symmetric
pairwise comparison matrix :math:`C`, with :math:`C_{ij}>0` indicating
evidence that model :math:`i` is stronger than model :math:`j`.

SerialRank forms

.. math::
    S = \frac{1}{2}\left(L\mathbf{1}\mathbf{1}^{\top} + C C^{\top}\right),
    \qquad
    L_S = \operatorname{diag}(S\mathbf{1}) - S,

and ranks models by sorting a Fiedler vector of :math:`L_S` (the eigenvector
associated with the second-smallest eigenvalue).
"""

import numpy as np

from scorio.utils import rank_scores

from ._base import build_pairwise_counts, validate_input
from ._types import RankMethod, RankResult


def _comparison_matrix_from_counts(
    wins: np.ndarray, ties: np.ndarray, comparison: str
) -> np.ndarray:
    """
    Build a skew-symmetric comparison matrix C from pairwise win and tie counts.

    C[i,j] > 0  => i is preferred to j.
    """
    comparison = str(comparison)

    if comparison in {"prob_diff", "fractional"}:
        total = wins + wins.T + ties
        C = np.zeros_like(wins, dtype=float)
        mask = total > 0
        C[mask] = (wins[mask] - wins.T[mask]) / total[mask]
        np.fill_diagonal(C, 0.0)
        return C

    if comparison in {"sign", "majority"}:
        diff = wins - wins.T
        C = np.sign(diff).astype(float)
        np.fill_diagonal(C, 0.0)
        return C

    raise ValueError('comparison must be "prob_diff" or "sign"')


def _serialrank_similarity(C: np.ndarray) -> np.ndarray:
    """
    Similarity matrix S_match from the SerialRank paper:

        S = Σ_k (1 + C_{ik} C_{jk})/2  = 1/2 (n 11^T + C C^T)
    """
    C = np.asarray(C, dtype=float)
    n = C.shape[0]
    ones = np.ones((n, n), dtype=float)
    S = 0.5 * (n * ones + (C @ C.T))
    return S


def _laplacian(S: np.ndarray) -> np.ndarray:
    S = np.asarray(S, dtype=float)
    d = S.sum(axis=1)
    return np.diag(d) - S


def _fiedler_vector(L: np.ndarray) -> tuple[np.ndarray, bool]:
    """
    Return a Fiedler vector and whether the Fiedler eigenspace is one-dimensional.

    SerialRank requires sorting the eigenvector associated with the second-smallest
    Laplacian eigenvalue. If that eigenvalue is repeated, the Fiedler vector is not
    unique and any basis vector is arbitrary for ranking purposes.
    """
    w, V = np.linalg.eigh(L)
    if V.shape[1] < 2:
        return np.ones(L.shape[0], dtype=float), False

    v = V[:, 1]
    if V.shape[1] == 2:
        return v, True

    scale = max(1.0, float(np.max(np.abs(w))))
    eigengap = float(w[2] - w[1])
    unique = bool(np.isfinite(eigengap) and eigengap > 1e-10 * scale)
    return v, unique


def _orientation_key(scores: np.ndarray, C: np.ndarray) -> tuple[int, float, float]:
    """
    Decide between the two possible orientations (scores vs -scores).

    Returns a key to be minimized: (unweighted_upsets, weighted_upsets, -corr).
    """
    scores = np.asarray(scores, dtype=float)
    C = np.asarray(C, dtype=float)
    n = scores.size

    diff = scores[:, None] - scores[None, :]

    mask = np.triu(np.ones((n, n), dtype=bool), 1)
    c = C[mask]
    pred = np.sign(diff[mask])

    nz = c != 0
    if not np.any(nz):
        return (0, 0.0, 0.0)

    c = c[nz]
    pred = pred[nz]

    disagree = (pred == 0) | ((pred * c) < 0)
    upsets = int(np.sum(disagree))
    w_upsets = float(np.sum(np.abs(c[disagree])))
    corr = float(np.sum(pred * c))

    return (upsets, w_upsets, -corr)



[docs]
def serial_rank(
    R: np.ndarray,
    comparison: str = "prob_diff",
    method: RankMethod = "competition",
    return_scores: bool = False,
) -> RankResult:
    """
    Rank models with SerialRank spectral seriation.

    Method context:
        SerialRank builds a similarity matrix from pairwise comparisons, computes
        its graph Laplacian, and orders models by a Fiedler vector (second-smallest
        Laplacian eigenvector). This is a seriation-based ranking method designed to
        be robust to noisy pairwise outcomes.

    References:
        Fogel, F., d'Aspremont, A., & Vojnovic, M. (2016).
        Spectral Ranking Using Seriation. Journal of Machine Learning Research.
        https://jmlr.org/papers/v17/16-035.html

    Args:
        R: Binary tensor of shape (L, M, N) (or (L, M) treated as N=1).
        comparison: How to aggregate multiple comparisons between a pair:
            - "prob_diff": C_ij = (wins_ij - wins_ji) / total_ij in [-1, 1]
            - "sign":      C_ij = sign(wins_ij - wins_ji) in {-1, 0, 1}
        method: Ranking method passed to `scorio.utils.rank_scores`.
        return_scores: If True, return (ranking, scores) where scores are the
            oriented Fiedler vector (higher ⇒ better).

    Returns:
        Ranking array of shape ``(L,)``.
        If ``return_scores=True``, returns ``(ranking, scores)`` with scores of
        shape ``(L,)``.

    Notation:
        ``L``: number of models.
        ``W_ij``: number of decisive outcomes where model ``i`` beats ``j``.
        ``T_ij``: number of ties between models ``i`` and ``j``.
        ``C``: skew-symmetric comparison matrix.
        ``S``: SerialRank similarity matrix.
        ``L_S``: graph Laplacian of ``S``.

    Formula:
        .. math::
            C_{ij} = \\frac{W_{ij} - W_{ji}}{W_{ij} + W_{ji} + T_{ij}},
            \\quad C_{ii} = 0

        .. math::
            S = \\frac{1}{2}\\left(L\\,\\mathbf{1}\\mathbf{1}^{\\top} + C C^{\\top}\\right),
            \\quad
            L_S = \\operatorname{diag}(S\\mathbf{1}) - S

        Rank by sorting a Fiedler vector of ``L_S``. The sign is chosen to best
        align with observed pairwise comparisons.

    Examples:
        >>> import numpy as np
        >>> from scorio import rank
        >>> R = np.array([[[1, 1], [1, 0]], [[0, 0], [0, 1]]])
        >>> rank.serial_rank(R).tolist()
        [1, 2]

    Notes:
        The original SerialRank paper sets ``C_ii = 1`` for binary tournaments.
        In this implementation we keep ``C`` skew-symmetric with ``C_ii = 0``;
        this changes ``S`` only by a constant diagonal shift and leaves the
        Laplacian (hence the ranking) unchanged.

        If the Fiedler eigenvalue is not simple, the ordering is not identifiable.
        In that degenerate case, the method falls back to mean accuracy scores.
    """
    R = validate_input(R)

    wins, ties = build_pairwise_counts(R)
    C = _comparison_matrix_from_counts(wins, ties, comparison=comparison)

    S = _serialrank_similarity(C)
    Ls = _laplacian(S)

    v, is_unique = _fiedler_vector(Ls)
    if (not is_unique) or (not np.all(np.isfinite(v))) or np.allclose(v, v[0]):
        scores = R.mean(axis=(1, 2))
        ranking = rank_scores(scores)[method]
        return (ranking, scores) if return_scores else ranking

    # Choose the orientation that best agrees with observed comparisons.
    key_pos = _orientation_key(v, C)
    key_neg = _orientation_key(-v, C)
    scores = v if key_pos <= key_neg else -v

    # Stabilize for degenerate (nearly tied) cases.
    if np.std(scores) < 1e-12:
        scores = R.mean(axis=(1, 2))

    ranking = rank_scores(scores)[method]
    return (ranking, scores) if return_scores else ranking



__all__ = ["serial_rank"]