Game Theory

Game Theory — Python Implementations
"""
Game Theory — Python Implementations
=======================================
Companion code for the Optimization curriculum: Game Theory module.

Covers:
  - Normal-form games: dominance, IESDS, best response, pure Nash
  - Nash equilibrium computation: 2×2 indifference, support enumeration
  - Zero-sum games: graphical method, LP formulation, minimax
  - Cooperative games: Shapley value, core membership
  - Dynamic games: backward induction, Vickrey auction, VCG mechanism
  - Learning in games: fictitious play, regret matching, multiplicative weights
  - Congestion games: best-response dynamics, Braess's paradox
  - Multi-robot applications: auction-based task allocation

Usage:
    python game_theory.py          # Run all demos with plots
    python game_theory.py --test   # Run self-tests only
"""

import numpy as np
import matplotlib
matplotlib.use("Agg")  # non-interactive backend for CI
import matplotlib.pyplot as plt
import sys
from itertools import combinations
from math import factorial

def _get_linprog():
    """Lazy import — scipy only needed for LP-based zero-sum solver."""
    from scipy.optimize import linprog
    return linprog


# ============================================================
# 1. Normal Form Games
# ============================================================

class NormalFormGame:
    """Represents a two-player normal-form (strategic) game.

    Attributes
    ----------
    payoff1 : np.ndarray, shape (m, n)
        Payoff matrix for the row player (Player 1).
    payoff2 : np.ndarray, shape (m, n)
        Payoff matrix for the column player (Player 2).
    m : int
        Number of strategies for Player 1.
    n : int
        Number of strategies for Player 2.
    """

    def __init__(self, payoff1, payoff2):
        """Create a two-player normal-form game.

        Parameters
        ----------
        payoff1 : array-like, shape (m, n)
            Row player's payoffs.
        payoff2 : array-like, shape (m, n)
            Column player's payoffs.
        """
        self.payoff1 = np.array(payoff1, dtype=float)
        self.payoff2 = np.array(payoff2, dtype=float)
        assert self.payoff1.shape == self.payoff2.shape, "Payoff matrices must match."
        self.m, self.n = self.payoff1.shape

    def is_dominated(self, player, strategy, strict=True):
        """Check whether *strategy* is (strictly) dominated for *player*.

        Parameters
        ----------
        player : int
            1 for the row player, 2 for the column player.
        strategy : int
            Index of the strategy to check.
        strict : bool
            If True check strict dominance; otherwise weak dominance.

        Returns
        -------
        bool
            True if the strategy is dominated.
        """
        if player == 1:
            payoffs = self.payoff1
            target = payoffs[strategy, :]
            for i in range(self.m):
                if i == strategy:
                    continue
                alt = payoffs[i, :]
                if strict:
                    if np.all(alt > target):
                        return True
                else:
                    if np.all(alt >= target) and np.any(alt > target):
                        return True
        else:
            payoffs = self.payoff2
            target = payoffs[:, strategy]
            for j in range(self.n):
                if j == strategy:
                    continue
                alt = payoffs[:, j]
                if strict:
                    if np.all(alt > target):
                        return True
                else:
                    if np.all(alt >= target) and np.any(alt > target):
                        return True
        return False

    def iesds(self):
        """Iterated Elimination of Strictly Dominated Strategies.

        Returns
        -------
        NormalFormGame
            Reduced game after removing all strictly dominated strategies.
        list of int
            Surviving row indices.
        list of int
            Surviving column indices.
        """
        rows = list(range(self.m))
        cols = list(range(self.n))
        changed = True
        while changed:
            changed = False
            # Eliminate dominated rows
            sub1 = self.payoff1[np.ix_(rows, cols)]
            new_rows = []
            for idx, r in enumerate(rows):
                row_vals = sub1[idx, :]
                dominated = False
                for idx2 in range(len(rows)):
                    if idx2 == idx:
                        continue
                    if np.all(sub1[idx2, :] > row_vals):
                        dominated = True
                        break
                if not dominated:
                    new_rows.append(r)
            if len(new_rows) < len(rows):
                rows = new_rows
                changed = True

            # Eliminate dominated columns
            sub2 = self.payoff2[np.ix_(rows, cols)]
            new_cols = []
            for idx, c in enumerate(cols):
                col_vals = sub2[:, idx]
                dominated = False
                for idx2 in range(len(cols)):
                    if idx2 == idx:
                        continue
                    if np.all(sub2[:, idx2] > col_vals):
                        dominated = True
                        break
                if not dominated:
                    new_cols.append(c)
            if len(new_cols) < len(cols):
                cols = new_cols
                changed = True

        reduced = NormalFormGame(
            self.payoff1[np.ix_(rows, cols)],
            self.payoff2[np.ix_(rows, cols)],
        )
        return reduced, rows, cols

    def best_response(self, player, opponent_strategy):
        """Compute pure-strategy best responses for *player*.

        Parameters
        ----------
        player : int
            1 or 2.
        opponent_strategy : array-like
            Probability vector over the opponent's strategies (mixed strategy).

        Returns
        -------
        list of int
            Indices of all best-response pure strategies.
        """
        opp = np.array(opponent_strategy, dtype=float)
        if player == 1:
            expected = self.payoff1 @ opp  # length m
            mx = np.max(expected)
            return [i for i in range(self.m) if np.isclose(expected[i], mx)]
        else:
            expected = self.payoff2.T @ opp  # length n
            mx = np.max(expected)
            return [j for j in range(self.n) if np.isclose(expected[j], mx)]

    def find_pure_nash(self):
        """Find all pure-strategy Nash equilibria.

        Returns
        -------
        list of (int, int)
            List of (row, col) index pairs that are pure NE.
        """
        equilibria = []
        # For each cell, check if it's a best response for both players
        for i in range(self.m):
            for j in range(self.n):
                # Player 1: is row i best given column j?
                if self.payoff1[i, j] < np.max(self.payoff1[:, j]):
                    continue
                # Player 2: is column j best given row i?
                if self.payoff2[i, j] < np.max(self.payoff2[i, :]):
                    continue
                equilibria.append((i, j))
        return equilibria


# ============================================================
# 2. Nash Equilibrium Computation
# ============================================================

def find_mixed_nash_2x2(game):
    """Find mixed-strategy Nash equilibrium for a 2×2 game.

    Uses the indifference principle: each player mixes such that the
    opponent is indifferent between their pure strategies.

    Parameters
    ----------
    game : NormalFormGame
        Must be 2×2.

    Returns
    -------
    p : np.ndarray, shape (2,)
        Player 1's equilibrium mixed strategy.
    q : np.ndarray, shape (2,)
        Player 2's equilibrium mixed strategy.

    Raises
    ------
    ValueError
        If the game is not 2×2 or no completely mixed NE exists.
    """
    if game.m != 2 or game.n != 2:
        raise ValueError("Game must be 2×2.")

    A = game.payoff1
    B = game.payoff2

    # Player 1 mixes with prob p on row 0 to make Player 2 indifferent.
    # P2's payoff from col 0: p*B[0,0] + (1-p)*B[1,0]
    # P2's payoff from col 1: p*B[0,1] + (1-p)*B[1,1]
    # Set equal: p*(B[0,0]-B[1,0]) + B[1,0] = p*(B[0,1]-B[1,1]) + B[1,1]
    denom_p = (B[0, 0] - B[1, 0]) - (B[0, 1] - B[1, 1])
    if np.isclose(denom_p, 0):
        raise ValueError("No completely mixed NE (Player 2 not indifferent).")
    p0 = (B[1, 1] - B[1, 0]) / denom_p

    # Player 2 mixes with prob q on col 0 to make Player 1 indifferent.
    # P1's payoff from row 0: q*A[0,0] + (1-q)*A[0,1]
    # P1's payoff from row 1: q*A[1,0] + (1-q)*A[1,1]
    denom_q = (A[0, 0] - A[0, 1]) - (A[1, 0] - A[1, 1])
    if np.isclose(denom_q, 0):
        raise ValueError("No completely mixed NE (Player 1 not indifferent).")
    q0 = (A[1, 1] - A[0, 1]) / denom_q

    if not (0.0 <= p0 <= 1.0 and 0.0 <= q0 <= 1.0):
        raise ValueError(f"Mixed NE outside simplex: p={p0:.4f}, q={q0:.4f}.")

    return np.array([p0, 1 - p0]), np.array([q0, 1 - q0])


def support_enumeration(game):
    """Find all Nash equilibria via support enumeration.

    For each pair of support sets (S1 for Player 1, S2 for Player 2) of
    equal size, solve the indifference + probability constraints.

    Parameters
    ----------
    game : NormalFormGame

    Returns
    -------
    list of (np.ndarray, np.ndarray)
        Each element is a (sigma1, sigma2) NE pair.
    """
    equilibria = []

    for k1 in range(1, game.m + 1):
        for k2 in range(1, game.n + 1):
            for supp1 in combinations(range(game.m), k1):
                for supp2 in combinations(range(game.n), k2):
                    result = _solve_support(game, list(supp1), list(supp2))
                    if result is not None:
                        equilibria.append(result)

    # Deduplicate
    unique = []
    for s1, s2 in equilibria:
        is_dup = False
        for u1, u2 in unique:
            if np.allclose(s1, u1) and np.allclose(s2, u2):
                is_dup = True
                break
        if not is_dup:
            unique.append((s1, s2))
    return unique


def _solve_support(game, supp1, supp2):
    """Attempt to solve for an NE with given supports.

    Returns (sigma1, sigma2) or None if no valid solution.
    """
    A = game.payoff1
    B = game.payoff2
    k1 = len(supp1)
    k2 = len(supp2)

    # --- Solve for Player 2's strategy (sigma2) ---
    # Player 1 must be indifferent over supp1 given sigma2.
    # For each pair of consecutive strategies in supp1:
    #   sum_j A[supp1[0], j]*sigma2[j] = sum_j A[supp1[i], j]*sigma2[j]
    # Plus: sum_{j in supp2} sigma2[j] = 1

    # Build system for sigma2 (variables only on supp2)
    rows_eq = []
    rhs_eq = []

    # Indifference equations (k1 - 1 equations)
    for idx in range(1, k1):
        row = np.zeros(k2)
        for jdx, j in enumerate(supp2):
            row[jdx] = A[supp1[0], j] - A[supp1[idx], j]
        rows_eq.append(row)
        rhs_eq.append(0.0)

    # Probability simplex
    rows_eq.append(np.ones(k2))
    rhs_eq.append(1.0)

    C2 = np.array(rows_eq)
    d2 = np.array(rhs_eq)

    try:
        if C2.shape[0] == C2.shape[1]:
            sigma2_supp = np.linalg.solve(C2, d2)
        else:
            sigma2_supp, res, rank, _ = np.linalg.lstsq(C2, d2, rcond=None)
            if not np.allclose(C2 @ sigma2_supp, d2, atol=1e-8):
                return None
    except np.linalg.LinAlgError:
        return None

    if np.any(sigma2_supp < -1e-10):
        return None
    sigma2_supp = np.maximum(sigma2_supp, 0.0)

    # --- Solve for Player 1's strategy (sigma1) ---
    rows_eq2 = []
    rhs_eq2 = []

    for idx in range(1, k2):
        row = np.zeros(k1)
        for jdx, i in enumerate(supp1):
            row[jdx] = B[i, supp2[0]] - B[i, supp2[idx]]
        rows_eq2.append(row)
        rhs_eq2.append(0.0)

    rows_eq2.append(np.ones(k1))
    rhs_eq2.append(1.0)

    C1 = np.array(rows_eq2)
    d1 = np.array(rhs_eq2)

    try:
        if C1.shape[0] == C1.shape[1]:
            sigma1_supp = np.linalg.solve(C1, d1)
        else:
            sigma1_supp, res, rank, _ = np.linalg.lstsq(C1, d1, rcond=None)
            if not np.allclose(C1 @ sigma1_supp, d1, atol=1e-8):
                return None
    except np.linalg.LinAlgError:
        return None

    if np.any(sigma1_supp < -1e-10):
        return None
    sigma1_supp = np.maximum(sigma1_supp, 0.0)

    # Build full strategy vectors
    sigma1 = np.zeros(game.m)
    sigma2 = np.zeros(game.n)
    for idx, i in enumerate(supp1):
        sigma1[i] = sigma1_supp[idx]
    for idx, j in enumerate(supp2):
        sigma2[j] = sigma2_supp[idx]

    # Verify best response condition: strategies outside support
    # must not yield higher payoff.
    eq_payoff1 = A[supp1[0], :] @ sigma2
    for i in range(game.m):
        if i not in supp1:
            if A[i, :] @ sigma2 > eq_payoff1 + 1e-8:
                return None

    eq_payoff2 = B[:, supp2[0]] @ sigma1
    for j in range(game.n):
        if j not in supp2:
            if B[:, j] @ sigma1 > eq_payoff2 + 1e-8:
                return None

    return sigma1, sigma2


# ============================================================
# 3. Zero-Sum Games
# ============================================================

def solve_zero_sum_graphical(A):
    """Solve a 2×n zero-sum game graphically for Player 1.

    Player 1 has 2 strategies; Player 2 has n strategies.
    The lower envelope of Player 2's response lines gives P1's maximin.

    Parameters
    ----------
    A : array-like, shape (2, n)
        Payoff matrix for the row player.

    Returns
    -------
    value : float
        Value of the game.
    p1_strategy : np.ndarray, shape (2,)
        Player 1's optimal mixed strategy.
    p2_strategy : np.ndarray, shape (n,)
        Player 2's optimal mixed strategy (uniform on binding columns).
    """
    A = np.array(A, dtype=float)
    assert A.shape[0] == 2, "Graphical method requires Player 1 to have 2 strategies."
    n = A.shape[1]

    # Parameterize by p = P(row 0).  P2's payoff from col j = p*A[0,j] + (1-p)*A[1,j].
    # P1 wants to maximise the minimum over columns.
    best_val = -np.inf
    best_p = 0.0
    best_cols = []

    # Check intersections of every pair of P2's response lines
    for j1 in range(n):
        for j2 in range(j1 + 1, n):
            # Line j: f_j(p) = A[1,j] + p*(A[0,j] - A[1,j])
            slope1 = A[0, j1] - A[1, j1]
            slope2 = A[0, j2] - A[1, j2]
            if np.isclose(slope1, slope2):
                continue
            p_int = (A[1, j2] - A[1, j1]) / (slope1 - slope2)
            if p_int < -1e-12 or p_int > 1 + 1e-12:
                continue
            p_int = np.clip(p_int, 0, 1)
            val = A[1, j1] + p_int * slope1
            # Check this is the minimum over all columns
            all_vals = A[1, :] + p_int * (A[0, :] - A[1, :])
            min_val = np.min(all_vals)
            if np.isclose(val, min_val, atol=1e-8) and val > best_val + 1e-12:
                best_val = val
                best_p = p_int
                best_cols = [j1, j2]

    # Also check endpoints p=0 and p=1
    for p_test in [0.0, 1.0]:
        all_vals = A[1, :] + p_test * (A[0, :] - A[1, :])
        val = np.min(all_vals)
        if val > best_val + 1e-12:
            best_val = val
            best_p = p_test
            best_cols = [np.argmin(all_vals)]

    p1 = np.array([best_p, 1.0 - best_p])

    # P2's strategy: solve minimax on the binding columns
    p2 = np.zeros(n)
    if len(best_cols) == 1:
        p2[best_cols[0]] = 1.0
    else:
        # Two binding columns: find q to equalise P1's payoff
        j1, j2 = best_cols[0], best_cols[1]
        # q*A[i,j1] + (1-q)*A[i,j2] for i=0,1 should be equal under P1's opt
        # P2 minimises: p1[0]*(q*A[0,j1]+(1-q)*A[0,j2]) + p1[1]*(q*A[1,j1]+(1-q)*A[1,j2])
        # Equivalent: find q s.t. val from col j1 = val from col j2 for the P1 mixture
        # Actually P2 plays on binding cols. Use indifference of P1:
        # q*A[0,j1] + (1-q)*A[0,j2] = q*A[1,j1] + (1-q)*A[1,j2] is wrong.
        # Correct: P2 wants to minimise P1's payoff.  On the binding 2×2 sub-game:
        sub_A = A[:, best_cols]
        denom = (sub_A[0, 0] - sub_A[0, 1]) - (sub_A[1, 0] - sub_A[1, 1])
        if np.isclose(denom, 0):
            p2[best_cols[0]] = 0.5
            p2[best_cols[1]] = 0.5
        else:
            q0 = (sub_A[1, 1] - sub_A[0, 1]) / denom
            q0 = np.clip(q0, 0, 1)
            p2[best_cols[0]] = q0
            p2[best_cols[1]] = 1.0 - q0

    return best_val, p1, p2


def solve_zero_sum_lp(A):
    """Solve a zero-sum game via linear programming.

    max v  s.t.  Aᵀp ≥ v·1,  p ≥ 0,  1ᵀp = 1.

    Parameters
    ----------
    A : array-like, shape (m, n)
        Payoff matrix for the row player.

    Returns
    -------
    value : float
        Value of the game.
    p1_strategy : np.ndarray, shape (m,)
    p2_strategy : np.ndarray, shape (n,)
    """
    A = np.array(A, dtype=float)
    m, n = A.shape

    # Variables: x = [p_1, ..., p_m, v]  (m+1 variables)
    # Maximise v  ⟺  minimise -v
    c = np.zeros(m + 1)
    c[-1] = -1.0  # minimise -v

    # Inequality: A^T p >= v·1  ⟺  -A^T p + v·1 <= 0
    A_ub = np.zeros((n, m + 1))
    A_ub[:, :m] = -A.T
    A_ub[:, m] = 1.0
    b_ub = np.zeros(n)

    # Equality: sum(p) = 1
    A_eq = np.zeros((1, m + 1))
    A_eq[0, :m] = 1.0
    b_eq = np.array([1.0])

    # Bounds: p_i >= 0, v is free
    bounds = [(0, None)] * m + [(None, None)]

    result = _get_linprog()(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                     bounds=bounds, method="highs")
    if not result.success:
        raise RuntimeError(f"LP failed: {result.message}")

    p1 = result.x[:m]
    value = result.x[m]

    # Solve for Player 2 (dual): min w s.t. Aq >= w·1, q >= 0, 1ᵀq = 1
    c2 = np.zeros(n + 1)
    c2[-1] = 1.0  # minimise w

    A_ub2 = np.zeros((m, n + 1))
    A_ub2[:, :n] = -A
    A_ub2[:, n] = 1.0  # -Aq + w <= 0 ⟹ w <= (Aq)_i
    # Wait, we need Aq >= w·1 ⟹ -Aq + w·1 <= 0
    # But we want min w, which is actually: Aq <= w·1 ⟹ Aq - w·1 <= 0
    # No. P2 solves: min w  s.t.  Aq ≤ w·1, q ≥ 0, 1ᵀq = 1
    A_ub2[:, :n] = A
    A_ub2[:, n] = -1.0
    b_ub2 = np.zeros(m)

    A_eq2 = np.zeros((1, n + 1))
    A_eq2[0, :n] = 1.0
    b_eq2 = np.array([1.0])

    bounds2 = [(0, None)] * n + [(None, None)]

    result2 = _get_linprog()(c2, A_ub=A_ub2, b_ub=b_ub2, A_eq=A_eq2, b_eq=b_eq2,
                      bounds=bounds2, method="highs")
    if not result2.success:
        raise RuntimeError(f"LP (dual) failed: {result2.message}")

    p2 = result2.x[:n]

    return value, p1, p2


def minimax_value(A):
    """Compute the minimax (game) value of a matrix game.

    Parameters
    ----------
    A : array-like, shape (m, n)

    Returns
    -------
    float
        The value of the game.
    """
    value, _, _ = solve_zero_sum_lp(A)
    return value


# ============================================================
# 4. Cooperative Games
# ============================================================

def shapley_value(n, v):
    """Compute the Shapley value for an n-player cooperative game.

    φ_i = Σ_{S ⊆ N\\{i}} |S|!(n-|S|-1)!/n! × [v(S ∪ {i}) - v(S)]

    Parameters
    ----------
    n : int
        Number of players (labelled 0 .. n-1).
    v : dict
        Characteristic function mapping frozenset → value.
        Must contain all 2^n subsets.  v[frozenset()] should be 0.

    Returns
    -------
    np.ndarray, shape (n,)
        Shapley value for each player.
    """
    phi = np.zeros(n)
    n_fact = factorial(n)
    players = set(range(n))

    for i in range(n):
        others = players - {i}
        for size in range(0, n):
            for S_tuple in combinations(others, size):
                S = frozenset(S_tuple)
                S_with_i = S | {i}
                marginal = v.get(S_with_i, 0) - v.get(S, 0)
                weight = factorial(len(S)) * factorial(n - len(S) - 1) / n_fact
                phi[i] += weight * marginal
    return phi


def is_in_core(n, v, allocation):
    """Check whether an allocation is in the core of a cooperative game.

    The core requires:
      (1) efficiency:  Σ x_i = v(N)
      (2) coalitional rationality:  Σ_{i ∈ S} x_i ≥ v(S) for all S ⊆ N.

    Parameters
    ----------
    n : int
        Number of players.
    v : dict
        Characteristic function (frozenset → value).
    allocation : array-like, shape (n,)
        Proposed payoff vector.

    Returns
    -------
    is_core : bool
        True if the allocation is in the core.
    violated : frozenset or None
        First coalition whose constraint is violated, or None.
    """
    x = np.array(allocation, dtype=float)
    grand = frozenset(range(n))

    # Efficiency check
    if not np.isclose(np.sum(x), v.get(grand, 0)):
        return False, grand

    # Coalitional rationality
    for size in range(1, n):
        for S_tuple in combinations(range(n), size):
            S = frozenset(S_tuple)
            if np.sum(x[list(S)]) < v.get(S, 0) - 1e-10:
                return False, S

    return True, None


# ============================================================
# 5. Dynamic Games
# ============================================================

def backward_induction(game_tree):
    """Solve an extensive-form game by backward induction.

    Parameters
    ----------
    game_tree : dict or tuple
        If tuple: terminal node with payoffs (p1_payoff, p2_payoff, ...).
        If dict:
            'player': int — who moves at this node.
            'actions': dict {action_name: subtree}

    Returns
    -------
    path : list of str
        Sequence of actions on the equilibrium path.
    payoffs : tuple
        Payoff tuple at the terminal node.
    """
    if isinstance(game_tree, (tuple, list)):
        return [], tuple(game_tree)

    player = game_tree["player"]
    actions = game_tree["actions"]

    best_action = None
    best_payoff = None
    best_path = None

    for action, subtree in actions.items():
        sub_path, sub_payoff = backward_induction(subtree)
        if best_payoff is None or sub_payoff[player] > best_payoff[player]:
            best_action = action
            best_payoff = sub_payoff
            best_path = sub_path

    return [best_action] + best_path, best_payoff


def vickrey_auction(bids):
    """Second-price sealed-bid (Vickrey) auction.

    Parameters
    ----------
    bids : array-like
        Bid vector, one per bidder.

    Returns
    -------
    winner : int
        Index of the winning bidder.
    payment : float
        Price paid (= second-highest bid).
    """
    bids = np.array(bids, dtype=float)
    n = len(bids)
    assert n >= 2, "Need at least 2 bidders."

    sorted_idx = np.argsort(bids)[::-1]
    winner = sorted_idx[0]
    payment = bids[sorted_idx[1]]
    return int(winner), float(payment)


def vcg_mechanism(valuations, allocation_fn):
    """Vickrey-Clarke-Groves mechanism for general allocation.

    Parameters
    ----------
    valuations : list of dict
        valuations[i][outcome] = value player i assigns to *outcome*.
        Every player must report values for the same set of outcomes.
    allocation_fn : callable
        Given a list of valuation dicts, returns the socially optimal
        outcome key.

    Returns
    -------
    allocation : hashable
        The chosen outcome.
    payments : np.ndarray
        Payment for each player (positive = pays).
    """
    n = len(valuations)
    allocation = allocation_fn(valuations)

    payments = np.zeros(n)
    for i in range(n):
        # Social welfare of others under chosen allocation
        others_welfare = sum(valuations[j][allocation]
                            for j in range(n) if j != i)

        # Optimal allocation without player i
        others_vals = [valuations[j] for j in range(n) if j != i]
        # Create dummy zero-valuation for player i
        dummy = {k: 0 for k in valuations[0].keys()}
        vals_without_i = []
        for j in range(n):
            if j == i:
                vals_without_i.append(dummy)
            else:
                vals_without_i.append(valuations[j])

        alt_alloc = allocation_fn(vals_without_i)
        alt_others_welfare = sum(valuations[j][alt_alloc]
                                 for j in range(n) if j != i)

        # VCG payment: externality imposed on others
        payments[i] = alt_others_welfare - others_welfare

    return allocation, payments


# ============================================================
# 6. Learning in Games
# ============================================================

def fictitious_play(game, num_rounds=1000):
    """Simulate fictitious play for a two-player game.

    Each player plays a best response to the empirical frequency of
    the opponent's past actions.

    Parameters
    ----------
    game : NormalFormGame
    num_rounds : int

    Returns
    -------
    history : list of (np.ndarray, np.ndarray)
        Empirical strategy profile at each round.
    actions : list of (int, int)
        Pure actions played each round.
    """
    counts1 = np.zeros(game.m)
    counts2 = np.zeros(game.n)

    history = []
    actions = []

    for t in range(num_rounds):
        if t == 0:
            a1, a2 = 0, 0
        else:
            freq2 = counts2 / t
            freq1 = counts1 / t
            br1 = game.best_response(1, freq2)
            br2 = game.best_response(2, freq1)
            a1 = br1[0]
            a2 = br2[0]

        counts1[a1] += 1
        counts2[a2] += 1
        emp1 = counts1 / (t + 1)
        emp2 = counts2 / (t + 1)
        history.append((emp1.copy(), emp2.copy()))
        actions.append((a1, a2))

    return history, actions


def regret_matching(game, num_rounds=1000):
    """Regret matching algorithm for two-player games.

    Each player maintains cumulative regret for each action and plays
    proportional to positive regrets.

    Parameters
    ----------
    game : NormalFormGame
    num_rounds : int

    Returns
    -------
    avg_strategy : (np.ndarray, np.ndarray)
        Time-averaged strategy profile.
    regret_history : list of (np.ndarray, np.ndarray)
        Cumulative regret vectors at each round.
    """
    cum_regret1 = np.zeros(game.m)
    cum_regret2 = np.zeros(game.n)
    strategy_sum1 = np.zeros(game.m)
    strategy_sum2 = np.zeros(game.n)
    regret_history = []

    for t in range(num_rounds):
        # Compute strategies from positive regrets
        pos1 = np.maximum(cum_regret1, 0)
        pos2 = np.maximum(cum_regret2, 0)

        if np.sum(pos1) > 0:
            sigma1 = pos1 / np.sum(pos1)
        else:
            sigma1 = np.ones(game.m) / game.m

        if np.sum(pos2) > 0:
            sigma2 = pos2 / np.sum(pos2)
        else:
            sigma2 = np.ones(game.n) / game.n

        strategy_sum1 += sigma1
        strategy_sum2 += sigma2

        # Expected payoffs
        ev1 = game.payoff1 @ sigma2       # payoff for each P1 action
        ev2 = game.payoff2.T @ sigma1     # payoff for each P2 action

        actual1 = sigma1 @ ev1
        actual2 = sigma2 @ ev2

        cum_regret1 += ev1 - actual1
        cum_regret2 += ev2 - actual2

        regret_history.append((cum_regret1.copy(), cum_regret2.copy()))

    avg1 = strategy_sum1 / num_rounds
    avg2 = strategy_sum2 / num_rounds
    return (avg1, avg2), regret_history


def multiplicative_weights(losses_sequence, eta=0.1):
    """Multiplicative Weights Update (MWU) for online decision-making.

    Parameters
    ----------
    losses_sequence : array-like, shape (T, n)
        Loss vector for each of n actions over T rounds.
    eta : float
        Learning rate.

    Returns
    -------
    weights_history : np.ndarray, shape (T, n)
        Distribution over actions at each round.
    cumulative_regret : float
        Total regret relative to the best fixed action.
    """
    L = np.array(losses_sequence, dtype=float)
    T, n_actions = L.shape

    w = np.ones(n_actions)
    weights_history = np.zeros((T, n_actions))
    algo_loss = 0.0

    for t in range(T):
        p = w / np.sum(w)
        weights_history[t] = p
        algo_loss += p @ L[t]
        # Update weights
        w *= np.exp(-eta * L[t])

    best_fixed_loss = np.min(np.sum(L, axis=0))
    cumulative_regret = algo_loss - best_fixed_loss

    return weights_history, cumulative_regret


# ============================================================
# 7. Congestion Games
# ============================================================

def congestion_game_equilibrium(num_players, routes, cost_functions,
                                 max_iter=1000):
    """Find a Nash equilibrium of a congestion game via best-response dynamics.

    Parameters
    ----------
    num_players : int
    routes : list of set
        Available routes, each a set of edge labels.
    cost_functions : dict
        Maps edge label → callable(load) → cost.
    max_iter : int

    Returns
    -------
    assignment : list of int
        Route index chosen by each player.
    total_cost : float
    poa_estimate : float
        Ratio of equilibrium cost to social optimum (approximate).
    """
    n_routes = len(routes)
    # Initialise: assign each player to a random route
    rng = np.random.RandomState(42)
    assignment = [rng.randint(n_routes) for _ in range(num_players)]

    def _edge_loads(asgn):
        loads = {}
        for p in range(num_players):
            for edge in routes[asgn[p]]:
                loads[edge] = loads.get(edge, 0) + 1
        return loads

    def _player_cost(player, asgn):
        loads = _edge_loads(asgn)
        return sum(cost_functions[e](loads.get(e, 0))
                   for e in routes[asgn[player]])

    for iteration in range(max_iter):
        improved = False
        for p in range(num_players):
            current_cost = _player_cost(p, assignment)
            best_route = assignment[p]
            best_cost = current_cost
            for r in range(n_routes):
                if r == assignment[p]:
                    continue
                trial = assignment.copy()
                trial[p] = r
                trial_cost = sum(
                    cost_functions[e](_edge_loads(trial).get(e, 0))
                    for e in routes[r]
                )
                if trial_cost < best_cost - 1e-12:
                    best_cost = trial_cost
                    best_route = r
            if best_route != assignment[p]:
                assignment[p] = best_route
                improved = True
        if not improved:
            break

    # Total cost at equilibrium
    loads_eq = _edge_loads(assignment)
    total_cost = sum(
        cost_functions[e](loads_eq[e]) * loads_eq[e]
        for e in loads_eq
    )

    # Estimate social optimum by brute force (if small enough)
    if num_players <= 8 and n_routes <= 5:
        best_social = _brute_social_opt(num_players, n_routes, routes,
                                        cost_functions)
        poa_estimate = total_cost / best_social if best_social > 0 else 1.0
    else:
        poa_estimate = float("nan")

    return assignment, total_cost, poa_estimate


def _brute_social_opt(num_players, n_routes, routes, cost_functions):
    """Brute-force social optimum for small games."""
    best = [np.inf]
    asgn = [0] * num_players

    def _recurse(p):
        if p == num_players:
            loads = {}
            for pl in range(num_players):
                for e in routes[asgn[pl]]:
                    loads[e] = loads.get(e, 0) + 1
            cost = sum(cost_functions[e](loads[e]) * loads[e] for e in loads)
            if cost < best[0]:
                best[0] = cost
            return
        for r in range(n_routes):
            asgn[p] = r
            _recurse(p + 1)

    _recurse(0)
    return best[0]


def braess_paradox_demo():
    """Demonstrate Braess's paradox on the classic 4-node network.

    Network:
        S → A  cost = x/100
        S → B  cost = 45 min
        A → T  cost = 45 min
        B → T  cost = x/100
        A → B  cost = 0  (new road)

    With 4000 drivers, adding the A→B shortcut increases travel time.

    Returns
    -------
    cost_without : float
        Total cost without the A→B link.
    cost_with : float
        Total cost with the A→B link.
    """
    num_players = 100  # scaled down for computation

    # Without A→B link: two routes S→A→T and S→B→T
    routes_without = [
        {"SA", "AT"},  # Route 0: S→A→T
        {"SB", "BT"},  # Route 1: S→B→T
    ]
    cost_fns_without = {
        "SA": lambda x: x,         # x/100 * 100 = x (scaled)
        "AT": lambda x: 45,
        "SB": lambda x: 45,
        "BT": lambda x: x,
    }
    asgn1, cost1, _ = congestion_game_equilibrium(
        num_players, routes_without, cost_fns_without)

    # With A→B link: three routes
    routes_with = [
        {"SA", "AT"},       # S→A→T
        {"SB", "BT"},       # S→B→T
        {"SA", "AB", "BT"}, # S→A→B→T
    ]
    cost_fns_with = {
        "SA": lambda x: x,
        "AT": lambda x: 45,
        "SB": lambda x: 45,
        "BT": lambda x: x,
        "AB": lambda x: 0,
    }
    asgn2, cost2, _ = congestion_game_equilibrium(
        num_players, routes_with, cost_fns_with)

    return cost1, cost2


# ============================================================
# 8. Multi-Robot Applications
# ============================================================

def auction_task_allocation(robot_bids, tasks):
    """Sequential single-item auction for task allocation.

    Each task is auctioned off one at a time.  The robot with the
    highest bid wins and is removed from further auctions.

    Parameters
    ----------
    robot_bids : array-like, shape (n_robots, n_tasks)
        Valuation of each robot for each task.
    tasks : list of str
        Task identifiers.

    Returns
    -------
    assignment : dict
        {task_name: robot_index}
    payments : dict
        {task_name: payment} (second-price per task).
    """
    bids = np.array(robot_bids, dtype=float)
    n_robots, n_tasks = bids.shape
    assert len(tasks) == n_tasks

    available_robots = set(range(n_robots))
    assignment = {}
    payments = {}

    for t_idx in range(n_tasks):
        task = tasks[t_idx]
        best_robot = None
        best_bid = -np.inf
        second_bid = -np.inf

        for r in available_robots:
            bid = bids[r, t_idx]
            if bid > best_bid:
                second_bid = best_bid
                best_bid = bid
                best_robot = r
            elif bid > second_bid:
                second_bid = bid

        if best_robot is not None:
            assignment[task] = best_robot
            payments[task] = max(second_bid, 0.0)
            available_robots.discard(best_robot)

    return assignment, payments


# ============================================================
# Self-Tests
# ============================================================

def run_tests():
    """Comprehensive self-tests for all game-theory algorithms."""
    print("Running game theory self-tests...")

    # Test 1: Prisoner's Dilemma — (D,D) is the unique pure NE
    # C=0, D=1.  Payoffs: (R,R)=(3,3), (S,T)=(0,5), (T,S)=(5,0), (P,P)=(1,1)
    pd = NormalFormGame(
        [[3, 0], [5, 1]],
        [[3, 5], [0, 1]],
    )
    ne = pd.find_pure_nash()
    assert ne == [(1, 1)], f"PD pure NE should be [(1,1)], got {ne}"
    # Cooperate is strictly dominated by Defect
    assert pd.is_dominated(1, 0, strict=True), "C should be dominated for P1"
    assert pd.is_dominated(2, 0, strict=True), "C should be dominated for P2"
    print("  [PASS] Test 1: Prisoner's Dilemma")

    # Test 2: Battle of the Sexes — 2 pure NE + 1 mixed
    #   Opera(0) Fight(1)
    # Opera  (3,2)  (0,0)
    # Fight  (0,0)  (2,3)
    bos = NormalFormGame(
        [[3, 0], [0, 2]],
        [[2, 0], [0, 3]],
    )
    pure_ne = bos.find_pure_nash()
    assert len(pure_ne) == 2, f"BoS should have 2 pure NE, got {pure_ne}"
    assert (0, 0) in pure_ne and (1, 1) in pure_ne

    p, q = find_mixed_nash_2x2(bos)
    # P1 plays Opera with prob 3/5, P2 plays Opera with prob 2/5
    assert np.isclose(p[0], 3 / 5, atol=1e-6), f"P1 mixed wrong: {p}"
    assert np.isclose(q[0], 2 / 5, atol=1e-6), f"P2 mixed wrong: {q}"
    print("  [PASS] Test 2: Battle of the Sexes NE")

    # Test 3: Zero-sum LP matches graphical method
    A_zs = np.array([[3, -1, 2], [1, 4, -1]])
    val_g, p1_g, p2_g = solve_zero_sum_graphical(A_zs)
    val_lp, p1_lp, p2_lp = solve_zero_sum_lp(A_zs)
    assert np.isclose(val_g, val_lp, atol=1e-4), \
        f"Graphical value {val_g} ≠ LP value {val_lp}"
    print("  [PASS] Test 3: Zero-sum graphical ≈ LP")

    # Test 4: Shapley value sums to v(N) — simple 3-player majority game
    # v(S) = 1 if |S| >= 2, else 0
    v = {}
    for size in range(4):
        for S in combinations(range(3), size):
            fs = frozenset(S)
            v[fs] = 1.0 if len(S) >= 2 else 0.0
    phi = shapley_value(3, v)
    assert np.isclose(np.sum(phi), v[frozenset({0, 1, 2})]), \
        f"Shapley values {phi} don't sum to v(N)={v[frozenset({0,1,2})]}"
    # By symmetry, all Shapley values should be equal: 1/3 each
    assert np.allclose(phi, 1 / 3), f"Symmetric game: Shapley should be 1/3 each, got {phi}"
    print("  [PASS] Test 4: Shapley value sums to v(N)")

    # Test 5: VCG is incentive-compatible
    # Single-item allocation. 3 bidders with values 10, 7, 5.
    outcomes = ["bidder0", "bidder1", "bidder2"]
    vals = [
        {"bidder0": 10, "bidder1": 0, "bidder2": 0},
        {"bidder0": 0, "bidder1": 7, "bidder2": 0},
        {"bidder0": 0, "bidder1": 0, "bidder2": 5},
    ]

    def _social_opt(v_list):
        best_outcome = None
        best_sw = -np.inf
        for o in outcomes:
            sw = sum(vl[o] for vl in v_list)
            if sw > best_sw:
                best_sw = sw
                best_outcome = o
        return best_outcome

    alloc, pay = vcg_mechanism(vals, _social_opt)
    assert alloc == "bidder0", f"VCG should allocate to bidder 0, got {alloc}"
    # Bidder 0 payment should be 7 (= harm to others = next-best SW without them)
    assert np.isclose(pay[0], 7.0), f"VCG payment should be 7, got {pay[0]}"
    assert np.isclose(pay[1], 0.0), f"Loser payment should be 0, got {pay[1]}"
    print("  [PASS] Test 5: VCG incentive compatibility")

    # Test 6: Backward induction on a simple game (centipede-like)
    # P0 can Take (payoff (1,0)) or Pass → P1 can Take (0,2) or Pass → (3,1)
    tree = {
        "player": 0,
        "actions": {
            "Take": (1, 0),
            "Pass": {
                "player": 1,
                "actions": {
                    "Take": (0, 2),
                    "Pass": (3, 1),
                },
            },
        },
    }
    path, payoffs = backward_induction(tree)
    # P1 at second node prefers Take(0,2) over Pass(3,1)? No, P1 gets 2 vs 1 → Take.
    # P0 compares Take(1,0) vs Pass→Take(0,2). Gets 1 vs 0 → Take.
    assert path == ["Take"], f"BI path should be ['Take'], got {path}"
    assert payoffs == (1, 0), f"BI payoffs should be (1,0), got {payoffs}"
    print("  [PASS] Test 6: Backward induction")

    # Test 7: Fictitious play converges in zero-sum game (Matching Pennies)
    mp = NormalFormGame(
        [[1, -1], [-1, 1]],
        [[-1, 1], [1, -1]],
    )
    hist, _ = fictitious_play(mp, num_rounds=5000)
    final_p1, final_p2 = hist[-1]
    # Should converge toward (0.5, 0.5)
    assert np.allclose(final_p1, [0.5, 0.5], atol=0.05), \
        f"FP P1 should be ~[0.5,0.5], got {final_p1}"
    assert np.allclose(final_p2, [0.5, 0.5], atol=0.05), \
        f"FP P2 should be ~[0.5,0.5], got {final_p2}"
    print("  [PASS] Test 7: Fictitious play convergence (zero-sum)")

    # Test 8: Braess's paradox — adding road increases cost
    cost_without, cost_with = braess_paradox_demo()
    assert cost_with >= cost_without - 1e-6, \
        f"Braess: cost_with={cost_with} should be ≥ cost_without={cost_without}"
    print("  [PASS] Test 8: Braess's paradox")

    # Test 9: Support enumeration finds all NE of Prisoner's Dilemma
    all_ne = support_enumeration(pd)
    assert len(all_ne) >= 1, "Support enum should find at least 1 NE in PD"
    # The unique NE is (D,D) = pure on (1,1)
    found_dd = any(np.isclose(s1[1], 1.0) and np.isclose(s2[1], 1.0)
                   for s1, s2 in all_ne)
    assert found_dd, "Support enum should find (D,D) NE"
    print("  [PASS] Test 9: Support enumeration")

    # Test 10: Regret matching on Rock-Paper-Scissors → uniform
    rps = NormalFormGame(
        [[0, -1, 1], [1, 0, -1], [-1, 1, 0]],
        [[0, 1, -1], [-1, 0, 1], [1, -1, 0]],
    )
    (avg1, avg2), _ = regret_matching(rps, num_rounds=10000)
    assert np.allclose(avg1, 1 / 3, atol=0.05), \
        f"RM P1 should be ~uniform, got {avg1}"
    print("  [PASS] Test 10: Regret matching (RPS → uniform)")

    # Test 11: Multiplicative weights — bounded regret
    rng = np.random.RandomState(123)
    losses = rng.rand(200, 5)
    _, regret = multiplicative_weights(losses, eta=0.1)
    # Regret bound: O(ln(n)/η + η·T) ≈ ln(5)/0.1 + 0.1*200 = 16.1 + 20 = 36.1
    assert regret < 50, f"MW regret {regret:.2f} too high"
    print("  [PASS] Test 11: Multiplicative weights regret bound")

    # Test 12: Vickrey auction
    winner, pay = vickrey_auction([10, 7, 5, 3])
    assert winner == 0, f"Vickrey winner should be 0, got {winner}"
    assert np.isclose(pay, 7.0), f"Vickrey payment should be 7, got {pay}"
    print("  [PASS] Test 12: Vickrey auction")

    # Test 13: Core membership — use a game with non-empty core
    # Superadditive game: v(S) = |S| for all S. Core = {(1,1,1)}.
    v_core = {}
    for size in range(4):
        for S in combinations(range(3), size):
            v_core[frozenset(S)] = float(len(S))
    ok, viol = is_in_core(3, v_core, [1.0, 1.0, 1.0])
    assert ok, f"(1,1,1) should be in core, violated={viol}"
    ok2, viol2 = is_in_core(3, v_core, [2.5, 0.3, 0.2])
    assert not ok2, "Unequal split should violate core"
    # Also verify majority game has EMPTY core
    ok3, _ = is_in_core(3, v, [1 / 3, 1 / 3, 1 / 3])
    assert not ok3, "Majority game core should be empty"
    print("  [PASS] Test 13: Core membership")

    # Test 14: IESDS on Prisoner's Dilemma reduces to (D,D)
    reduced, rows, cols = pd.iesds()
    assert reduced.m == 1 and reduced.n == 1, "IESDS on PD should reduce to 1×1"
    assert rows == [1] and cols == [1], f"IESDS survivors should be [1],[1], got {rows},{cols}"
    print("  [PASS] Test 14: IESDS")

    # Test 15: Auction task allocation
    bids_mat = np.array([
        [10, 5, 8],
        [7, 9, 3],
        [6, 4, 11],
    ])
    tasks_list = ["A", "B", "C"]
    asgn, pays = auction_task_allocation(bids_mat, tasks_list)
    assert asgn["A"] == 0, f"Task A should go to robot 0, got {asgn['A']}"
    print("  [PASS] Test 15: Auction task allocation")

    print("\nAll 15 tests passed!")


# ============================================================
# Demos
# ============================================================

def run_demo():
    """Demonstrate key game-theory algorithms with output and plots."""
    print("=" * 60)
    print("  Game Theory — Algorithm Demos")
    print("=" * 60)

    # --- Demo 1: Find all NE of a 3×3 game ---
    print("\n--- Demo 1: Nash Equilibria of a 3×3 Game ---")
    # A coordination-like game
    g = NormalFormGame(
        [[3, 0, 0], [0, 2, 0], [0, 0, 1]],
        [[3, 0, 0], [0, 2, 0], [0, 0, 1]],
    )
    pure = g.find_pure_nash()
    print(f"  Pure NE: {pure}")
    all_ne = support_enumeration(g)
    print(f"  All NE (support enum): {len(all_ne)} found")
    for idx, (s1, s2) in enumerate(all_ne):
        print(f"    NE {idx+1}: P1={np.round(s1, 4)}, P2={np.round(s2, 4)}")

    # --- Demo 2: Fictitious play convergence plot ---
    print("\n--- Demo 2: Fictitious Play Convergence (Matching Pennies) ---")
    mp = NormalFormGame(
        [[1, -1], [-1, 1]],
        [[-1, 1], [1, -1]],
    )
    hist, _ = fictitious_play(mp, num_rounds=2000)
    p1_probs = [h[0][0] for h in hist]

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    ax.plot(p1_probs, linewidth=0.8, label="P1: P(Heads)")
    ax.axhline(y=0.5, color="red", linestyle="--", alpha=0.7, label="NE (0.5)")
    ax.set_xlabel("Round")
    ax.set_ylabel("Empirical Probability")
    ax.set_title("Fictitious Play — Matching Pennies")
    ax.legend()
    fig.tight_layout()
    fig.savefig("demo_fictitious_play.png", dpi=150)
    plt.close(fig)
    print(f"  Final P1 strategy: {np.round(hist[-1][0], 4)}")
    print("  → Saved demo_fictitious_play.png")

    # --- Demo 3: Congestion game equilibrium ---
    print("\n--- Demo 3: Congestion Game Equilibrium ---")
    routes = [
        {"A", "C"},  # Route 0: edges A, C
        {"B", "D"},  # Route 1: edges B, D
        {"A", "E", "D"},  # Route 2: edges A, E, D
    ]
    cost_fns = {
        "A": lambda x: 2 * x,
        "B": lambda x: x + 5,
        "C": lambda x: x + 5,
        "D": lambda x: 2 * x,
        "E": lambda x: x,
    }
    asgn, total, poa = congestion_game_equilibrium(6, routes, cost_fns)
    route_counts = [0] * len(routes)
    for r in asgn:
        route_counts[r] += 1
    print(f"  Assignment: {asgn}")
    print(f"  Route usage: {route_counts}")
    print(f"  Total cost: {total:.2f}")
    print(f"  Price of Anarchy estimate: {poa:.3f}")

    # --- Demo 4: Auction-based task allocation for 4 robots ---
    print("\n--- Demo 4: Auction Task Allocation (4 robots, 4 tasks) ---")
    rng = np.random.RandomState(7)
    bids = rng.randint(1, 20, size=(4, 4))
    task_names = ["Deliver_A", "Inspect_B", "Charge_C", "Clean_D"]
    print("  Bid matrix:")
    print(f"  {'':12s}", "  ".join(f"{t:>10s}" for t in task_names))
    for r in range(4):
        print(f"  Robot {r}:  ", "  ".join(f"{bids[r,t]:10d}" for t in range(4)))

    assignment, payments = auction_task_allocation(bids, task_names)
    print("\n  Allocation:")
    for task, robot in assignment.items():
        print(f"    {task} → Robot {robot}  (payment: {payments[task]:.0f})")

    # --- Demo 5: Braess's Paradox ---
    print("\n--- Demo 5: Braess's Paradox ---")
    c_without, c_with = braess_paradox_demo()
    print(f"  Total cost WITHOUT shortcut: {c_without:.1f}")
    print(f"  Total cost WITH    shortcut: {c_with:.1f}")
    if c_with > c_without:
        print("  ⚠ Adding the shortcut INCREASED total cost (Braess's paradox)!")
    else:
        print("  (No paradox at this scale — try larger player count.)")

    # --- Demo 6: Regret matching convergence ---
    print("\n--- Demo 6: Regret Matching (Rock-Paper-Scissors) ---")
    rps = NormalFormGame(
        [[0, -1, 1], [1, 0, -1], [-1, 1, 0]],
        [[0, 1, -1], [-1, 0, 1], [1, -1, 0]],
    )
    (avg1, avg2), reg_hist = regret_matching(rps, num_rounds=5000)
    print(f"  P1 avg strategy: {np.round(avg1, 4)}")
    print(f"  P2 avg strategy: {np.round(avg2, 4)}")

    fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4))
    reg1_norms = [np.max(np.maximum(r[0], 0)) for r in reg_hist]
    ax2.plot(reg1_norms, linewidth=0.8, label="Max positive regret (P1)")
    ax2.set_xlabel("Round")
    ax2.set_ylabel("Regret")
    ax2.set_title("Regret Matching — Rock-Paper-Scissors")
    ax2.legend()
    fig2.tight_layout()
    fig2.savefig("demo_regret_matching.png", dpi=150)
    plt.close(fig2)
    print("  → Saved demo_regret_matching.png")

    # --- Demo 7: Shapley Value ---
    print("\n--- Demo 7: Shapley Value (Glove Game) ---")
    # 3 players: players 0,1 have left gloves, player 2 has right glove.
    # v(S) = min(#left in S, #right in S)
    v_glove = {}
    for size in range(4):
        for S_tuple in combinations(range(3), size):
            S = frozenset(S_tuple)
            n_left = len(S & {0, 1})
            n_right = len(S & {2})
            v_glove[S] = min(n_left, n_right)
    phi = shapley_value(3, v_glove)
    print(f"  Shapley values: P0={phi[0]:.4f}, P1={phi[1]:.4f}, P2={phi[2]:.4f}")
    print(f"  Sum = {np.sum(phi):.4f} (should = {v_glove[frozenset({0,1,2})]})")

    print("\n" + "=" * 60)
    print("  All demos complete.")
    print("=" * 60)


# ============================================================
# Entry Point
# ============================================================

if __name__ == "__main__":
    if "--test" in sys.argv:
        run_tests()
    else:
        run_demo()