Matrix Essentials

Matrix Mastery — Python Implementations
"""
Matrix Mastery — Python Implementations
==========================================
Companion code for Chapter 08: Matrix Essentials for Optimization.

Covers:
  - Matrix classification and property detection
  - Positive definiteness tests (6 methods)
  - Nearest PD matrix (Higham 1988)
  - SVD analysis and four fundamental subspaces
  - Condition number diagnostics
  - Tikhonov and truncated SVD regularization
  - L-curve method for choosing λ
  - Levenberg-Marquardt with conditioning diagnostics
  - Matrix doctor (comprehensive diagnosis)

Usage:
    python matrix_essentials.py          # Run all demos
    python matrix_essentials.py --test   # Run self-tests only
"""

import numpy as np
import matplotlib
matplotlib.use("Agg")  # non-interactive backend for CI
import matplotlib.pyplot as plt
import sys


# ============================================================
# 1. Matrix Classification
# ============================================================

def classify_matrix(A, tol=1e-10):
    """Classify a matrix's structural and spectral properties.
    
    Returns dict with keys: symmetric, orthogonal, pd, psd, indefinite,
    singular, condition_number, rank.
    """
    m, n = A.shape
    result = {}
    is_square = m == n

    result["symmetric"] = is_square and np.allclose(A, A.T, atol=tol)
    result["skew_symmetric"] = is_square and np.allclose(A, -A.T, atol=tol)

    if is_square:
        result["orthogonal"] = np.allclose(A.T @ A, np.eye(n), atol=tol)
    else:
        result["orthogonal"] = False

    U, s, Vt = np.linalg.svd(A)
    result["rank"] = int(np.sum(s > tol * s[0] * max(m, n)))
    result["singular"] = result["rank"] < min(m, n)
    result["condition_number"] = s[0] / s[-1] if s[-1] > tol else np.inf

    if result["symmetric"]:
        eigvals = np.linalg.eigvalsh(A)
        if np.all(eigvals > tol):
            result["pd"] = True
            result["psd"] = True
            result["indefinite"] = False
        elif np.all(eigvals > -tol):
            result["pd"] = False
            result["psd"] = True
            result["indefinite"] = False
        else:
            result["pd"] = False
            result["psd"] = False
            result["indefinite"] = True
    else:
        result["pd"] = None
        result["psd"] = None
        result["indefinite"] = None

    return result


# ============================================================
# 2. Six Tests for Positive Definiteness
# ============================================================

def test_pd_eigenvalues(A, tol=1e-10):
    """Test 1: All eigenvalues positive."""
    eigvals = np.linalg.eigvalsh(A)
    return bool(np.all(eigvals > tol)), eigvals


def test_pd_cholesky(A):
    """Test 2: Cholesky factorization succeeds."""
    try:
        L = np.linalg.cholesky(A)
        return True, L
    except np.linalg.LinAlgError:
        return False, None


def test_pd_sylvester(A, tol=1e-10):
    """Test 3: All leading principal minors positive."""
    n = A.shape[0]
    minors = []
    for k in range(1, n + 1):
        minor = np.linalg.det(A[:k, :k])
        minors.append(minor)
        if minor <= tol:
            return False, minors
    return True, minors


def test_pd_pivots(A, tol=1e-10):
    """Test 5: All pivots in Gaussian elimination are positive."""
    n = A.shape[0]
    U = A.astype(float).copy()
    pivots = []
    for k in range(n):
        if abs(U[k, k]) <= tol:
            return False, pivots
        pivots.append(U[k, k])
        for i in range(k + 1, n):
            factor = U[i, k] / U[k, k]
            U[i, k:] -= factor * U[k, k:]
    return all(p > tol for p in pivots), pivots


def test_pd_diagonal_dominance(A):
    """Test 6: Strictly diagonally dominant (sufficient, not necessary)."""
    n = A.shape[0]
    for i in range(n):
        if A[i, i] <= np.sum(np.abs(A[i, :])) - np.abs(A[i, i]):
            return False
    return True


def run_all_pd_tests(A):
    """Run all six PD tests and print results."""
    print("Positive Definiteness Tests")
    print("-" * 40)

    is_pd, eigvals = test_pd_eigenvalues(A)
    print(f"1. Eigenvalue test:    {'PASS' if is_pd else 'FAIL'}  λ = {np.round(eigvals, 4)}")

    is_pd, L = test_pd_cholesky(A)
    print(f"2. Cholesky test:      {'PASS' if is_pd else 'FAIL'}")

    is_pd, minors = test_pd_sylvester(A)
    print(f"3. Sylvester criterion: {'PASS' if is_pd else 'FAIL'}  minors = {[round(m, 4) for m in minors]}")

    is_pd, pivots = test_pd_pivots(A)
    print(f"5. Pivot test:         {'PASS' if is_pd else 'FAIL'}  pivots = {[round(p, 4) for p in pivots]}")

    is_dd = test_pd_diagonal_dominance(A)
    print(f"6. Diag. dominance:    {'PASS' if is_dd else 'FAIL (not necessary)'}")


# ============================================================
# 3. Nearest PD Matrix (Higham 1988)
# ============================================================

def nearest_pd(A, epsilon=1e-6):
    """Nearest positive definite matrix via eigenvalue clipping."""
    A_sym = (A + A.T) / 2
    eigvals, Q = np.linalg.eigh(A_sym)
    eigvals_clipped = np.maximum(eigvals, epsilon)
    A_pd = Q @ np.diag(eigvals_clipped) @ Q.T
    return (A_pd + A_pd.T) / 2


# ============================================================
# 4. SVD Analysis
# ============================================================

def svd_analysis(A, tol=None, plot=True, save_path=None):
    """Comprehensive SVD analysis: rank, condition, subspaces, energy."""
    m, n = A.shape
    U, s, Vt = np.linalg.svd(A, full_matrices=True)

    if tol is None:
        tol = max(m, n) * s[0] * np.finfo(float).eps

    r = int(np.sum(s > tol))
    total_energy = np.sum(s ** 2)
    energy = np.cumsum(s ** 2) / total_energy if total_energy > 0 else np.zeros_like(s)

    print("=" * 60)
    print(f"SVD Analysis: {m}×{n} matrix")
    print("=" * 60)
    print(f"Rank: {r} / {min(m, n)}")
    kappa = s[0] / s[min(m, n) - 1] if s[min(m, n) - 1] > 0 else np.inf
    print(f"Condition number: {kappa:.4e}")

    print(f"\nSingular values (top {min(10, len(s))}):")
    for i in range(min(10, len(s))):
        bar = "█" * max(1, int(40 * s[i] / s[0]))
        pct = energy[i] * 100 if i < len(energy) else 100.0
        print(f"  σ_{i + 1:2d} = {s[i]:12.6e}  {bar}  ({pct:.1f}%)")

    print(f"\nFour Fundamental Subspaces:")
    print(f"  C(A):  dim {r}     | N(A):  dim {n - r}")
    print(f"  C(Aᵀ): dim {r}     | N(Aᵀ): dim {m - r}")

    if plot:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
        ax1.bar(range(1, len(s) + 1), s, color="steelblue")
        ax1.set_xlabel("Index")
        ax1.set_ylabel("Singular Value")
        ax1.set_title("Singular Value Spectrum")
        ax1.set_yscale("log")

        ax2.plot(range(1, len(s) + 1), energy * 100, "ro-")
        ax2.axhline(y=99, color="gray", linestyle="--", alpha=0.5, label="99%")
        ax2.set_xlabel("Components")
        ax2.set_ylabel("Cumulative Energy (%)")
        ax2.set_title("Energy Distribution")
        ax2.legend()

        plt.tight_layout()
        path = save_path or "svd_analysis.png"
        plt.savefig(path, dpi=150)
        plt.close()
        print(f"\nPlot saved to {path}")

    return {"U": U, "s": s, "Vt": Vt, "rank": r, "condition": kappa, "energy": energy}


# ============================================================
# 5. Regularization
# ============================================================

def tikhonov_solve(A, b, lam):
    """Tikhonov-regularized least-squares: min ‖Ax-b‖² + λ‖x‖²."""
    n = A.shape[1]
    return np.linalg.solve(A.T @ A + lam * np.eye(n), A.T @ b)


def truncated_svd_solve(A, b, k):
    """Truncated SVD solution keeping top k singular values."""
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    x = np.zeros(A.shape[1])
    for i in range(min(k, len(s))):
        if s[i] > 1e-15:
            x += (U[:, i] @ b / s[i]) * Vt[i, :]
    return x


def l_curve(A, b, lambdas=None, save_path=None):
    """Plot L-curve and return optimal λ at maximum curvature."""
    if lambdas is None:
        lambdas = np.logspace(-16, 2, 300)

    residuals = []
    sol_norms = []
    for lam in lambdas:
        x_lam = tikhonov_solve(A, b, lam)
        residuals.append(np.linalg.norm(A @ x_lam - b))
        sol_norms.append(np.linalg.norm(x_lam))

    residuals = np.array(residuals)
    sol_norms = np.array(sol_norms)

    # Curvature in log-log space
    lr = np.log(residuals + 1e-30)
    ln = np.log(sol_norms + 1e-30)
    dr = np.gradient(lr)
    dn = np.gradient(ln)
    ddr = np.gradient(dr)
    ddn = np.gradient(dn)
    curvature = np.abs(dr * ddn - dn * ddr) / (dr ** 2 + dn ** 2 + 1e-30) ** 1.5
    margin = max(10, len(lambdas) // 20)
    corner_idx = np.argmax(curvature[margin:-margin]) + margin

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.loglog(residuals, sol_norms, "b-", linewidth=1.5)
    ax.plot(residuals[corner_idx], sol_norms[corner_idx], "ro", markersize=10)
    ax.annotate(
        f"λ = {lambdas[corner_idx]:.2e}",
        xy=(residuals[corner_idx], sol_norms[corner_idx]),
        xytext=(15, 15),
        textcoords="offset points",
        fontsize=10,
        arrowprops={"arrowstyle": "->"},
    )
    ax.set_xlabel(r"$\|Ax_\lambda - b\|_2$")
    ax.set_ylabel(r"$\|x_\lambda\|_2$")
    ax.set_title("L-Curve for Tikhonov Regularization")
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    path = save_path or "l_curve.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f"L-curve saved to {path}. Optimal λ ≈ {lambdas[corner_idx]:.2e}")

    return lambdas[corner_idx]


# ============================================================
# 6. Levenberg-Marquardt with Diagnostics
# ============================================================

def levenberg_marquardt(residual_fn, jacobian_fn, x0, max_iter=100,
                        lam0=1e-3, tol=1e-10):
    """Simplified LM solver with condition-number tracking."""
    x = x0.astype(float).copy()
    lam = lam0
    n = len(x)
    history = {"cost": [], "lambda": [], "kappa_H": [], "kappa_damped": []}

    for it in range(max_iter):
        r = residual_fn(x)
        J = jacobian_fn(x)
        cost = 0.5 * r @ r
        H = J.T @ J
        g = J.T @ r

        kappa_H = np.linalg.cond(H)
        kappa_d = np.linalg.cond(H + lam * np.eye(n))
        history["cost"].append(cost)
        history["lambda"].append(lam)
        history["kappa_H"].append(kappa_H)
        history["kappa_damped"].append(kappa_d)

        dx = np.linalg.solve(H + lam * np.eye(n), -g)
        x_new = x + dx
        cost_new = 0.5 * residual_fn(x_new) @ residual_fn(x_new)
        gain = 0.5 * dx @ (lam * dx - g)
        rho = (cost - cost_new) / (gain + 1e-30)

        if rho > 0:
            x = x_new
            lam = max(lam / 3, 1e-15)
            if np.linalg.norm(dx) < tol:
                break
        else:
            lam = min(lam * 2, 1e10)

    return x, history


def demo_lm_curve_fit(save_path=None):
    """Demo: fit y = a·exp(-b·x) + c with LM."""
    np.random.seed(42)
    x_data = np.linspace(0, 5, 50)
    a_true, b_true, c_true = 3.0, 1.5, 0.5
    y_data = a_true * np.exp(-b_true * x_data) + c_true + 0.1 * np.random.randn(50)

    def residual(p):
        return p[0] * np.exp(-p[1] * x_data) + p[2] - y_data

    def jacobian(p):
        J = np.zeros((len(x_data), 3))
        e = np.exp(-p[1] * x_data)
        J[:, 0] = e
        J[:, 1] = -p[0] * x_data * e
        J[:, 2] = 1.0
        return J

    p0 = np.array([1.0, 0.5, 0.0])
    p_opt, hist = levenberg_marquardt(residual, jacobian, p0)
    print(f"LM result: a={p_opt[0]:.4f}, b={p_opt[1]:.4f}, c={p_opt[2]:.4f}")
    print(f"Truth:     a={a_true}, b={b_true}, c={c_true}")

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    axes[0, 0].semilogy(hist["cost"])
    axes[0, 0].set_title("Cost")
    axes[0, 1].semilogy(hist["lambda"])
    axes[0, 1].set_title("λ (damping)")
    axes[1, 0].semilogy(hist["kappa_H"], label="κ(H)")
    axes[1, 0].semilogy(hist["kappa_damped"], label="κ(H+λI)")
    axes[1, 0].legend()
    axes[1, 0].set_title("Condition Number")
    axes[1, 1].scatter(x_data, y_data, s=10, alpha=0.5, label="Data")
    axes[1, 1].plot(
        x_data,
        p_opt[0] * np.exp(-p_opt[1] * x_data) + p_opt[2],
        "r-",
        lw=2,
        label="Fit",
    )
    axes[1, 1].legend()
    axes[1, 1].set_title("Curve Fit")
    for ax in axes.flat:
        ax.set_xlabel("Iteration")
    plt.tight_layout()
    path = save_path or "lm_diagnostics.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f"LM diagnostics saved to {path}")


# ============================================================
# 7. Matrix Doctor
# ============================================================

def matrix_doctor(A, verbose=True):
    """Comprehensive matrix diagnosis with solver recommendation."""
    m, n = A.shape
    is_square = m == n
    report = {"shape": (m, n), "square": is_square}

    report["symmetric"] = is_square and np.allclose(A, A.T, atol=1e-12)
    report["skew_symmetric"] = is_square and np.allclose(A, -A.T, atol=1e-12)
    report["orthogonal"] = is_square and np.allclose(A.T @ A, np.eye(n), atol=1e-10) if is_square else False
    report["diagonal"] = is_square and np.allclose(A, np.diag(np.diag(A)), atol=1e-12)
    report["upper_triangular"] = np.allclose(A, np.triu(A), atol=1e-12)
    report["lower_triangular"] = np.allclose(A, np.tril(A), atol=1e-12)

    nnz = np.count_nonzero(np.abs(A) > 1e-15)
    report["sparsity"] = 1 - nnz / (m * n)
    report["sparse"] = report["sparsity"] > 0.5

    _, s, _ = np.linalg.svd(A, full_matrices=False)
    tol = max(m, n) * s[0] * np.finfo(float).eps
    r = int(np.sum(s > tol))
    report["rank"] = r
    report["full_rank"] = r == min(m, n)
    report["condition_number"] = s[0] / s[r - 1] if r > 0 else np.inf

    # Energy-based effective rank
    total_energy = np.sum(s ** 2)
    if total_energy > 0:
        cumulative = np.cumsum(s ** 2) / total_energy
        report["rank_for_99pct_energy"] = int(np.searchsorted(cumulative, 0.99) + 1)
    else:
        report["rank_for_99pct_energy"] = 0

    if report["symmetric"]:
        eigvals = np.linalg.eigvalsh(A)
        if np.all(eigvals > tol):
            report["definiteness"] = "positive definite"
        elif np.all(eigvals > -tol):
            report["definiteness"] = "positive semidefinite"
        elif np.all(eigvals < -tol):
            report["definiteness"] = "negative definite"
        else:
            report["definiteness"] = "indefinite"

    warnings = []
    kappa = report["condition_number"]
    if kappa > 1e12:
        warnings.append(f"SEVERELY ILL-CONDITIONED (κ={kappa:.1e})")
    elif kappa > 1e6:
        warnings.append(f"ILL-CONDITIONED (κ={kappa:.1e})")
    if not report["full_rank"]:
        warnings.append(f"RANK DEFICIENT: {r}/{min(m, n)}")
    if report.get("definiteness") == "indefinite":
        warnings.append("INDEFINITE — not a valid minimum Hessian")
    report["warnings"] = warnings

    # Solver recommendation
    if not is_square:
        solver = "QR (overdetermined)" if m > n else "SVD (underdetermined)"
        if not report["full_rank"]:
            solver = "SVD / pseudoinverse (rank-deficient)"
    elif report.get("definiteness") == "positive definite":
        solver = "Sparse Cholesky" if report["sparse"] and n > 100 else "Dense Cholesky"
    elif report.get("definiteness") == "positive semidefinite":
        solver = "LDLᵀ or regularised Cholesky"
    elif report.get("definiteness") == "indefinite":
        solver = "Modified Cholesky or LDLᵀ with pivoting"
    else:
        solver = "LU with partial pivoting"
    report["solver"] = solver

    if verbose:
        print("=" * 60)
        print("MATRIX DOCTOR REPORT")
        print("=" * 60)
        tags = [k for k in ("symmetric", "skew_symmetric", "orthogonal", "diagonal",
                             "upper_triangular", "lower_triangular", "sparse") if report.get(k)]
        print(f"Shape: {m}×{n} | Properties: {', '.join(tags) or 'general'}")
        if "definiteness" in report:
            print(f"Definiteness: {report['definiteness']}")
        print(f"Rank: {r}/{min(m, n)} | κ = {kappa:.2e} | 99% energy rank: {report['rank_for_99pct_energy']}")
        for w in warnings:
            print(f"  ⚠ {w}")
        print(f"→ Recommended solver: {solver}")
        print("=" * 60)

    return report


# ============================================================
# Self-tests
# ============================================================

def run_tests():
    """Verify implementations against known results."""
    passed = 0
    failed = 0

    def check(name, cond):
        nonlocal passed, failed
        if cond:
            passed += 1
        else:
            failed += 1
            print(f"  FAIL: {name}")

    print("Running self-tests...\n")

    # classify_matrix
    A = np.array([[4, 2, 1], [2, 5, 3], [1, 3, 6]])
    c = classify_matrix(A)
    check("classify: symmetric PD", c["symmetric"] and c["pd"])

    Q = np.array([[1, 2], [2, 3]])
    c = classify_matrix(Q)
    check("classify: indefinite", c["indefinite"])

    R = np.array([[0, -1], [1, 0]], dtype=float)
    c = classify_matrix(R)
    check("classify: orthogonal", c["orthogonal"])

    # PD tests
    check("pd_eigenvalues", test_pd_eigenvalues(A)[0])
    check("pd_cholesky", test_pd_cholesky(A)[0])
    check("pd_sylvester", test_pd_sylvester(A)[0])
    check("pd_pivots", test_pd_pivots(A)[0])

    check("not_pd_eigenvalues", not test_pd_eigenvalues(Q)[0])
    check("not_pd_cholesky", not test_pd_cholesky(Q)[0])
    check("not_pd_sylvester", not test_pd_sylvester(Q)[0])

    # nearest_pd
    bad = np.array([[4.0, 1.0, 3.1], [1.0, 2.0, -0.5], [3.1, -0.5, 1.0]])
    fixed = nearest_pd(bad)
    check("nearest_pd: result is PD", test_pd_cholesky(fixed)[0])

    # tikhonov
    A2 = np.diag([1.0, 1e-8])
    b2 = A2 @ np.array([1.0, 1.0])
    x_tikh = tikhonov_solve(A2, b2, lam=0)
    check("tikhonov(λ=0) recovers exact", np.allclose(x_tikh, [1, 1], atol=1e-6))

    # truncated_svd
    A3 = np.array([[1, 1], [1, 1], [0, 0]], dtype=float)
    b3 = np.array([3.0, 3.0, 0.0])
    x_tsvd = truncated_svd_solve(A3, b3, k=1)
    check("tsvd solution", np.allclose(A3 @ x_tsvd, [3, 3, 0], atol=1e-10))

    # matrix_doctor
    report = matrix_doctor(A, verbose=False)
    check("doctor: PD detected", report.get("definiteness") == "positive definite")
    check("doctor: cholesky recommended", "Cholesky" in report["solver"])

    print(f"\n{passed} passed, {failed} failed")
    return failed == 0


# ============================================================
# Main
# ============================================================

if __name__ == "__main__":
    if "--test" in sys.argv:
        success = run_tests()
        sys.exit(0 if success else 1)

    print("=" * 60)
    print("Matrix Essentials — Demo Suite")
    print("=" * 60)

    # Demo 1: PD tests
    print("\n--- Demo 1: Positive Definiteness Tests ---")
    A = np.array([[4, 2, 1], [2, 5, 3], [1, 3, 6]])
    run_all_pd_tests(A)

    # Demo 2: Nearest PD
    print("\n--- Demo 2: Nearest PD Matrix ---")
    bad = np.array([[4.0, 1.0, 3.1], [1.0, 2.0, -0.5], [3.1, -0.5, 1.0]])
    print(f"Original eigenvalues: {np.round(np.linalg.eigvalsh(bad), 4)}")
    fixed = nearest_pd(bad)
    print(f"Fixed eigenvalues:    {np.round(np.linalg.eigvalsh(fixed), 4)}")

    # Demo 3: SVD analysis
    print("\n--- Demo 3: SVD Analysis ---")
    np.random.seed(42)
    J = np.random.randn(8, 5) @ np.random.randn(5, 6)
    svd_analysis(J, save_path="demo_svd.png")

    # Demo 4: Regularization comparison
    print("\n--- Demo 4: Regularization ---")
    np.random.seed(42)
    n = 15
    s_vals = np.logspace(0, -8, n)
    U0, _ = np.linalg.qr(np.random.randn(n, n))
    V0, _ = np.linalg.qr(np.random.randn(n, n))
    A_ill = U0 @ np.diag(s_vals) @ V0.T
    x_true = np.ones(n)
    b_ill = A_ill @ x_true + 1e-6 * np.random.randn(n)
    print(f"κ(A) = {np.linalg.cond(A_ill):.2e}")

    x_direct = np.linalg.solve(A_ill, b_ill)
    x_tikh = tikhonov_solve(A_ill, b_ill, lam=1e-6)
    x_tsvd = truncated_svd_solve(A_ill, b_ill, k=8)
    print(f"Direct error:  {np.linalg.norm(x_direct - x_true):.4e}")
    print(f"Tikhonov:      {np.linalg.norm(x_tikh - x_true):.4e}")
    print(f"Truncated SVD: {np.linalg.norm(x_tsvd - x_true):.4e}")

    lam_opt = l_curve(A_ill, b_ill, save_path="demo_l_curve.png")

    # Demo 5: LM
    print("\n--- Demo 5: Levenberg-Marquardt ---")
    demo_lm_curve_fit(save_path="demo_lm.png")

    # Demo 6: Matrix doctor
    print("\n--- Demo 6: Matrix Doctor ---")
    matrix_doctor(A_ill)

    print("\nAll demos complete. Check *.png for plots.")