Day 8: Matrix Decompositions for Optimization — Companion Code
⬇ Download matrix_decompositions.py
"""
Day 8: Matrix Decompositions for Optimization — Companion Code
Phase I, Week 2
Run: python matrix_decompositions.py
"""
import numpy as np
import time
# =============================================================================
# Exercise 1: Cholesky from Scratch
# =============================================================================
def cholesky_manual(A: np.ndarray) -> np.ndarray:
"""Cholesky decomposition: A = L L^T for PD matrix A.
Column-by-column algorithm.
Raises ValueError if A is not positive definite.
"""
n = A.shape[0]
L = np.zeros_like(A)
for j in range(n):
# Diagonal entry
val = A[j, j] - np.sum(L[j, :j]**2)
if val <= 0:
raise ValueError(f"Matrix is not PD: pivot {j} = {val:.2e}")
L[j, j] = np.sqrt(val)
# Below-diagonal entries in column j
for i in range(j + 1, n):
L[i, j] = (A[i, j] - np.sum(L[i, :j] * L[j, :j])) / L[j, j]
return L
def test_cholesky():
"""Verify against numpy."""
print("=== Exercise 1: Cholesky from Scratch ===")
rng = np.random.default_rng(42)
A_raw = rng.randn(5, 5)
A = A_raw @ A_raw.T + 0.1 * np.eye(5) # ensure PD
L_manual = cholesky_manual(A)
L_numpy = np.linalg.cholesky(A)
error = np.linalg.norm(L_manual - L_numpy)
recon = np.linalg.norm(L_manual @ L_manual.T - A)
print(f" ||L_manual - L_numpy|| = {error:.2e}")
print(f" ||L L^T - A|| = {recon:.2e}")
# Test non-PD detection
A_bad = np.array([[1.0, 2.0], [2.0, 1.0]]) # eigenvalues: 3, -1
try:
cholesky_manual(A_bad)
print(" Non-PD test: FAILED (should have raised)")
except ValueError as e:
print(f" Non-PD detection: PASS ✓ ({e})")
# =============================================================================
# Exercise 2: Solve via Cholesky (Forward + Back Substitution)
# =============================================================================
def forward_sub(L: np.ndarray, b: np.ndarray) -> np.ndarray:
"""Solve Ly = b where L is lower triangular."""
n = len(b)
y = np.zeros(n)
for i in range(n):
y[i] = (b[i] - L[i, :i] @ y[:i]) / L[i, i]
return y
def back_sub(U: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Solve Ux = y where U is upper triangular."""
n = len(y)
x = np.zeros(n)
for i in range(n - 1, -1, -1):
x[i] = (y[i] - U[i, i+1:] @ x[i+1:]) / U[i, i]
return x
def cholesky_solve(A: np.ndarray, b: np.ndarray) -> np.ndarray:
"""Solve Ax = b via Cholesky: L L^T x = b."""
L = cholesky_manual(A)
y = forward_sub(L, b)
x = back_sub(L.T, y)
return x
def test_cholesky_solve():
"""Compare accuracy with np.linalg.solve."""
print("\n=== Exercise 2: Solve via Cholesky ===")
rng = np.random.default_rng(42)
n = 50
A_raw = rng.randn(n, n)
A = A_raw @ A_raw.T + 0.1 * np.eye(n)
b = rng.randn(n)
x_chol = cholesky_solve(A, b)
x_numpy = np.linalg.solve(A, b)
error = np.linalg.norm(x_chol - x_numpy)
residual = np.linalg.norm(A @ x_chol - b)
print(f" ||x_chol - x_numpy|| = {error:.2e}")
print(f" ||Ax - b|| = {residual:.2e}")
# =============================================================================
# Exercise 3: Benchmark Decompositions
# =============================================================================
def benchmark_solvers():
"""Time comparison: Cholesky vs solve vs inv."""
print("\n=== Exercise 3: Benchmark ===")
rng = np.random.default_rng(42)
print(f" {'n':>6} | {'Cholesky':>12} | {'np.solve':>12} | {'np.inv@b':>12}")
print(f" {'-'*52}")
for n in [100, 500, 1000]:
A_raw = rng.randn(n, n)
A = A_raw @ A_raw.T + 0.1 * np.eye(n)
b = rng.randn(n)
# Cholesky via scipy
from scipy.linalg import cho_factor, cho_solve
t0 = time.perf_counter()
c, low = cho_factor(A)
x1 = cho_solve((c, low), b)
t_chol = time.perf_counter() - t0
# numpy solve
t0 = time.perf_counter()
x2 = np.linalg.solve(A, b)
t_solve = time.perf_counter() - t0
# inv (BAD practice)
t0 = time.perf_counter()
x3 = np.linalg.inv(A) @ b
t_inv = time.perf_counter() - t0
print(f" {n:>6} | {t_chol:>10.4f}ms | {t_solve:>10.4f}ms | {t_inv:>10.4f}ms")
# =============================================================================
# Exercise 5: QR vs Normal Equations
# =============================================================================
def qr_vs_normal_equations():
"""Show QR is more accurate for ill-conditioned LS problems."""
print("\n=== Exercise 5: QR vs Normal Equations ===")
rng = np.random.default_rng(42)
for kappa_target in [10, 1e4, 1e8, 1e12]:
m, n = 200, 50
U, _ = np.linalg.qr(rng.randn(m, m))
V, _ = np.linalg.qr(rng.randn(n, n))
s = np.logspace(0, np.log10(kappa_target), n)
A = U[:, :n] @ np.diag(s) @ V.T
x_true = rng.randn(n)
b = A @ x_true + rng.randn(m) * 1e-10
# Normal equations: (A^T A) x = A^T b
try:
x_ne = np.linalg.solve(A.T @ A, A.T @ b)
err_ne = np.linalg.norm(x_ne - x_true) / np.linalg.norm(x_true)
except np.linalg.LinAlgError:
err_ne = float('inf')
# QR
x_qr, _, _, _ = np.linalg.lstsq(A, b, rcond=None)
err_qr = np.linalg.norm(x_qr - x_true) / np.linalg.norm(x_true)
print(f" κ={kappa_target:.0e}: NormalEq err={err_ne:.2e}, QR err={err_qr:.2e}")
# =============================================================================
# Main
# =============================================================================
if __name__ == "__main__":
test_cholesky()
test_cholesky_solve()
benchmark_solvers()
qr_vs_normal_equations()