Day 12: Function Landscapes & Conditioning — Companion Code
"""
Day 12: Function Landscapes & Conditioning — Companion Code
Phase I, Week 2
Run: python conditioning.py
"""
import numpy as np
import time
# =============================================================================
# Exercise 1: Condition Number Visualization
# =============================================================================
def gradient_descent_quadratic(H: np.ndarray, x0: np.ndarray,
lr: float = None,
max_iter: int = 500,
tol: float = 1e-8) -> dict:
"""Gradient descent on f(x) = 0.5 * x^T H x.
If lr is None, uses optimal lr = 2/(λ_max + λ_min).
Returns trajectory and iteration count.
"""
eigvals = np.linalg.eigvalsh(H)
if lr is None:
lr = 2.0 / (eigvals[-1] + eigvals[0])
x = x0.copy()
trajectory = [x.copy()]
for i in range(max_iter):
g = H @ x
x = x - lr * g
trajectory.append(x.copy())
if np.linalg.norm(g) < tol:
break
return {
"trajectory": np.array(trajectory),
"iterations": len(trajectory) - 1,
"final_x": x,
"kappa": eigvals[-1] / eigvals[0],
}
def condition_number_demo():
"""Show effect of condition number on gradient descent."""
print("=== Exercise 1: Condition Number & GD ===")
x0 = np.array([5.0, 5.0])
for kappa in [1, 10, 100, 1000]:
# Create 2x2 PD matrix with desired condition number
# Eigenvalues: 1 and kappa
theta = np.pi / 6 # rotate to make it non-axis-aligned
R = np.array([[np.cos(theta), -np.sin(theta)],
[np.sin(theta), np.cos(theta)]])
D = np.diag([1.0, float(kappa)])
H = R @ D @ R.T
result = gradient_descent_quadratic(H, x0)
print(f" κ = {kappa:>5}: {result['iterations']:>4} iterations, "
f"final ||x|| = {np.linalg.norm(result['final_x']):.2e}")
# =============================================================================
# Exercise 2: Digits Lost
# =============================================================================
def digits_lost_demo():
"""Show precision loss as condition number increases."""
print("\n=== Exercise 2: Digits Lost ===")
for log_kappa in [0, 3, 6, 9, 12, 15]:
n = 20
kappa = 10.0 ** log_kappa
# Create PD matrix with specified condition number
rng = np.random.default_rng(42)
Q, _ = np.linalg.qr(rng.randn(n, n))
eigvals = np.logspace(0, log_kappa, n)
H = Q @ np.diag(eigvals) @ Q.T
# True solution
x_true = rng.randn(n)
b = H @ x_true
# Solve
x_computed = np.linalg.solve(H, b)
rel_error = np.linalg.norm(x_computed - x_true) / np.linalg.norm(x_true)
digits_correct = max(0, -np.log10(rel_error + 1e-20))
print(f" κ = 10^{log_kappa:>2}: rel_error = {rel_error:.2e}, "
f"~{digits_correct:.1f} correct digits")
# =============================================================================
# Exercise 3: Preconditioning
# =============================================================================
def conjugate_gradient(H, b, x0=None, M_inv=None, max_iter=None, tol=1e-10):
"""Preconditioned conjugate gradient.
M_inv: function that applies M^{-1} to a vector (preconditioner).
"""
n = len(b)
if x0 is None:
x0 = np.zeros(n)
if max_iter is None:
max_iter = 2 * n
if M_inv is None:
M_inv = lambda v: v # no preconditioning
x = x0.copy()
r = b - H @ x
z = M_inv(r)
p = z.copy()
rz = r @ z
iterations = 0
for i in range(max_iter):
Hp = H @ p
alpha = rz / (p @ Hp)
x = x + alpha * p
r = r - alpha * Hp
if np.linalg.norm(r) < tol:
iterations = i + 1
break
z = M_inv(r)
rz_new = r @ z
beta = rz_new / rz
p = z + beta * p
rz = rz_new
iterations = i + 1
return x, iterations
def preconditioning_demo():
"""Compare CG with and without preconditioning."""
print("\n=== Exercise 3: Preconditioning ===")
n = 200
rng = np.random.default_rng(42)
# Create ill-conditioned PD matrix
Q, _ = np.linalg.qr(rng.randn(n, n))
eigvals = np.logspace(0, 4, n) # κ = 10^4
H = Q @ np.diag(eigvals) @ Q.T
b = rng.randn(n)
x_true = np.linalg.solve(H, b)
# No preconditioning
x_cg, iters_cg = conjugate_gradient(H, b)
err_cg = np.linalg.norm(x_cg - x_true) / np.linalg.norm(x_true)
# Jacobi preconditioning: M = diag(H)
diag_H = np.diag(H)
M_inv_jacobi = lambda v: v / diag_H
x_pcg, iters_pcg = conjugate_gradient(H, b, M_inv=M_inv_jacobi)
err_pcg = np.linalg.norm(x_pcg - x_true) / np.linalg.norm(x_true)
kappa_orig = eigvals[-1] / eigvals[0]
# Effective kappa after Jacobi
H_precond = np.diag(1.0/np.sqrt(diag_H)) @ H @ np.diag(1.0/np.sqrt(diag_H))
eigvals_precond = np.linalg.eigvalsh(H_precond)
kappa_precond = eigvals_precond[-1] / eigvals_precond[0]
print(f" n = {n}, κ(H) = {kappa_orig:.0f}")
print(f" CG without precond: {iters_cg} iters, rel_err = {err_cg:.2e}")
print(f" CG with Jacobi: {iters_pcg} iters, rel_err = {err_pcg:.2e}")
print(f" κ after Jacobi: {kappa_precond:.0f}")
print(f" Iteration reduction: {iters_cg - iters_pcg} fewer iterations")
# =============================================================================
# Exercise 4: Variable Scaling
# =============================================================================
def variable_scaling_demo():
"""SLAM-like problem with mixed scales."""
print("\n=== Exercise 4: Variable Scaling ===")
# Simulate: 2 translation params (meters, ~100) + 1 rotation (radians, ~0.01)
# Information matrix reflects measurement precision
rng = np.random.default_rng(42)
# Unscaled: translation info = 1/σ_t² = 10000, rotation info = 1/σ_θ² = 1
H_unscaled = np.diag([10000.0, 10000.0, 1.0])
# Add some coupling
H_unscaled[0, 2] = 50.0
H_unscaled[2, 0] = 50.0
H_unscaled[1, 2] = 30.0
H_unscaled[2, 1] = 30.0
b = np.array([100.0, 80.0, 0.5])
kappa_unscaled = np.linalg.cond(H_unscaled)
# Scale: s = [0.01, 0.01, 1] (divide translation by 100)
s = np.array([0.01, 0.01, 1.0])
S = np.diag(s)
H_scaled = S @ H_unscaled @ S
b_scaled = S @ b
kappa_scaled = np.linalg.cond(H_scaled)
# Solve both
x_unscaled = np.linalg.solve(H_unscaled, b)
x_tilde = np.linalg.solve(H_scaled, b_scaled)
x_from_scaled = x_tilde / s # unscale
print(f" Unscaled κ = {kappa_unscaled:.0f}")
print(f" Scaled κ = {kappa_scaled:.0f}")
print(f" Improvement: {kappa_unscaled/kappa_scaled:.1f}×")
print(f" Solutions match: {np.allclose(x_unscaled, x_from_scaled)}")
# GD comparison
x0 = np.array([1.0, 1.0, 1.0])
r_unscaled = gradient_descent_quadratic(H_unscaled, x0)
r_scaled = gradient_descent_quadratic(H_scaled, x0 * s)
print(f" GD iterations unscaled: {r_unscaled['iterations']}")
print(f" GD iterations scaled: {r_scaled['iterations']}")
# =============================================================================
# Main
# =============================================================================
if __name__ == "__main__":
condition_number_demo()
digits_lost_demo()
preconditioning_demo()
variable_scaling_demo()