Day 21: Optimizer Shootout — Benchmarking Framework
⬇ Download optimizer_shootout.py
"""
Day 21: Optimizer Shootout — Benchmarking Framework
Phase II, Week 3
Run: python optimizer_shootout.py
"""
import numpy as np
import time
from collections import deque
# =============================================================================
# Test Functions: (f, grad, hessian_or_None, x_star, name, dim)
# =============================================================================
def rosenbrock_2d():
"""Rosenbrock 2D — narrow curved valley."""
def f(x):
return (1-x[0])**2 + 100*(x[1]-x[0]**2)**2
def grad(x):
return np.array([
-2*(1-x[0]) - 400*x[0]*(x[1]-x[0]**2),
200*(x[1]-x[0]**2)])
def hess(x):
return np.array([
[2+1200*x[0]**2-400*x[1], -400*x[0]],
[-400*x[0], 200]])
return f, grad, hess, np.array([1.0, 1.0]), "Rosenbrock 2D", 2
def beale():
"""Beale function — flat regions."""
def f(x):
t1 = (1.5 - x[0] + x[0]*x[1])**2
t2 = (2.25 - x[0] + x[0]*x[1]**2)**2
t3 = (2.625 - x[0] + x[0]*x[1]**3)**2
return t1 + t2 + t3
def grad(x):
g = np.zeros(2)
r1 = 1.5 - x[0] + x[0]*x[1]
r2 = 2.25 - x[0] + x[0]*x[1]**2
r3 = 2.625 - x[0] + x[0]*x[1]**3
g[0] = 2*r1*(-1+x[1]) + 2*r2*(-1+x[1]**2) + 2*r3*(-1+x[1]**3)
g[1] = 2*r1*x[0] + 2*r2*2*x[0]*x[1] + 2*r3*3*x[0]*x[1]**2
return g
return f, grad, None, np.array([3.0, 0.5]), "Beale 2D", 2
def rosenbrock_nd(n=20):
"""Extended Rosenbrock nD."""
def f(x):
return sum(100*(x[i+1]-x[i]**2)**2 + (1-x[i])**2
for i in range(n-1))
def grad(x):
g = np.zeros(n)
for i in range(n-1):
g[i] += -2*(1-x[i]) + 200*(x[i+1]-x[i]**2)*(-2*x[i])
g[i+1] += 200*(x[i+1]-x[i]**2)
return g
return f, grad, None, np.ones(n), f"Rosenbrock {n}D", n
def quadratic_50d():
"""Quadratic 50D with κ = 1000."""
rng = np.random.RandomState(42)
n = 50
Q, _ = np.linalg.qr(rng.randn(n, n))
eigvals = np.linspace(1, 1000, n)
A = Q @ np.diag(eigvals) @ Q.T
b = rng.randn(n)
x_star = np.linalg.solve(A, b)
def f(x): return 0.5 * x @ A @ x - b @ x
def grad(x): return A @ x - b
def hess(x): return A
return f, grad, hess, x_star, "Quadratic 50D (κ=1000)", n
def styblinski_tang_5d():
"""Styblinski-Tang 5D — many local minima."""
n = 5
def f(x):
return 0.5 * sum(x[i]**4 - 16*x[i]**2 + 5*x[i] for i in range(n))
def grad(x):
return np.array([2*x[i]**3 - 16*x[i] + 2.5 for i in range(n)])
x_star = np.full(n, -2.903534)
return f, grad, None, x_star, "Styblinski-Tang 5D", n
# =============================================================================
# Optimizers
# =============================================================================
def wolfe_ls(f, grad, x, d, c1=1e-4, c2=0.9, max_iter=20):
"""Wolfe line search."""
alpha = 1.0
f0 = f(x)
g0 = grad(x)
dg0 = g0 @ d
if dg0 >= 0:
return 1e-4
n_eval = 0
for _ in range(max_iter):
n_eval += 1
fn = f(x + alpha * d)
if fn > f0 + c1 * alpha * dg0:
alpha *= 0.5
continue
gn = grad(x + alpha * d)
if abs(gn @ d) <= c2 * abs(dg0):
return alpha
if gn @ d > 0:
alpha *= 0.5
else:
alpha *= 2.0
return alpha
def opt_gd(f, grad, hess, x0, max_iter=10000, tol=1e-8):
"""Gradient descent with backtracking."""
x = x0.copy()
for k in range(max_iter):
g = grad(x)
if np.linalg.norm(g) < tol:
return x, k+1
alpha = 1.0
fval = f(x)
for _ in range(30):
if f(x - alpha*g) <= fval - 1e-4*alpha*(g@g):
break
alpha *= 0.5
x = x - alpha * g
return x, max_iter
def opt_newton_ls(f, grad, hess, x0, max_iter=500, tol=1e-8):
"""Newton with line search (requires Hessian)."""
if hess is None:
return x0, -1 # Can't run
x = x0.copy()
for k in range(max_iter):
g = grad(x)
if np.linalg.norm(g) < tol:
return x, k+1
H = hess(x)
eigvals = np.linalg.eigvalsh(H)
mu = max(0, -np.min(eigvals) + 1e-6)
delta = np.linalg.solve(H + mu*np.eye(len(x)), -g)
alpha = wolfe_ls(f, grad, x, delta)
x = x + alpha * delta
return x, max_iter
def opt_trust_region(f, grad, hess, x0, max_iter=500, tol=1e-8):
"""Trust region with dogleg (requires Hessian)."""
if hess is None:
return x0, -1
x = x0.copy()
delta_r = 1.0
for k in range(max_iter):
g = grad(x)
gnorm = np.linalg.norm(g)
if gnorm < tol:
return x, k+1
H = hess(x)
eigvals = np.linalg.eigvalsh(H)
if np.min(eigvals) > 1e-10:
pn = np.linalg.solve(H, -g)
else:
mu = -np.min(eigvals) + 1e-4
pn = np.linalg.solve(H + mu*np.eye(len(x)), -g)
if np.linalg.norm(pn) <= delta_r:
p = pn
else:
p = -delta_r / gnorm * g
fval = f(x)
fn = f(x + p)
pred = -(g@p + 0.5*p@H@p)
if pred > 1e-16:
rho = (fval - fn) / pred
else:
rho = 0.0
if rho > 0.01:
x = x + p
if rho < 0.25:
delta_r *= 0.25
elif rho > 0.75:
delta_r = min(2*delta_r, 100)
return x, max_iter
def opt_bfgs(f, grad, hess, x0, max_iter=2000, tol=1e-8):
"""BFGS with Wolfe line search."""
n = len(x0)
x = x0.copy()
H = np.eye(n)
g = grad(x)
for k in range(max_iter):
if np.linalg.norm(g) < tol:
return x, k+1
d = -H @ g
alpha = wolfe_ls(f, grad, x, d)
s = alpha * d
x_new = x + s
g_new = grad(x_new)
y = g_new - g
sy = s @ y
if sy > 1e-10:
rho = 1.0/sy
I = np.eye(n)
V = I - rho*np.outer(s, y)
H = V @ H @ V.T + rho*np.outer(s, s)
x = x_new
g = g_new
return x, max_iter
def opt_lbfgs(f, grad, hess, x0, m=10, max_iter=2000, tol=1e-8):
"""L-BFGS with two-loop recursion."""
x = x0.copy()
g = grad(x)
s_h = deque(maxlen=m)
y_h = deque(maxlen=m)
r_h = deque(maxlen=m)
for k in range(max_iter):
if np.linalg.norm(g) < tol:
return x, k+1
q = g.copy()
als = []
for i in range(len(s_h)-1, -1, -1):
a = r_h[i] * (s_h[i] @ q)
als.insert(0, a)
q -= a * y_h[i]
gam = (s_h[-1]@y_h[-1])/(y_h[-1]@y_h[-1]) if len(s_h) > 0 else 1.0
r = gam * q
for i in range(len(s_h)):
b = r_h[i] * (y_h[i] @ r)
r += s_h[i] * (als[i] - b)
d = -r
alpha = wolfe_ls(f, grad, x, d)
s = alpha * d
x_new = x + s
g_new = grad(x_new)
y = g_new - g
sy = s @ y
if sy > 1e-10:
s_h.append(s)
y_h.append(y)
r_h.append(1.0/sy)
x = x_new
g = g_new
return x, max_iter
def opt_cg(f, grad, hess, x0, max_iter=5000, tol=1e-8):
"""Nonlinear CG with PR+."""
x = x0.copy()
g = grad(x)
d = -g.copy()
for k in range(max_iter):
gnorm = np.linalg.norm(g)
if gnorm < tol:
return x, k+1
alpha = wolfe_ls(f, grad, x, d, c2=0.4)
x_new = x + alpha * d
g_new = grad(x_new)
gg = g @ g
beta = max(0.0, g_new@(g_new - g) / (gg + 1e-30))
if abs(g_new @ g) > 0.2 * (g_new @ g_new):
beta = 0.0
d = -g_new + beta * d
x = x_new
g = g_new
return x, max_iter
# =============================================================================
# Benchmarking Framework
# =============================================================================
def run_shootout():
"""Run all optimizers on all test functions."""
print("=" * 80)
print("OPTIMIZER SHOOTOUT — Week 3 Benchmark")
print("=" * 80)
problems = [
rosenbrock_2d(),
beale(),
rosenbrock_nd(20),
quadratic_50d(),
styblinski_tang_5d(),
]
methods = [
("GD", opt_gd),
("Newton+LS", opt_newton_ls),
("Trust Reg", opt_trust_region),
("BFGS", opt_bfgs),
("L-BFGS", opt_lbfgs),
("CG (PR+)", opt_cg),
]
for f_func, grad_func, hess_func, x_star, name, ndim in problems:
print(f"\n{'─' * 70}")
print(f"Problem: {name}")
print(f"{'─' * 70}")
print(f" {'Method':<12} {'Iters':>7} {'Error':>10} "
f"{'||g||':>10} {'Time(ms)':>9}")
# Starting point
rng = np.random.RandomState(123)
x0 = rng.uniform(-2, 2, ndim)
for mname, mfunc in methods:
t0 = time.perf_counter()
try:
x_final, iters = mfunc(f_func, grad_func, hess_func,
x0.copy())
elapsed = (time.perf_counter() - t0) * 1000
if iters < 0:
print(f" {mname:<12} {'N/A (no H)':>7}")
continue
err = np.linalg.norm(x_final - x_star)
gnorm = np.linalg.norm(grad_func(x_final))
print(f" {mname:<12} {iters:>7} {err:>10.2e} "
f"{gnorm:>10.2e} {elapsed:>9.1f}")
except Exception as e:
elapsed = (time.perf_counter() - t0) * 1000
print(f" {mname:<12} {'FAIL':>7} {str(e)[:30]}")
def print_summary():
"""Print decision guide."""
print(f"\n{'=' * 70}")
print("DECISION GUIDE")
print(f"{'=' * 70}")
print("""
Small n + Hessian available → Trust Region (Dogleg) or Newton+LS
Medium n + no Hessian → BFGS
Large n + gradients only → L-BFGS (m=10)
Very large n + minimal mem → CG (PR+)
Non-smooth or noisy → GD with momentum (Week 4)
In Ceres:
Default → TRUST_REGION + LEVENBERG_MARQUARDT
Large sparse → TRUST_REGION + ITERATIVE_SCHUR
Smooth trajectory opt → LINE_SEARCH + LBFGS
""")
# =============================================================================
# Main
# =============================================================================
if __name__ == "__main__":
run_shootout()
print_summary()