Day 28: miniopt — A Minimal Optimization Library
"""
Day 28: miniopt — A Minimal Optimization Library
Phase II Capstone, Week 4
Usage:
from miniopt import minimize, OptimizeResult
result = minimize(rosenbrock, x0, method='bfgs', jac=rosenbrock_grad)
Run standalone: python miniopt.py
"""
import numpy as np
from dataclasses import dataclass, field
from typing import Callable, Optional, List, Tuple
# =============================================================================
# Result container
# =============================================================================
@dataclass
class OptimizeResult:
"""Result of an optimization run (mirrors scipy.optimize.OptimizeResult)."""
x: np.ndarray
fun: float
nit: int
nfev: int
ngrad: int
success: bool
message: str
history: List[Tuple[float, np.ndarray]] = field(default_factory=list)
# =============================================================================
# Standard test functions
# =============================================================================
def rosenbrock(x):
"""Rosenbrock: min=0 at (1,...,1)."""
return sum(100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2
for i in range(len(x) - 1))
def rosenbrock_grad(x):
n = len(x)
g = np.zeros(n)
for i in range(n - 1):
g[i] += -400 * x[i] * (x[i+1] - x[i]**2) - 2 * (1 - x[i])
g[i+1] += 200 * (x[i+1] - x[i]**2)
return g
def sphere(x):
"""Sphere: min=0 at origin."""
return np.sum(x**2)
def sphere_grad(x):
return 2 * x
def beale(x):
"""Beale (2D only): min=0 at (3, 0.5)."""
return ((1.5 - x[0] + x[0]*x[1])**2 +
(2.25 - x[0] + x[0]*x[1]**2)**2 +
(2.625 - x[0] + x[0]*x[1]**3)**2)
def booth(x):
"""Booth (2D only): min=0 at (1, 3)."""
return (x[0] + 2*x[1] - 7)**2 + (2*x[0] + x[1] - 5)**2
def styblinski_tang(x):
"""Styblinski-Tang: min ≈ -39.166*n at x ≈ -2.903."""
return 0.5 * np.sum(x**4 - 16*x**2 + 5*x)
# =============================================================================
# Utilities
# =============================================================================
def _numerical_gradient(f, x, h=1e-7):
"""Central difference gradient."""
n = len(x)
g = np.zeros(n)
for i in range(n):
xp = x.copy(); xp[i] += h
xm = x.copy(); xm[i] -= h
g[i] = (f(xp) - f(xm)) / (2 * h)
return g
def _backtracking_line_search(f, x, d, g, alpha_init=1.0,
c1=1e-4, rho=0.5, max_ls=40):
"""Backtracking line search satisfying Armijo condition."""
alpha = alpha_init
fx = f(x)
slope = g.dot(d)
for _ in range(max_ls):
if f(x + alpha * d) <= fx + c1 * alpha * slope:
return alpha
alpha *= rho
return alpha
# =============================================================================
# Optimization methods
# =============================================================================
def _gradient_descent(f, x0, jac, max_iter, tol, lr):
"""Gradient descent with backtracking line search."""
x = x0.copy()
nfev, ngrad = 0, 0
history = []
for k in range(max_iter):
fx = f(x); nfev += 1
g = jac(x); ngrad += 1
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
alpha = _backtracking_line_search(f, x, -g, g)
nfev += 20 # approximate line search evals
x = x - alpha * g
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
def _momentum(f, x0, jac, max_iter, tol, lr, beta=0.9):
"""Heavy ball momentum method."""
x = x0.copy()
v = np.zeros_like(x)
nfev, ngrad = 0, 0
history = []
for k in range(max_iter):
fx = f(x); nfev += 1
g = jac(x); ngrad += 1
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
v = beta * v - lr * g
x = x + v
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
def _nesterov(f, x0, jac, max_iter, tol, lr, beta=0.9):
"""Nesterov accelerated gradient."""
x = x0.copy()
v = np.zeros_like(x)
nfev, ngrad = 0, 0
history = []
for k in range(max_iter):
fx = f(x); nfev += 1
# Look-ahead gradient
g = jac(x + beta * v); ngrad += 1
nfev += len(x) * 2 # numerical grad cost if applicable
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
v = beta * v - lr * g
x = x + v
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
def _bfgs(f, x0, jac, max_iter, tol, **kwargs):
"""BFGS quasi-Newton method."""
n = len(x0)
x = x0.copy()
H = np.eye(n)
nfev, ngrad = 0, 0
history = []
g = jac(x); ngrad += 1
for k in range(max_iter):
fx = f(x); nfev += 1
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
d = -H @ g
alpha = _backtracking_line_search(f, x, d, g)
nfev += 20
s = alpha * d
x_new = x + s
g_new = jac(x_new); ngrad += 1
y = g_new - g
sy = s.dot(y)
if sy > 1e-10:
rho = 1.0 / sy
I = np.eye(n)
V = I - rho * np.outer(s, y)
H = V @ H @ V.T + rho * np.outer(s, s)
x = x_new
g = g_new
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
def _lbfgs(f, x0, jac, max_iter, tol, m=10, **kwargs):
"""L-BFGS with two-loop recursion."""
n = len(x0)
x = x0.copy()
nfev, ngrad = 0, 0
history = []
s_list, y_list, rho_list = [], [], []
g = jac(x); ngrad += 1
for k in range(max_iter):
fx = f(x); nfev += 1
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
# Two-loop recursion
q = g.copy()
alphas = []
for i in range(len(s_list) - 1, -1, -1):
a = rho_list[i] * s_list[i].dot(q)
alphas.append(a)
q = q - a * y_list[i]
alphas.reverse()
# Initial Hessian approximation
if len(s_list) > 0:
gamma = s_list[-1].dot(y_list[-1]) / y_list[-1].dot(y_list[-1])
r = gamma * q
else:
r = q.copy()
for i in range(len(s_list)):
beta = rho_list[i] * y_list[i].dot(r)
r = r + (alphas[i] - beta) * s_list[i]
d = -r
alpha = _backtracking_line_search(f, x, d, g)
nfev += 20
s = alpha * d
x_new = x + s
g_new = jac(x_new); ngrad += 1
y = g_new - g
sy = s.dot(y)
if sy > 1e-10:
s_list.append(s)
y_list.append(y)
rho_list.append(1.0 / sy)
if len(s_list) > m:
s_list.pop(0)
y_list.pop(0)
rho_list.pop(0)
x = x_new
g = g_new
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
def _adam(f, x0, jac, max_iter, tol, lr=0.001,
beta1=0.9, beta2=0.999, eps=1e-8, **kwargs):
"""Adam optimizer."""
x = x0.copy()
m_vec = np.zeros_like(x)
v_vec = np.zeros_like(x)
nfev, ngrad = 0, 0
history = []
for k in range(1, max_iter + 1):
fx = f(x); nfev += 1
g = jac(x); ngrad += 1
history.append((fx, x.copy()))
if np.linalg.norm(g) < tol:
return OptimizeResult(x=x, fun=fx, nit=k, nfev=nfev,
ngrad=ngrad, success=True,
message="Converged", history=history)
m_vec = beta1 * m_vec + (1 - beta1) * g
v_vec = beta2 * v_vec + (1 - beta2) * g**2
m_hat = m_vec / (1 - beta1**k)
v_hat = v_vec / (1 - beta2**k)
x = x - lr * m_hat / (np.sqrt(v_hat) + eps)
fx = f(x); nfev += 1
return OptimizeResult(x=x, fun=fx, nit=max_iter, nfev=nfev,
ngrad=ngrad, success=False,
message="Max iterations reached", history=history)
# =============================================================================
# Main entry point
# =============================================================================
METHODS = {
'gd': _gradient_descent,
'momentum': _momentum,
'nesterov': _nesterov,
'bfgs': _bfgs,
'lbfgs': _lbfgs,
'adam': _adam,
}
def minimize(fun: Callable, x0: np.ndarray, method: str = 'bfgs',
jac: Optional[Callable] = None,
max_iter: int = 5000, tol: float = 1e-8,
lr: float = 0.01, **kwargs) -> OptimizeResult:
"""
Minimize a scalar function of one or more variables.
Parameters:
fun: Objective function f(x) → float
x0: Initial guess (1D array)
method: One of 'gd', 'momentum', 'nesterov', 'bfgs', 'lbfgs', 'adam'
jac: Gradient function. If None, uses numerical differentiation.
max_iter: Maximum iterations
tol: Gradient norm tolerance for convergence
lr: Learning rate (for gd, momentum, nesterov, adam)
**kwargs: Additional method-specific parameters
Returns:
OptimizeResult with x, fun, nit, nfev, ngrad, success, message, history
"""
x0 = np.asarray(x0, dtype=float).copy()
method_lower = method.lower()
if method_lower not in METHODS:
raise ValueError(
f"Unknown method '{method}'. Choose from: {list(METHODS.keys())}")
# Default to numerical gradient if not provided
if jac is None:
jac = lambda x: _numerical_gradient(fun, x)
solver = METHODS[method_lower]
return solver(fun, x0, jac, max_iter, tol, lr=lr, **kwargs)
# =============================================================================
# Demo
# =============================================================================
if __name__ == "__main__":
print("=" * 60)
print("miniopt — Phase II Capstone Optimization Library")
print("=" * 60)
# Test on Rosenbrock 5D
n = 5
x0 = np.zeros(n)
x_star = np.ones(n)
print(f"\nRosenbrock {n}D — all methods:")
print(f" {'Method':>10} {'f(x*)':>12} {'||x-x*||':>10} "
f"{'nit':>6} {'ok?':>4}")
print(" " + "-" * 55)
configs = [
('gd', {'lr': 0.001, 'max_iter': 20000}),
('momentum', {'lr': 0.0005, 'max_iter': 20000}),
('nesterov', {'lr': 0.0005, 'max_iter': 20000}),
('bfgs', {}),
('lbfgs', {}),
('adam', {'lr': 0.005, 'max_iter': 20000}),
]
for method, opts in configs:
res = minimize(rosenbrock, x0, method=method,
jac=rosenbrock_grad, **opts)
dist = np.linalg.norm(res.x - x_star)
print(f" {method:>10} {res.fun:>12.6e} {dist:>10.6f} "
f"{res.nit:>6} {'✓' if res.success else '✗':>4}")
# Quick tests on other functions
print("\nSphere 10D (BFGS):")
res = minimize(sphere, np.ones(10) * 5, method='bfgs', jac=sphere_grad)
print(f" f={res.fun:.2e}, ||x||={np.linalg.norm(res.x):.2e}, "
f"nit={res.nit}, success={res.success}")
print("\nBooth 2D (L-BFGS):")
res = minimize(booth, np.array([0.0, 0.0]), method='lbfgs')
print(f" f={res.fun:.2e}, x={res.x}, nit={res.nit}")
print("\nAvailable methods:", list(METHODS.keys()))