Day 3: Gradients and Jacobians — Companion Code
⬇ Download gradients_jacobians.py
"""
Day 3: Gradients and Jacobians — Companion Code
Phase I, Week 1
Run: python gradients_jacobians.py
"""
import numpy as np
import sympy as sp
# =============================================================================
# Exercise 1: Symbolic Gradient Verification with SymPy
# =============================================================================
def symbolic_gradient_demo():
"""Compute gradients symbolically and verify."""
x, y = sp.symbols("x y")
# f(x,y) = x²y + sin(xy)
f = x**2 * y + sp.sin(x * y)
grad_f = [sp.diff(f, var) for var in [x, y]]
print("=== Symbolic Gradients ===")
print(f"f(x,y) = {f}")
print(f"∂f/∂x = {grad_f[0]}")
print(f"∂f/∂y = {grad_f[1]}")
# Evaluate at a point
point = {x: 1.0, y: 2.0}
grad_at_point = [g.evalf(subs=point) for g in grad_f]
print(f"\n∇f(1,2) = {[float(g) for g in grad_at_point]}")
# =============================================================================
# Exercise 2: Jacobian of Vector Function
# =============================================================================
def symbolic_jacobian_demo():
"""Compute Jacobian of f: R³ → R³."""
x, y, z = sp.symbols("x y z")
f = sp.Matrix([x**2 + y, y * z, x * z**2])
vars = sp.Matrix([x, y, z])
J = f.jacobian(vars)
print("\n=== Symbolic Jacobian ===")
print(f"f(x,y,z) = {f.T}")
print(f"J =\n{J}")
# Evaluate at point
point = {x: 1.0, y: 2.0, z: 3.0}
J_at_point = J.evalf(subs=point)
print(f"\nJ(1,2,3) =\n{np.array(J_at_point, dtype=float)}")
# =============================================================================
# Exercise 3: Numerical Gradient via Finite Differences
# =============================================================================
def numerical_gradient(f, x: np.ndarray, h: float = 1e-5) -> np.ndarray:
"""
Central difference gradient: ∂f/∂xᵢ ≈ [f(x+h·eᵢ) - f(x-h·eᵢ)] / 2h
"""
n = len(x)
grad = np.zeros(n)
for i in range(n):
e_i = np.zeros(n)
e_i[i] = 1.0
grad[i] = (f(x + h * e_i) - f(x - h * e_i)) / (2 * h)
return grad
def numerical_jacobian(f, x: np.ndarray, h: float = 1e-5) -> np.ndarray:
"""Central difference Jacobian for f: Rⁿ → Rᵐ."""
n = len(x)
f0 = f(x)
m = len(f0)
J = np.zeros((m, n))
for i in range(n):
e_i = np.zeros(n)
e_i[i] = 1.0
J[:, i] = (f(x + h * e_i) - f(x - h * e_i)) / (2 * h)
return J
# =============================================================================
# Exercise 4: Error Analysis — numerical vs analytic
# =============================================================================
def gradient_error_analysis():
"""Plot numerical gradient error vs step size h."""
# Test function: Rosenbrock
def f(x):
return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2
def grad_f_analytic(x):
dfdx = -2 * (1 - x[0]) + 100 * 2 * (x[1] - x[0]**2) * (-2 * x[0])
dfdy = 100 * 2 * (x[1] - x[0]**2)
return np.array([dfdx, dfdy])
x0 = np.array([0.5, 0.5])
g_analytic = grad_f_analytic(x0)
h_values = np.logspace(-15, 0, 100)
errors_forward = []
errors_central = []
for h in h_values:
# Forward difference
g_fwd = np.zeros(2)
for i in range(2):
e_i = np.zeros(2)
e_i[i] = 1.0
g_fwd[i] = (f(x0 + h * e_i) - f(x0)) / h
# Central difference
g_central = numerical_gradient(f, x0, h)
errors_forward.append(np.linalg.norm(g_fwd - g_analytic))
errors_central.append(np.linalg.norm(g_central - g_analytic))
print("\n=== Gradient Error Analysis ===")
print("Step size h | Forward Error | Central Error")
print("-" * 50)
for h, ef, ec in zip(
[1e-1, 1e-3, 1e-5, 1e-8, 1e-10, 1e-13],
[errors_forward[i] for i in [90, 70, 50, 30, 20, 5]],
[errors_central[i] for i in [90, 70, 50, 30, 20, 5]]
):
print(f" h={h:.0e} | {ef:.2e} | {ec:.2e}")
# Optimal h analysis
best_central = min(errors_central)
best_h_idx = errors_central.index(best_central)
print(f"\nBest central difference error: {best_central:.2e} at h={h_values[best_h_idx]:.2e}")
print("Theory: optimal h for central diff ≈ ε^(1/3) ≈ 6e-6 for float64")
# =============================================================================
# Gradient Checker Utility
# =============================================================================
def gradient_check(f, grad_f, x: np.ndarray, h: float = 1e-5, tol: float = 1e-5
) -> bool:
"""
Check analytic gradient against numerical gradient.
Returns True if they agree within tolerance.
"""
g_analytic = grad_f(x)
g_numerical = numerical_gradient(f, x, h)
# Relative error with safety denominator
eps = 1e-7
rel_errors = np.abs(g_analytic - g_numerical) / (np.abs(g_analytic) + np.abs(g_numerical) + eps)
max_error = np.max(rel_errors)
if max_error > tol:
print(f" ❌ Gradient check FAILED: max relative error = {max_error:.2e}")
print(f" Analytic: {g_analytic}")
print(f" Numerical: {g_numerical}")
return False
else:
print(f" ✓ Gradient check passed: max relative error = {max_error:.2e}")
return True
if __name__ == "__main__":
symbolic_gradient_demo()
symbolic_jacobian_demo()
gradient_error_analysis()
print("\n=== Gradient Checker Demo ===")
# Rosenbrock
def rosenbrock(x):
return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2
def rosenbrock_grad(x):
dfdx = -2 * (1 - x[0]) - 400 * x[0] * (x[1] - x[0]**2)
dfdy = 200 * (x[1] - x[0]**2)
return np.array([dfdx, dfdy])
for pt in [np.array([0.5, 0.5]), np.array([1.0, 1.0]), np.array([-1.2, 1.0])]:
print(f" At x={pt}:")
gradient_check(rosenbrock, rosenbrock_grad, pt)