Newtons Method

Day 17: Newton's Method — Companion Code
"""
Day 17: Newton's Method — Companion Code
Phase II, Week 3

Run: python newtons_method.py
"""

import numpy as np


# =============================================================================
# Test Functions
# =============================================================================

def rosenbrock_fgh(x):
    """Rosenbrock: f, gradient, Hessian."""
    f = (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2

    g = np.array([
        -2*(1 - x[0]) + 100 * 2*(x[1] - x[0]**2) * (-2*x[0]),
        100 * 2*(x[1] - x[0]**2)
    ])

    H = np.array([
        [2 + 100*(12*x[0]**2 - 4*x[1]), -400*x[0]],
        [-400*x[0], 200]
    ])

    return f, g, H


def quadratic_fgh(A, b=None):
    """Quadratic f = 0.5 x^T A x - b^T x: returns (f_func, g_func, H_func)."""
    n = A.shape[0]
    if b is None:
        b = np.zeros(n)

    def f(x): return 0.5 * x @ A @ x - b @ x
    def g(x): return A @ x - b
    def H(x): return A

    x_star = np.linalg.solve(A, b)
    return f, g, H, x_star


# =============================================================================
# Exercise 1: Pure Newton's Method
# =============================================================================

def newtons_method(fgh, x0, max_iter=100, tol=1e-12):
    """Pure Newton's method.

    fgh: callable returning (f, grad, hessian).
    """
    x = x0.copy().astype(float)
    history = {"f": [], "grad_norm": [], "x": [], "error": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)

        history["f"].append(f)
        history["grad_norm"].append(gnorm)
        history["x"].append(x.copy())

        if gnorm < tol:
            break

        # Newton step: solve H δ = -g
        try:
            delta = np.linalg.solve(H, -g)
        except np.linalg.LinAlgError:
            print(f"  Singular Hessian at iteration {k}")
            break

        x = x + delta

    return x, history


# =============================================================================
# Exercise 2: Newton on Rosenbrock
# =============================================================================

def test_rosenbrock():
    """Compare Newton vs GD on Rosenbrock."""
    print("=== Exercise 2: Newton vs GD on Rosenbrock ===")
    x0 = np.array([-1.0, 1.0])
    x_star = np.array([1.0, 1.0])

    # Newton
    x_newton, hist_newton = newtons_method(rosenbrock_fgh, x0, max_iter=100)
    iters_n = len(hist_newton["f"])
    err_n = np.linalg.norm(x_newton - x_star)

    # GD with backtracking for comparison
    x_gd, hist_gd = gd_backtracking(x0, max_iter=50000)
    iters_gd = len(hist_gd["f"])
    err_gd = np.linalg.norm(x_gd - x_star)

    print(f"  Newton:  {iters_n:>6} iters, error = {err_n:.2e}")
    print(f"  GD:      {iters_gd:>6} iters, error = {err_gd:.2e}")
    print(f"  Speedup: {iters_gd / max(iters_n, 1):.0f}×")


def gd_backtracking(x0, max_iter=50000, tol=1e-8):
    """Simple GD for comparison."""
    x = x0.copy()
    history = {"f": [], "x": []}

    for k in range(max_iter):
        f, g, _ = rosenbrock_fgh(x)
        history["f"].append(f)
        history["x"].append(x.copy())

        gnorm = np.linalg.norm(g)
        if gnorm < tol:
            break

        # Backtracking
        alpha = 1.0
        d = -g
        for _ in range(50):
            f_new, _, _ = rosenbrock_fgh(x + alpha * d)
            if f_new <= f + 1e-4 * alpha * (g @ d):
                break
            alpha *= 0.5

        x = x + alpha * d

    return x, history


# =============================================================================
# Exercise 3: Quadratic Convergence Verification
# =============================================================================

def verify_quadratic_convergence():
    """Show error_k+1 ≈ C * error_k^2."""
    print("\n=== Exercise 3: Quadratic Convergence ===")
    x0 = np.array([2.0, -3.0])
    x_star = np.array([1.0, 1.0])

    _, hist = newtons_method(rosenbrock_fgh, x0, max_iter=30)

    errors = [np.linalg.norm(np.array(xi) - x_star)
              for xi in hist["x"]]

    print(f"  {'k':>3}  {'||e_k||':>12}  {'||e_{k+1}||/||e_k||^2':>22}")
    for k in range(len(errors) - 1):
        if errors[k] > 1e-15 and errors[k+1] > 0:
            ratio = errors[k+1] / errors[k]**2 if errors[k] > 1e-12 else float('inf')
            print(f"  {k:>3}  {errors[k]:>12.4e}  {ratio:>22.4f}")


# =============================================================================
# Exercise 4: Failure Case — Indefinite Hessian
# =============================================================================

def test_failure_case():
    """Newton fails when started far from minimum (indefinite H)."""
    print("\n=== Exercise 4: Failure Case ===")

    # f(x) = x^4/4 - x^2/2 (non-convex, minima at ±1, saddle at 0)
    def fgh_quartic(x):
        x1 = x[0]
        f = x1**4/4 - x1**2/2
        g = np.array([x1**3 - x1])
        H = np.array([[3*x1**2 - 1]])
        return f, g, H

    x0 = np.array([0.4])  # Hessian = 3*0.16 - 1 = -0.52 < 0 (indefinite!)
    _, g0, H0 = fgh_quartic(x0)
    print(f"  At x0 = {x0[0]}: H = {H0[0,0]:.2f} ({'PD' if H0[0,0] > 0 else 'INDEFINITE'})")

    x, hist = newtons_method(fgh_quartic, x0, max_iter=50)
    print(f"  Pure Newton converges to: x = {x[0]:.6f} (saddle at 0!)")

    # Damped Newton
    x_d, hist_d = damped_newton(fgh_quartic, x0, max_iter=50)
    print(f"  Damped Newton converges to: x = {x_d[0]:.6f} (minimum at ±1)")


# =============================================================================
# Exercise 5: Modified Newton (H + μI)
# =============================================================================

def modified_newton(fgh, x0, max_iter=100, tol=1e-12, mu_init=1e-3):
    """Newton with H + μI regularization for indefinite Hessians."""
    x = x0.copy().astype(float)
    history = {"f": [], "grad_norm": [], "x": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)
        history["f"].append(f)
        history["grad_norm"].append(gnorm)
        history["x"].append(x.copy())

        if gnorm < tol:
            break

        # Ensure H + μI is PD
        eigvals = np.linalg.eigvalsh(H)
        min_eig = np.min(eigvals)
        mu = max(0, -min_eig + mu_init)

        H_reg = H + mu * np.eye(len(x))
        delta = np.linalg.solve(H_reg, -g)
        x = x + delta

    return x, history


def damped_newton(fgh, x0, max_iter=100, tol=1e-12):
    """Newton with line search for global convergence."""
    x = x0.copy().astype(float)
    history = {"f": [], "grad_norm": [], "x": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)
        history["f"].append(f)
        history["grad_norm"].append(gnorm)
        history["x"].append(x.copy())

        if gnorm < tol:
            break

        # Modified Newton direction
        eigvals = np.linalg.eigvalsh(H)
        min_eig = np.min(eigvals)
        mu = max(0, -min_eig + 1e-4)
        H_reg = H + mu * np.eye(len(x))
        delta = np.linalg.solve(H_reg, -g)

        # Backtracking line search
        alpha = 1.0
        for _ in range(50):
            f_new, _, _ = fgh(x + alpha * delta)
            if f_new <= f + 1e-4 * alpha * (g @ delta):
                break
            alpha *= 0.5

        x = x + alpha * delta

    return x, history


def test_modified_newton():
    """Modified Newton on Rosenbrock from a bad start."""
    print("\n=== Exercise 5: Modified Newton ===")
    x0 = np.array([-5.0, 5.0])
    x_star = np.array([1.0, 1.0])

    # Pure Newton may diverge from far away
    x_pure, hist_pure = newtons_method(rosenbrock_fgh, x0, max_iter=100)
    err_pure = np.linalg.norm(x_pure - x_star)

    # Modified Newton
    x_mod, hist_mod = modified_newton(rosenbrock_fgh, x0, max_iter=100)
    err_mod = np.linalg.norm(x_mod - x_star)

    # Damped Newton (with line search)
    x_damp, hist_damp = damped_newton(rosenbrock_fgh, x0, max_iter=100)
    err_damp = np.linalg.norm(x_damp - x_star)

    print(f"  From x0 = {x0}:")
    print(f"  Pure Newton:     {len(hist_pure['f']):>4} iters, err = {err_pure:.2e}")
    print(f"  Modified (H+μI): {len(hist_mod['f']):>4} iters, err = {err_mod:.2e}")
    print(f"  Damped (LS):     {len(hist_damp['f']):>4} iters, err = {err_damp:.2e}")


# =============================================================================
# Main
# =============================================================================

if __name__ == "__main__":
    test_rosenbrock()
    verify_quadratic_convergence()
    test_failure_case()
    test_modified_newton()