Trust Region

Day 18: Trust Region Methods — Companion Code
"""
Day 18: Trust Region Methods — Companion Code
Phase II, Week 3

Run: python trust_region.py
"""

import numpy as np


# =============================================================================
# Test Functions
# =============================================================================

def rosenbrock_fgh(x):
    """Rosenbrock: f, gradient, Hessian."""
    f = (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2
    g = np.array([
        -2*(1 - x[0]) + 100 * 2*(x[1] - x[0]**2) * (-2*x[0]),
        100 * 2*(x[1] - x[0]**2)
    ])
    H = np.array([
        [2 + 100*(12*x[0]**2 - 4*x[1]), -400*x[0]],
        [-400*x[0], 200]
    ])
    return f, g, H


def saddle_fgh(x):
    """f = x^2 - y^2 — saddle point at origin."""
    f = x[0]**2 - x[1]**2
    g = np.array([2*x[0], -2*x[1]])
    H = np.array([[2.0, 0.0], [0.0, -2.0]])
    return f, g, H


# =============================================================================
# Exercise 1: Cauchy Point Trust Region
# =============================================================================

def cauchy_point(g, H, delta):
    """Compute the Cauchy point within trust region ||p|| <= delta."""
    gnorm = np.linalg.norm(g)
    if gnorm < 1e-16:
        return np.zeros_like(g)

    gHg = g @ H @ g

    if gHg > 0:
        tau = min(1.0, gnorm**3 / (delta * gHg))
    else:
        tau = 1.0

    p_c = -tau * (delta / gnorm) * g
    return p_c


def trust_region_cauchy(fgh, x0, delta0=1.0, max_iter=500, tol=1e-10):
    """Trust region method using Cauchy point only."""
    x = x0.copy().astype(float)
    delta = delta0
    history = {"f": [], "grad_norm": [], "delta": [], "rho": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)
        history["f"].append(f)
        history["grad_norm"].append(gnorm)
        history["delta"].append(delta)

        if gnorm < tol:
            break

        # Cauchy point
        p = cauchy_point(g, H, delta)

        # Evaluate actual vs predicted reduction
        f_new, _, _ = fgh(x + p)
        actual_red = f - f_new
        predicted_red = -(g @ p + 0.5 * p @ H @ p)

        if predicted_red < 1e-16:
            rho = 0.0
        else:
            rho = actual_red / predicted_red

        history["rho"].append(rho)

        # Update x
        if rho > 0.01:
            x = x + p

        # Update radius
        if rho < 0.25:
            delta *= 0.25
        elif rho > 0.75 and abs(np.linalg.norm(p) - delta) < 1e-10:
            delta = min(2 * delta, 100.0)

    return x, history


# =============================================================================
# Exercise 2: Dogleg Trust Region
# =============================================================================

def dogleg_step(g, H, delta):
    """Compute the dogleg step within trust region ||p|| <= delta."""
    gnorm = np.linalg.norm(g)
    if gnorm < 1e-16:
        return np.zeros_like(g)

    # Full Newton step
    try:
        eigvals = np.linalg.eigvalsh(H)
        if np.min(eigvals) > 1e-10:
            p_n = np.linalg.solve(H, -g)
        else:
            # Hessian not PD — use regularized
            mu = max(0, -np.min(eigvals) + 1e-4)
            p_n = np.linalg.solve(H + mu * np.eye(len(g)), -g)
    except np.linalg.LinAlgError:
        p_n = -g  # fallback to gradient

    # If Newton step is inside trust region, use it
    if np.linalg.norm(p_n) <= delta:
        return p_n

    # Cauchy point (steepest descent direction scaled)
    gHg = g @ H @ g
    if gHg > 0:
        alpha_sd = gnorm**2 / gHg
    else:
        alpha_sd = delta / gnorm
    p_c = -alpha_sd * g

    # If Cauchy point is outside trust region, scale gradient to boundary
    if np.linalg.norm(p_c) >= delta:
        return -delta / gnorm * g

    # Dogleg: find tau such that ||p_c + tau*(p_n - p_c)|| = delta
    diff = p_n - p_c
    a = diff @ diff
    b = 2 * p_c @ diff
    c = p_c @ p_c - delta**2
    discriminant = b**2 - 4*a*c

    if discriminant < 0 or a < 1e-16:
        return -delta / gnorm * g

    tau = (-b + np.sqrt(discriminant)) / (2 * a)
    tau = np.clip(tau, 0.0, 1.0)

    return p_c + tau * diff


def trust_region_dogleg(fgh, x0, delta0=1.0, max_iter=500, tol=1e-10):
    """Trust region method with dogleg step."""
    x = x0.copy().astype(float)
    delta = delta0
    history = {"f": [], "grad_norm": [], "delta": [], "rho": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)
        history["f"].append(f)
        history["grad_norm"].append(gnorm)
        history["delta"].append(delta)

        if gnorm < tol:
            break

        # Dogleg step
        p = dogleg_step(g, H, delta)

        # Evaluate ratio
        f_new, _, _ = fgh(x + p)
        actual_red = f - f_new
        predicted_red = -(g @ p + 0.5 * p @ H @ p)

        if predicted_red < 1e-16:
            rho = 0.0
        else:
            rho = actual_red / predicted_red

        history["rho"].append(rho)

        # Accept/reject
        if rho > 0.01:
            x = x + p

        # Radius update
        if rho < 0.25:
            delta *= 0.25
        elif rho > 0.75 and abs(np.linalg.norm(p) - delta) < 1e-8 * delta:
            delta = min(2 * delta, 100.0)

    return x, history


# =============================================================================
# Exercise 3: Comparison with Line Search Newton
# =============================================================================

def newton_line_search(fgh, x0, max_iter=200, tol=1e-10):
    """Newton with backtracking for comparison."""
    x = x0.copy().astype(float)
    history = {"f": [], "grad_norm": []}

    for k in range(max_iter):
        f, g, H = fgh(x)
        gnorm = np.linalg.norm(g)
        history["f"].append(f)
        history["grad_norm"].append(gnorm)

        if gnorm < tol:
            break

        # Modified Newton direction
        eigvals = np.linalg.eigvalsh(H)
        mu = max(0, -np.min(eigvals) + 1e-4)
        delta_n = np.linalg.solve(H + mu * np.eye(len(g)), -g)

        # Backtracking
        alpha = 1.0
        for _ in range(50):
            f_new, _, _ = fgh(x + alpha * delta_n)
            if f_new <= f + 1e-4 * alpha * (g @ delta_n):
                break
            alpha *= 0.5

        x = x + alpha * delta_n

    return x, history


def compare_methods():
    """Compare Cauchy, Dogleg, and Line Search Newton."""
    print("=== Exercise 3: Method Comparison on Rosenbrock ===")
    x_star = np.array([1.0, 1.0])

    starts = [
        np.array([-1.0, 1.0]),
        np.array([5.0, -5.0]),
        np.array([-3.0, 3.0]),
    ]

    for x0 in starts:
        x_c, h_c = trust_region_cauchy(rosenbrock_fgh, x0, delta0=1.0)
        x_d, h_d = trust_region_dogleg(rosenbrock_fgh, x0, delta0=1.0)
        x_n, h_n = newton_line_search(rosenbrock_fgh, x0)

        print(f"\n  x0 = [{x0[0]:>5.1f}, {x0[1]:>5.1f}]:")
        print(f"    Cauchy TR:  {len(h_c['f']):>5} iters, "
              f"err = {np.linalg.norm(x_c - x_star):.2e}")
        print(f"    Dogleg TR:  {len(h_d['f']):>5} iters, "
              f"err = {np.linalg.norm(x_d - x_star):.2e}")
        print(f"    Newton LS:  {len(h_n['f']):>5} iters, "
              f"err = {np.linalg.norm(x_n - x_star):.2e}")


# =============================================================================
# Exercise 4: Saddle Point Handling
# =============================================================================

def test_saddle_point():
    """Trust region navigates away from saddle point."""
    print("\n=== Exercise 4: Saddle Point f = x² - y² ===")
    x0 = np.array([0.1, 0.1])

    x_tr, h_tr = trust_region_dogleg(saddle_fgh, x0, delta0=0.5, max_iter=100)
    print(f"  Dogleg TR from {x0}: converged to [{x_tr[0]:.4f}, {x_tr[1]:.4f}], "
          f"f = {saddle_fgh(x_tr)[0]:.4f}")
    print(f"  (Should move toward y → ±∞ since f = x² - y² is unbounded below)")


# =============================================================================
# Exercise 5: Radius Adaptation Visualization (text)
# =============================================================================

def visualize_radius_adaptation():
    """Show how trust region radius evolves."""
    print("\n=== Exercise 5: Radius Adaptation ===")
    x0 = np.array([-2.0, 2.0])

    _, hist = trust_region_dogleg(rosenbrock_fgh, x0, delta0=0.5, max_iter=100)

    print(f"  {'Iter':>4}  {'f':>12}  {'||g||':>10}  {'Δ':>10}  {'ρ':>8}")
    n_show = min(20, len(hist["f"]))
    for i in range(n_show):
        rho = hist["rho"][i] if i < len(hist["rho"]) else float('nan')
        print(f"  {i:>4}  {hist['f'][i]:>12.4f}  "
              f"{hist['grad_norm'][i]:>10.2e}  "
              f"{hist['delta'][i]:>10.4f}  {rho:>8.3f}")


# =============================================================================
# Main
# =============================================================================

if __name__ == "__main__":
    compare_methods()
    test_saddle_point()
    visualize_radius_adaptation()