Concept
x_{k+1} = x_k - \alpha_k \nabla f(x_k), where \alpha_k is a scalar line search parameter (the step size) and \nabla f(x_k) is the gradient at the current iterate.
Cost
One gradient evaluation plus an O(n) vector update per iteration; linear (geometric) convergence rate on smooth, strongly convex functions, slowing as the condition number grows.
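For reference, a textbook-style statement of this rate (assuming f is L-smooth and \mu-strongly convex with minimizer x^*, and a fixed step \alpha = 1/L; these assumptions are not stated above):

\|x_k - x^*\|^2 \leq (1 - \mu/L)^k \|x_0 - x^*\|^2

so the error contracts by a constant factor each iteration, and that factor approaches 1 as the condition number \kappa = L/\mu grows.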
Code
import numpy as np
from typing import Callable, Tuple

def steepest_descent(
    f: Callable[[np.ndarray], float],
    grad_f: Callable[[np.ndarray], np.ndarray],
    x0: np.ndarray,
    alpha: float = 0.01,          # fixed step size; can be replaced by a line search
    max_iter: int = 1000,
    tol: float = 1e-6
) -> Tuple[np.ndarray, float, int]:
    x = x0.copy()
    iterations = 0
    for i in range(max_iter):
        g = grad_f(x)
        if np.linalg.norm(g) < tol:   # stop when the gradient is (nearly) zero
            break
        # x_{k+1} = x_k - α_k \nabla f(x_k)
        x = x - alpha * g
        iterations += 1
    return x, f(x), iterations
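A minimal usage sketch on a small convex quadratic (the matrix A, vector b, and starting point below are illustrative values, not taken from the text above):

# Minimize f(x) = 0.5 x^T A x - b^T x, whose minimizer solves A x = b
A = np.array([[3.0, 0.2], [0.2, 2.0]])
b = np.array([1.0, -1.0])
f = lambda x: 0.5 * x @ A @ x - b @ x
grad_f = lambda x: A @ x - b

x_best, f_best, n_iter = steepest_descent(f, grad_f, x0=np.zeros(2), alpha=0.1)
print(x_best, f_best, n_iter)   # x_best approaches the solution of A x = b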
Line search
import numpy as np
from typing import Callable, Tuple
def backtracking_line_search(
    f: Callable[[np.ndarray], float],
    grad_f: Callable[[np.ndarray], np.ndarray],
    x: np.ndarray,
    d: np.ndarray,                # search direction, usually -grad_f(x)
    alpha_init: float = 1.0,
    rho: float = 0.5,             # backtracking (shrink) factor
    c: float = 1e-4               # Armijo condition constant
) -> float:
    alpha = alpha_init
    f_x = f(x)
    grad_dot_d = np.dot(grad_f(x), d)   # \nabla f(x)^T d
    # Armijo condition: f(x + α*d) \leq f(x) + c*α*\nabla f(x)^T d
    while f(x + alpha * d) > f_x + c * alpha * grad_dot_d:
        alpha *= rho                    # shrink the step until sufficient decrease holds
    return alpha
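One way to combine the two pieces is to recompute the step size with backtracking at every iteration instead of using a fixed alpha. The variant below is a sketch under that assumption (it is not part of the code above) and reuses the same illustrative quadratic:

def steepest_descent_backtracking(f, grad_f, x0, max_iter=1000, tol=1e-6):
    # Same loop as steepest_descent, but α_k comes from backtracking_line_search
    x = x0.copy()
    iterations = 0
    for i in range(max_iter):
        g = grad_f(x)
        if np.linalg.norm(g) < tol:
            break
        d = -g                                     # steepest-descent direction
        alpha = backtracking_line_search(f, grad_f, x, d)
        x = x + alpha * d                          # x_{k+1} = x_k + α_k d_k
        iterations += 1
    return x, f(x), iterations

# Illustrative quadratic (same form as in the earlier usage sketch)
A = np.array([[3.0, 0.2], [0.2, 2.0]])
b = np.array([1.0, -1.0])
x_best, f_best, n_iter = steepest_descent_backtracking(
    lambda x: 0.5 * x @ A @ x - b @ x,
    lambda x: A @ x - b,
    x0=np.zeros(2),
)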