Concept

x_{k+1} = x_k - α_k ∇f(x_k)

where α_k is a scalar line search parameter (the step size) and ∇f(x_k) is the gradient of f at the current iterate.
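A one-step sanity check (a made-up 1-D example): with f(x) = x², ∇f(x) = 2x, x_0 = 1, and a fixed α = 0.1, the update gives x_1 = 1 - 0.1 · 2 · 1 = 0.8, which moves toward the minimizer x* = 0.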

Cost

Linear convergence rate for smooth, strongly convex objectives; each iteration needs one gradient evaluation (plus extra function evaluations if a backtracking line search is used).
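A standard textbook bound (assuming f is L-smooth and μ-strongly convex, with a fixed step α = 1/L; the code below does not check these assumptions):

f(x_k) - f(x*) ≤ (1 - μ/L)^k (f(x_0) - f(x*))

so the suboptimality shrinks by a constant factor per iteration, and the factor gets worse as the condition number L/μ grows.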

Code

import numpy as np
from typing import Callable, Tuple

def steepest_descent(
    f: Callable[[np.ndarray], float],
    grad_f: Callable[[np.ndarray], np.ndarray],
    x0: np.ndarray,
    alpha: float = 0.01,  # fixed step size; an adaptive choice via line search is shown below
    max_iter: int = 1000,
    tol: float = 1e-6
) -> Tuple[np.ndarray, float, int]:
    x = x0.copy()
    iterations = 0
    
    for i in range(max_iter):
        g = grad_f(x)
        
        if np.linalg.norm(g) < tol:
            break
            
        # x_{k+1} = x_k - α_k \nabla f(x_k)
        x = x - alpha * g
        iterations += 1
        
    return x, f(x), iterations
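
A minimal usage sketch (a made-up quadratic, just to illustrate the call; its minimizer is (3, -1)):

f = lambda x: (x[0] - 3.0) ** 2 + (x[1] + 1.0) ** 2
grad_f = lambda x: np.array([2.0 * (x[0] - 3.0), 2.0 * (x[1] + 1.0)])

x_opt, f_opt, n_iter = steepest_descent(f, grad_f, x0=np.array([0.0, 0.0]), alpha=0.1)
print(x_opt, f_opt, n_iter)  # expect x_opt close to [3, -1] and f_opt near 0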

Line search

import numpy as np
from typing import Callable, Tuple
 
def backtracking_line_search(
    f: Callable[[np.ndarray], float],
    grad_f: Callable[[np.ndarray], np.ndarray],
    x: np.ndarray,
    d: np.ndarray,            # search direction, usually -grad_f(x)
    alpha_init: float = 1.0,
    rho: float = 0.5,         # backtracking (shrink) factor
    c: float = 1e-4           # Armijo condition constant
) -> float:
    alpha = alpha_init
    f_x = f(x)
    grad_dot_d = np.dot(grad_f(x), d)  # \nabla f(x)^T d, negative for a descent direction

    # Armijo condition: f(x + α*d) ≤ f(x) + c*α*\nabla f(x)^T d
    while f(x + alpha * d) > f_x + c * alpha * grad_dot_d:
        alpha *= rho  # backtrack: shrink the step until sufficient decrease holds

    return alpha
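
One way to combine the two pieces (a sketch; steepest_descent_ls is a name made up for this note, not an existing function):

def steepest_descent_ls(
    f: Callable[[np.ndarray], float],
    grad_f: Callable[[np.ndarray], np.ndarray],
    x0: np.ndarray,
    max_iter: int = 1000,
    tol: float = 1e-6
) -> Tuple[np.ndarray, float, int]:
    # Same loop as steepest_descent above, but the step size is chosen
    # by the backtracking line search instead of being fixed.
    x = x0.copy()
    iterations = 0

    for i in range(max_iter):
        g = grad_f(x)

        if np.linalg.norm(g) < tol:
            break

        d = -g  # steepest-descent direction
        alpha = backtracking_line_search(f, grad_f, x, d)
        x = x + alpha * d
        iterations += 1

    return x, f(x), iterations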