I am trying to find the minimum of a function with a quasi-Newton method using the Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm.
I would now like to change the implementation below so that:
1) Wolfe-Powell step size control is used to determine the step length in the quasi-Newton iteration.
2) A Levenberg-Marquardt style regularization is used to guarantee that H_k (the Hessian approximation) stays positive definite.
How can I do that? Which parts of the code do I have to change? I don't understand. Sketches of what I think each change might look like are further down, after the main code, but I am not sure they are correct.
Here is my working implementation of the quasi-Newton method with BFGS:
import numpy as np
import scipy.optimize, scipy.linalg
from scipy.optimize import minimize
##
## Finite difference quotient for vectors
##
def finite_differences(f, x, h = 1e-5):
    g = np.zeros(len(x))
    x_tmp = np.zeros(len(x))
    for i in range(len(x)):
        np.copyto(x_tmp, x)
        x_tmp[i] += h
        ## forward difference: ( f(x + h e_i) - f(x) ) / h
        g[i] = ( f(x_tmp) - f(x) ) / h
    return g
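##
## (sanity check, if helpful: scipy.optimize.approx_fprime uses the same
## forward difference scheme, so scipy.optimize.approx_fprime(x, f, 1e-5)
## should agree with finite_differences(f, x))
##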
##
## limit some quotient
##
def ensure_quotient(q, eps):
    if q >= 0:
        q = max(q, eps)
    else:
        q = min(q, -eps)
    return q
##
## DFP Update of the Inverse
## M_{k+1} = M_k + \frac{\delta x_k \delta x_k^T}{\delta g_k^T \delta x_k} - \frac{p_k \delta g_k^T M_k}{p_k^T \delta g_k},  with p_k = M_k \delta g_k
##
def Update_Inverse_DFP(M_k, dx_k, dg_k):
    ##
    ## p_k = M_k \delta g_k
    ##
    p_k = M_k.dot(dg_k)
    ##
    ## get quotients and limit them so that there is no division by zero
    ##
    q_1 = np.inner(dx_k, dg_k)
    q_1 = ensure_quotient(q_1, 1e-8)
    q_2 = np.inner(p_k, dg_k)
    q_2 = ensure_quotient(q_2, 1e-8)
    ##
    ## calculate update (rank-one corrections, using \delta x_k = \alpha_k d_k)
    ##
    M_new = M_k + np.outer(dx_k, dx_k) / q_1 - np.outer(p_k, dg_k).dot(M_k) / q_2
    return M_new
##
## BFGS Update of the Inverse
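## M_{k+1} = ( I - \frac{\delta x_k \delta g_k^T}{\delta g_k^T \delta x_k} ) M_k ( I - \frac{\delta g_k \delta x_k^T}{\delta g_k^T \delta x_k} ) + \frac{\delta x_k \delta x_k^T}{\delta g_k^T \delta x_k}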
##
def Update_Inverse_BFGS(M_k, dx_k, dg_k):
    ##
    ## get quotient and limit it so that there is no division by zero
    ##
    q = np.inner(dx_k, dg_k)
    q = ensure_quotient(q, 1e-8)
    ##
    ## rank-one matrix \delta x_k \delta g_k^T for the update
    ##
    U = np.outer(dx_k, dg_k)
    ##
    ## Identity
    ##
    assert U.shape[0] == U.shape[1], "Matrix dimension mixed up!"
    I = np.identity(np.shape(U)[0])
    ##
    ## calculate update
    ##
    M_new = ( I - U/q ).dot(M_k).dot( I - U.T/q ) + np.outer(dx_k, dx_k) / q
    return M_new
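##
## (note: the BFGS update keeps M_k positive definite only if the curvature
## condition \delta x_k^T \delta g_k > 0 holds, which a Wolfe-Powell line
## search would guarantee; with the golden section search, ensure_quotient
## merely clips the denominator and positive definiteness can be lost)
##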
##
## perform one step of a quasi-Newton method
##
def Quasi_Newton_Step(f, x_k, g_k, M_k, h = 1e-6, grad = None, Update_Inverse = Update_Inverse_BFGS):
    ##
    ## default gradient: forward finite differences with step h
    ## (bound here rather than in a default argument, so it sees the parameter f)
    ##
    if grad is None:
        grad = lambda x: finite_differences(f, x, h)
    ##
    ## determine direction
    ## d_k = - M_k g_k
    ##
    d_k = -M_k.dot(g_k)
    ##
    ## do a golden ratio line search to find an optimal alpha
    ## \alpha_k = argmin_\alpha \{ f(x_k + \alpha d_k) \}
    ## (I suspect this is the part that has to be replaced for 1);
    ## see the Wolfe-Powell sketch below the main code)
    ##
    line = lambda alpha: f( x_k + alpha * d_k )
    alpha_k = scipy.optimize.golden( line )
    ##
    ## update x
    ## x_{k+1} = x_k + \alpha_k d_k
    ##
    x_new = x_k + alpha_k * d_k
    ##
    ## gradient at the new position
    ## g_{k+1} = \nabla f(x_{k+1})
    ##
    g_new = grad(x_new)
    ##
    ## differences of gradients and of iterates
    ##
    dg_k = g_new - g_k
    dx_k = x_new - x_k
    ##
    ## get new M matrix
    ##
    M_new = Update_Inverse(M_k, dx_k, dg_k)
    return [x_new, g_new, M_new]
##
## Break condition for the quasi-Newton iteration
##
def Break_Condition(f, x_new, x_old, eps):
    return np.sum(np.abs(x_new - x_old))**2 < eps and abs(f(x_new) - f(x_old)) < eps
##
## Quasi-Newton algorithm
##
## TODO: This implementation is very memory consuming because it keeps track of all iterates
## TODO: The precision inherently depends on the differencing step h. What is the best value for it? eps?
def Quasi_Newton(f, x_0, max_iter = 100, h = 1e-6, eps = 1e-8, method = "BFGS"):
    if method == "BFGS":
        Update_Inverse = Update_Inverse_BFGS
    elif method == "DFP":
        Update_Inverse = Update_Inverse_DFP
    else:
        print('ERROR! Unknown Method: {}'.format(method))
        return ["-", "-"], 0
    ##
    ## store all intermediate values in lists
    ##
    x, g, M = [], [], []
    ##
    ## starting configuration
    ##
    g_0 = finite_differences(f, x_0, h)
    M_0 = np.identity(len(g_0))
    ##
    ## append to lists
    ##
    x.append(x_0)
    g.append(g_0)
    M.append(M_0)
    ##
    ## start iterations
    ##
    for k in range(max_iter):
        ##
        ## perform one quasi-Newton update step
        ##
        [x_new, g_new, M_new] = Quasi_Newton_Step(f, x[-1], g[-1], M[-1], h = h, Update_Inverse = Update_Inverse)
        ##
        ## append results of the step to the lists
        ##
        x.append(x_new)
        g.append(g_new)
        M.append(M_new)
        ##
        ## break condition
        ##
        if Break_Condition(f, x[-1], x[-2], eps):
            print('Converged after {} iterations'.format(k+1))
            break
    if not Break_Condition(f, x[-1], x[-2], eps):
        print('Did not converge after {} iterations!'.format(max_iter))
    return x[-1], f(x[-1])
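For 1), this is roughly what I have in mind: replace the golden ratio line search in Quasi_Newton_Step with a step size that satisfies the Wolfe-Powell conditions. A minimal sketch, assuming scipy.optimize.line_search (SciPy's strong-Wolfe line search); Wolfe_Powell_Step_Size and the 1e-4 fallback step are my own guesses:
def Wolfe_Powell_Step_Size(f, grad, x_k, d_k, g_k, c1 = 1e-4, c2 = 0.9):
    ## scipy.optimize.line_search enforces the strong Wolfe conditions:
    ##   f(x_k + a d_k) <= f(x_k) + c1 a g_k^T d_k     (sufficient decrease)
    ##   |grad(x_k + a d_k)^T d_k| <= c2 |g_k^T d_k|   (curvature)
    result = scipy.optimize.line_search(f, grad, x_k, d_k, gfk = g_k, c1 = c1, c2 = c2)
    alpha_k = result[0]
    ## the search can fail and return None; fall back to a small step (my assumption)
    if alpha_k is None:
        alpha_k = 1e-4
    return alpha_k
If that is right, alpha_k = scipy.optimize.golden( line ) in Quasi_Newton_Step would become alpha_k = Wolfe_Powell_Step_Size(f, grad, x_k, d_k, g_k). Is that the correct place to change?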
Testing the implementation:
def himmelblau(x, y):
    return (x**2 + y - 11)**2 + (x + y**2 - 7)**2
f = lambda x: himmelblau(x[0], x[1])
# Test with the Himmelblau function
x_0 = np.array([6.0, 0.0])
print("Starting Point:", x_0)
x_opt, f_opt = Quasi_Newton(f, x_0, max_iter = 100, h = 1e-6, eps = 1e-6)
print('Optimal value {} found at: {}'.format(f_opt, list(x_opt)))
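For 2), my understanding is that a Levenberg-Marquardt style regularization damps the Hessian approximation, H_k + mu I, which becomes positive definite once mu is large enough. A minimal sketch of what I mean (LM_Regularize is a hypothetical helper; note it operates on the Hessian approximation H_k, while my code tracks the inverse M_k, which is part of what confuses me):
def LM_Regularize(H_k, mu = 1e-3):
    ## Levenberg-Marquardt style damping: increase mu until H_k + mu * I
    ## admits a Cholesky factorization, i.e. is positive definite
    I = np.identity(H_k.shape[0])
    while True:
        try:
            np.linalg.cholesky(H_k + mu * I)
            return H_k + mu * I
        except np.linalg.LinAlgError:
            mu *= 10.0
Would I have to rewrite the updates in terms of H_k instead of M_k to use this, or can the damping be applied to M_k directly?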