Why is this Python code faster than the Fortran version wrapped with F2PY?

2021-12-07 14:51:06

I am writing a proof-of-concept implementation of Newton's method for minimizing the negative log-likelihood of a logistic regression model. I am comparing the performance of a pure Python implementation against a Fortran implementation that I link in via f2py. Surprisingly, the Fortran version is slower than the Python version. Does anyone know why that might be, or have any suggestions for improving the performance of the Fortran code?
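For reference, writing $\sigma(z) = e^z/(1+e^z)$ for the logistic function, the quantities the code below computes are the averaged negative log-likelihood, its gradient and Hessian, and the standard Newton update:

$$f(\beta) = -\frac{1}{n}\sum_{i=1}^{n}\Big[y_i\log\sigma(x_i^\top\beta) + (1-y_i)\log\big(1-\sigma(x_i^\top\beta)\big)\Big]$$

$$\nabla f(\beta) = \frac{1}{n}X^\top\big(\sigma(X\beta)-y\big), \qquad \nabla^2 f(\beta) = \frac{1}{n}X^\top D X, \quad D = \operatorname{diag}\big(\sigma(X\beta)\,(1-\sigma(X\beta))\big)$$

$$\beta \leftarrow \beta - \big[\nabla^2 f(\beta)\big]^{-1}\,\nabla f(\beta)$$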

Here is the Python:

import numpy as np
import scipy.linalg as la
import warnings

def logistic(z):
    return np.exp(z)/(1+np.exp(z))

def f(X, y, beta):
    logist = logistic(X@beta)
    return (-y*np.log(logist) - (1-y)*np.log(1-logist)).mean()

def gradf(X, y, beta):
    logist = logistic(X@beta)
    return ((logist-y)[:,None]*X).mean(axis=0)

def hessf(X, y, beta):
    logist = logistic(X@beta)
    D = np.diag(logist*(1-logist))
    return X.T @ D @ X/X.shape[0]

def Newtons_logreg(X, y, beta_init, max_iter=10000, tol=1e-4):
    beta = beta_init.copy()
    i = 0
    while i < max_iter:
        #g = np.linalg.inv(hessf(X, y, beta)) @ gradf(X, y, beta) #compute inverse
        #g = np.linalg.solve(hessf(X, y, beta), gradf(X, y, beta)) #general system solve
        g = la.solve(hessf(X, y, beta), gradf(X, y, beta), assume_a='pos') #Cholesky-based solve
        beta -= g
        if np.linalg.norm(g) < tol:
            return beta
        i += 1
    warnings.warn("Convergence not reached. Increase iterations or decrease tolerance")
    return beta
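(A side note on the solver: as far as I understand, assume_a='pos' makes SciPy use a Cholesky-based LAPACK solve, essentially the same ?posv routine the Fortran version below calls explicitly. A small self-contained sketch of that equivalence, using a made-up positive definite matrix:)

import numpy as np
from scipy.linalg import solve, cho_factor, cho_solve

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 5))
H = A @ A.T + 5*np.eye(5)                 # small symmetric positive definite "Hessian"
g = rng.normal(size=5)

step_posv = solve(H, g, assume_a='pos')   # what Newtons_logreg does each iteration
c, low = cho_factor(H)                    # explicit Cholesky factorization
step_chol = cho_solve((c, low), g)
print(np.allclose(step_posv, step_chol))  # True: same Newton step either way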

And here is the Fortran:

! FILE: FORTRAN_VERSION.F
! Compile with f2py -c -m fortran_v fortran_version.f90
    SUBROUTINE logistic(Z, N, W)
!
!   Compute the logistic function of an array Z of length N
!
    implicit none
    INTEGER N
    REAL*8 Z(N)
    REAL*8 W(N)
!f2py intent(in) z
!f2py integer intent(hide),depend(z) :: n=shape(z,0)
!f2py intent(out) w
    W = EXP(Z)/(1D0+EXP(Z))
    END


    SUBROUTINE f(X, y, beta, N, M, L)
!
!   Compute the logistic regression likelihood w/ design matrix X,
!   response vector y and coefficient vector beta
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), y(N), X(N,M), beta(M), L
!f2py intent(in) X,y,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) L
    CALL logistic (MATMUL(X, RESHAPE(beta, (/M,1/))), N, S)
    L = SUM((-y*LOG(S) - (1-y)*LOG(1D0-S))/FLOAT(N))
    END



    SUBROUTINE gradf(X, y, beta, N, M, G)
!
!   Compute the gradient of the logistic regression likelihood f
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), y(N), X(N,M), beta(M), G(M)
!f2py intent(in) X,y,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) G
    CALL logistic (MATMUL(X, RESHAPE(beta, (/M,1/))), N, S)
    !may be able to use a ptr instead
    G = RESHAPE(MATMUL(RESHAPE(S-y, (/1,N/))/float(N), X), (/M/)) 
    END



    SUBROUTINE hessf(X, beta, N, M, H)
!
!   Compute the Hessian of the logistic regression likelihood f
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), X(N,M), beta(M), H(M,M)
!f2py intent(in) X,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) H
    CALL logistic (MATMUL(X, RESHAPE(beta, (/M,1/))), N, S) 
    H = MATMUL(TRANSPOSE(X), SPREAD(S*(1D0-S)/float(N), 2, M)*X)
    END


    SUBROUTINE Newtons_logreg(X, y, beta_i, max_iter, tol, N, M) 
!
!   Compute the maximum likelihood estimate of logistic regression w/ design
!   matrix X and response variable y
!
    implicit none
    INTEGER N,M,it,max_iter,O
    REAL*8 y(N), X(N,M), tol
    REAL*8 beta_i(M)
    REAL*8 H(M,M), G(M)
!f2py intent(in) X,y,beta_i,max_iter,tol
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) beta_i
    external DPOSV !double precision symmetric positive definite linear solve
    !external DSYSV !double precision symmetric linear solve
    !external DGELSV
    it=0
    DO WHILE ( it .LT. max_iter)
        call hessf(X, beta_i, N, M, H)
        call gradf(X, y, beta_i, N, M, G)
        call DPOSV('U', M, 1, H, M, G, M, O)
        beta_i = beta_i - G
        IF (NORM2(G) .LT. tol) RETURN
        it = it + 1
    ENDDO
    END    
        
! END OF FILE FORTRAN_VERSION.F
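In case it helps anyone reproducing this: the !f2py directives hide N and M, so the generated wrappers only take the visible arguments. The signatures f2py actually generated can be checked from the module docstrings (a quick sketch, assuming the module was built as fortran_v as in the compile comment above):

import fortran_v
# f2py attaches a docstring showing the generated call signature,
# roughly "beta_i = newtons_logreg(x,y,beta_i,max_iter,tol)"
print(fortran_v.newtons_logreg.__doc__)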

The test script:

import numpy as np
import sys
sys.path.append("./")
import fortran_v
import python_version
import time

def run_both_tests(n, p, max_iter=10000, tol=1e-4, runs=1000):
    fortran_times = np.zeros(runs)
    fortran_errs = np.zeros(runs)
    python_times = np.zeros(runs)
    python_errs = np.zeros(runs)
    for i in range(runs):
        np.random.seed(i)
        X = np.random.normal(size=(n,p))
        beta = np.random.normal(size=p)
        y = np.random.binomial(n=1, p=python_version.logistic(X@beta), size=n)
        start = time.time()
        beta_init = np.zeros(p)
        python_errs[i] = np.linalg.norm(python_version.Newtons_logreg(X, y, beta_init, max_iter, tol))
        python_times[i] = time.time()-start
        start = time.time()
        beta_init = np.zeros(p)
        fortran_errs[i] = np.linalg.norm(fortran_v.newtons_logreg(X, y, beta_init, max_iter, tol))
        fortran_times[i] = time.time()-start
    print("Mean/Std of Python Runs: {} +/- {}".format(python_times.mean(), python_times.std()))
    print("Mean/Std of Fortran Runs: {} +/- {}".format(fortran_times.mean(), fortran_times.std()))
    print("Mean/Std of Differences in Errors: {} +/- {}".format(np.abs(python_errs-fortran_errs).mean(), np.abs(python_errs-fortran_errs).std()))

and its output:

In [4]: test_script.run_both_tests(50000, 500, runs=10)
Mean/Std of Python Runs: 127.61297235488891 +/- 2.759745817424147
Mean/Std of Fortran Runs: 165.95721955299376 +/- 3.9609319826282143
Mean/Std of Differences in Errors: 1.8474111129762605e-14 +/- 1.1435103132435445e-14

Note that this is my first Fortran program, so pointing out low-hanging fruit is especially appreciated.

1 Answer

Hat tip to Kyle Mandli and Endulum, who both contributed to this answer in the comments.

First, I took Endulum's suggestion and removed the redundant RESHAPEs. After that change the Fortran version beat Python on small examples, but at scale the Python version was still faster. I then implemented Kyle Mandli's suggestion and replaced all of the MATMULs with calls to dgemm (the BLAS routine shipped with LAPACK). Surprisingly, this change cut the mean runtime on the 50000×500 example from roughly 166 seconds to under 4 seconds.
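For anyone who wants to see the same effect from the Python side: NumPy's @ operator typically dispatches to an optimized BLAS, and SciPy exposes the same dgemm routine directly, so the gap above is really gfortran's MATMUL vs. BLAS rather than Python vs. Fortran. A rough sketch (timings depend entirely on the BLAS you are linked against):

import numpy as np
from scipy.linalg.blas import dgemm   # the same BLAS routine the Fortran now calls
import time

n, m = 50000, 500
rng = np.random.default_rng(0)
X = rng.normal(size=(n, m))
beta = rng.normal(size=m)

start = time.time()
Xbeta_np = X @ beta                               # NumPy matmul -> BLAS under the hood
print("numpy @    :", time.time() - start)

start = time.time()
Xbeta_blas = dgemm(1.0, X, beta.reshape(m, 1))    # explicit BLAS dgemm call
print("scipy dgemm:", time.time() - start)

print(np.allclose(Xbeta_np, Xbeta_blas.ravel()))  # same product either way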

What I learned:

There is well over an order of magnitude between LAPACK/BLAS and gfortran's intrinsic linear algebra functions (MATMUL and friends). If you are going to offload any linear-algebra-heavy Python code to Fortran for speed, use the LAPACK/BLAS routines. It makes all the difference.
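A related sanity check: the NumPy baseline you are comparing against is itself BLAS-backed, and you can see which BLAS/LAPACK it was built with (output varies by install):

import numpy as np
np.show_config()   # prints the BLAS/LAPACK libraries NumPy was linked against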

Here are the run output and the final Fortran version, for the curious.

In [3]: test_script.run_both_tests(50000, 500, runs=10)
Mean/Std of Python Runs: 125.60022213459015 +/- 4.542739296415188
Mean/Std of Fortran Runs: 3.9391146421432497 +/- 0.057918332079517035
Mean/Std of Differences in Errors: 1.8474111129762605e-14 +/- 1.5305558938321453e-14

And the Fortran file. Note that there is still a NORM2 in the stopping condition that could additionally be replaced with LAPACK's DLANGE.

! Compile with f2py -c -m fortran_v fortran_version.f90
    SUBROUTINE logistic(Z, N, W)
!
!   Compute the logistic function of an array Z of length N
!
    implicit none
    INTEGER N
    REAL*8 Z(N)
    REAL*8 W(N)
!f2py intent(in) z
!f2py integer intent(hide),depend(z) :: n=shape(z,0)
!f2py intent(out) w
    W = EXP(Z)/(1D0+EXP(Z))
    END


    SUBROUTINE f(X, y, beta, N, M, L)
!
!   Compute the logistic regression likelihood w/ design matrix X,
!   response vector y and coefficient vector beta
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), y(N), X(N,M), beta(M), L, Xbeta(N)
!f2py intent(in) X,y,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) L
    !CALL logistic (MATMUL(X, beta), N, S)
    CALL DGEMM('N', 'N', N, 1, M, 1D0, X, N, beta, M, 0D0, Xbeta, N)
    CALL logistic(Xbeta, N, S)
    L = SUM((-y*LOG(S) - (1-y)*LOG(1D0-S))/FLOAT(N))
    END


    SUBROUTINE gradf(X, y, beta, N, M, G)
!
!   Compute the gradient of the logistic regression likelihood f
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), y(N), X(N,M), beta(M), G(M), Xbeta(N)
!f2py intent(in) X,y,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) G
    !CALL logistic (MATMUL(X, beta), N, S)
    CALL DGEMM('N', 'N', N, 1, M, 1D0, X, N, beta, M, 0D0, Xbeta, N)
    CALL logistic(Xbeta, N, S)
    !may be able to use a ptr instead
    G = MATMUL((S-y)/float(N), X) 
    END

    SUBROUTINE hessf(X, beta, N, M, H)
!
!   Compute the Hessian of the logistic regression likelihood f
!
    implicit none
    INTEGER N,M
    REAL*8 S(N), X(N,M), beta(M), H(M,M), Xbeta(N)
!f2py intent(in) X,beta
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) H
    !CALL logistic (MATMUL(X, beta), N, S) 
    CALL DGEMM('N', 'N', N, 1, M, 1D0, X, N, beta, M, 0D0, Xbeta, N)
    CALL logistic(Xbeta, N, S)
    CALL DGEMM('T', 'N', M, M, N, 1D0, X, N, SPREAD(S*(1D0-S)/float(N), 2, M)*X, N, 0D0, H, M)
    END

    SUBROUTINE Newtons_logreg(X, y, beta_i, max_iter, tol, N, M) 
!
!   Compute the maximum likelihood estimate of logistic regression w/ design
!   matrix X and response variable y
!
    implicit none
    INTEGER N,M,it,max_iter,O
    REAL*8 y(N), X(N,M), tol
    REAL*8 beta_i(M)
    REAL*8 H(M,M), G(M)
!f2py intent(in) X,y,beta_i,max_iter,tol
!f2py integer intent(hide),depend(X) :: n=shape(X,0), m = shape(X,1)
!f2py intent(out) beta_i
    !external DPOSV !double precision symmetric positive definite linear solve
    !external DSYSV !double precision symmetric linear solve
    !external DGELSV
    it=0
    DO WHILE ( it .LT. max_iter)
        call hessf(X, beta_i, N, M, H)
        call gradf(X, y, beta_i, N, M, G)
        call DPOSV('U', M, 1, H, M, G, M, O)
        beta_i = beta_i - G
        IF (NORM2(G) .LT. tol) RETURN
        !IF (O .NE. 0) WRITE(*,*) 'ERROR IN MATRIX SOLVE'
        it = it + 1
    ENDDO
    END    
        
! END OF FILE FORTRAN_VERSION.F