计算科学 - 如何使用 ScaLapack/PBLAS 进行矩阵向量乘法？ - 吾爱随笔录

如何使用 ScaLapack/PBLAS 进行矩阵向量乘法？

计算科学拉帕克布拉斯

2021-12-11 22:05:31

在浏览了 ScaLapack 的所有可能的“介绍”之后，我仍然无法理解如何PDGEMV使用它进行简单的操作。

这是我必须做的：

我必须使用生成矩阵

do i=1,n
  x(i) = i*i*1.0D+00
  do j=1,n
    A(i,j) = (i+j)*j*1.00D+00
  end do
end do

然后简单地相乘Matrix A by Vector x (b=Ax)。

对于 Lapack，我只是使用 CALL DGEMV('N',n,n,1.0D+00,a,n,x,1,0.0D+00,b,1)

我如何在 PBLAS 中实现这一目标？我已经运行了示例（example1.f来自网站）程序并且它可以工作。

什么是矩阵的划分？我该怎么做？

每个人（在教程中）似乎都在专注于DGESV我想要的时候，DGEMV/DGEMM即BLAS

2个回答

scalapack 需要的数据分布是块循环分布。（这里有一个小的交互式计算器）。一旦你掌握了窍门，Scalapack 就非常简单了。

对于给定的块大小（这是一个自由参数）并将处理器分解为网格（例如，4 个处理器 -> 2 行，2 列处理器），您可以从矩阵块的本地索引转到全局索引使用此 NERSC 示例中的 l2g 或 g2l 的矩阵索引；这是一种非常简单的入门方式。行或列向量被视为 nx1 或 1xn 矩阵。

下面是对角矩阵 A(1,2,3...) 乘以单位向量 X 并将结果放入 Y 的示例；这里唯一真正的技巧是并非所有处理器都有一大块 X 或 Y 向量。

program gemv1
      use mpi
      implicit none

      integer :: n, nb    ! problem size and block size
      integer :: myArows, myAcols   ! size of local subset of global matrix
      integer :: myXrows, myXcols   ! size of local subset of global vector
      integer :: i,j, myi, myj
      real, dimension(:,:), allocatable :: myA,myX,myY
      integer :: ierr

      integer, external :: numroc   ! blacs routine
      integer :: me, procs, icontxt, prow, pcol, myrow, mycol  ! blacs data
      integer :: info    ! scalapack return value
      integer, dimension(9)   :: ides_a, ides_x, ides_y ! matrix descriptors
      integer, dimension(2) :: dims
      real :: error, globerror

! Initialize blacs processor grid

      call blacs_pinfo   (me,procs)

! create as square as possible a grid of processors

      dims = 0
      call MPI_Dims_create(procs, 2, dims, ierr)
      prow = dims(1)
      pcol = dims(2)

! create the BLACS context

      call blacs_get     (0, 0, icontxt)
      call blacs_gridinit(icontxt, 'R', prow, pcol)
      call blacs_gridinfo(icontxt, prow, pcol, myrow, mycol)

! Construct local arrays
! Global structure:  matrix A of n rows and n columns

      n = int(25000.*sqrt(dble(procs)))

! blocksize - a free parameter.

      nb = 100

! how big is "my" chunk of matrix A?

      myArows = numroc(n, nb, myrow, 0, prow)
      myAcols = numroc(n, nb, mycol, 0, pcol)

! how big is "my" chunk of vector x?

      myXrows = numroc(n, nb, myrow, 0, prow)
      myXcols = 1

! Initialize local arrays    

      allocate(myA(myArows,myAcols)) 
      allocate(myX(myXrows,myXcols)) 
      allocate(myY(myXrows,myXcols)) 

      myA = 0.
      do myj=1,myAcols
          ! get global index from local index
          call l2g(myj,mycol,n,pcol,nb,j)
          do myi=1,myArows
              ! get global index from local index
              call l2g(myi,myrow,n,prow,nb,i)
              if (i == j) myA(myi,myj) = i
          enddo
      enddo

      myX = 0.
      call l2g(1,mycol,n,pcol,nb,j)
      if (j == 1) then
          do myi=1,myXrows
              call l2g(myi,myrow,n,prow,nb,i)
              myX(myi,1) = 1.
          enddo
      endif

      myY = 0.

! Prepare array descriptors for ScaLAPACK 

      call descinit( ides_a, n, n, nb, nb, 0, 0, icontxt, myArows, info )
      call descinit( ides_x, n, 1, nb, nb, 0, 0, icontxt, myXrows, info )
      call descinit( ides_y, n, 1, nb, nb, 0, 0, icontxt, myXrows, info )

! Call ScaLAPACK library routine

      call psgemv('N',n,n,1.,mya,1,1,ides_a,myx,1,1,ides_x,1,0.,myy,1,1,ides_y,1)
      if (me == 0) then
        if (info /= 0) then
             print *, 'Error -- info = ', info
        endif
      endif

! Deallocate the local arrays

      deallocate(myA, myX)

! Print results - Y should be 1,2,3...

      error = 0.
      call l2g(1,mycol,n,pcol,nb,j)
      if (j == 1) then
          do myi=1,myXrows
              call l2g(myi,myrow,n,prow,nb,i)
              error = error + (myY(myi,1)-1.*i)**2.
          enddo
      endif

      call MPI_Reduce(error, globerror, 1, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)

      if (me == 0) then
        print *,'Y l2 error = ', sqrt(globerror/n)
      endif

      deallocate(myY)

! End blacs for processors that are used

      call blacs_gridexit(icontxt)
      call blacs_exit(0)

contains

! convert global index to local index in block-cyclic distribution

   subroutine g2l(i,n,np,nb,p,il)

   implicit none
   integer, intent(in) :: i    ! global array index, input
   integer, intent(in) :: n    ! global array dimension, input
   integer, intent(in) :: np   ! processor array dimension, input
   integer, intent(in) :: nb   ! block size, input
   integer, intent(out):: p    ! processor array index, output
   integer, intent(out):: il   ! local array index, output
   integer :: im1   

   im1 = i-1
   p   = mod((im1/nb),np)
   il  = (im1/(np*nb))*nb + mod(im1,nb) + 1

   return
   end subroutine g2l

! convert local index to global index in block-cyclic distribution

   subroutine l2g(il,p,n,np,nb,i)

   implicit none
   integer :: il   ! local array index, input
   integer :: p    ! processor array index, input
   integer :: n    ! global array dimension, input
   integer :: np   ! processor array dimension, input
   integer :: nb   ! block size, input
   integer :: i    ! global array index, output
   integer :: ilm1   

   ilm1 = il-1
   i    = (((ilm1/nb) * np) + p)*nb + mod(ilm1,nb) + 1

   return
   end subroutine l2g

end program gemv1

使用 pblas GEMV。

http://www.netlib.org/scalapack/pblas_qref.html#PvGEMV

其它你可能感兴趣的问题

上一篇特征值分析与傅立叶分析的稳定性及其等价性下一篇曲面网格库