迭代求解器的内存问题

计算科学 迭代法 C 泊松 内存管理
2021-11-28 22:18:24

试图使用共轭梯度法实现泊松 2d 求解器,因此从 10x10 网格开始,矩阵变为 100x100(因为我们有 100 个节点来查找值),100x100 网格变为 10000x10000 矩阵,但矩阵稀疏,每个矩阵有 5 个元素行(不是五边形!)。

因此,接收分段错误,可能是由于内存分配错误,但仍然是一个相当大的矩阵来存储和使用,因为零仍然被认为是双精度数,这可能会减慢求解器的速度。

考虑实现稀疏矩阵的数据结构以克服内存问题。关于求解器中的内存管理还有其他想法吗?

观看这些讲座 并阅读 Sandip Mazumder 的视频讲师“偏微分方程的数值方法:有限差分和有限体积方法”的书籍。

下面是将所有节点的系数矩阵填充到密集矩阵(不是稀疏矩阵)的草稿 C 代码。

#include <stdio.h>
#include <math.h>
#include <stdlib.h>

 

int main(){
    double L = 1;
    int N = 4, M = 3;
    int kmax = M*N;
    double sum = 0;
    int fi = 1;
    double dx = L/(N-1);
    double dy = L/(M-1);
    double dx2 = dx*dx; 
    double dy2 = dy*dy;
    double *S; 
    S = (double *) malloc(N*M * sizeof(double));
    double *Q; 
    Q = (double *) malloc(N*M * sizeof(double));

    int** Acoord = (int**)malloc(kmax * sizeof(int*));
    for (int i = 0; i < kmax; i++)
        Acoord[i] = (int*)malloc(5 * sizeof(int));
    double** Aval = (double**)malloc(kmax * sizeof(double*));
    for (int i = 0; i < kmax; i++)
        Aval[i] = (double*)malloc(5 * sizeof(double));

    // fill S(ource) rhs f function
    for (int i = 0; i< N; i++){
        for (int j = 0; j < M ; j++){
            int k = j*N+i;
            S[k] = -exp(-pow(i*dx-L/2,2)-pow(j*dy-L/2,2));
            sum = sum + S[k];
        }
    }
    sum = sum/N/M;
    for (int i = 0; i < N; i++){
        for (int j = 0; j < M; j++){
            int k = j*N+i;
            S[k] = S[k] - sum;
        }
    }

    for (int i = 1; i < N-1; i++){
        for (int j = 1; j < M-1; j++){
            int k = j*N+i;

            Aval[k][2] = -(2/dx2+2/dy2);
            Aval[k][1] = 1/dx2;
            Aval[k][3] = 1/dx2;
            Aval[k][0] = 1/dy2;
            Aval[k][4] = 1/dy2;

            Acoord[k][0] = k - N;
            Acoord[k][1] = k - 1;
            Acoord[k][2] = k;
            Acoord[k][3] = k + 1;
            Acoord[k][4] = k + N;
            Q[k] = S[k];
        }
    }
    


    
    // fill boundary part
    int i = N-1; // RIGHT
    double JR = 0; // FLUX
    for (int j = 1; j < M-1; j++){ 
        int k = j*N+i;

        Aval[k][0] = 1/dy2;
        Aval[k][1] = 1/dx2;
        Aval[k][2] = -1/dx2-2/dy2;
        Aval[k][3] = 0;
        Aval[k][4] = 1/dy2;
        

        Acoord[k][0] = k - N;
        Acoord[k][1] = k - 1;
        Acoord[k][2] = k;
        Acoord[k][3] = -1;
        Acoord[k][4] = k + N;


        Q[k] = S[k];
    }

    i = 0; // LEFT
    double JL = 0; // FLUX
    for (int j = 1; j < M-1; j++){
        int k = j*N+i;

        Aval[k][0] = 1/dy2;
        Aval[k][1] = 0;
        Aval[k][2] = -1/dx2-2/dy2;
        Aval[k][3] = 1/dx2;
        Aval[k][4] = 1/dy2;

        Acoord[k][0] = k - N;
        Acoord[k][1] = - 1;
        Acoord[k][2] = k;
        Acoord[k][3] = k + 1;
        Acoord[k][4] = k + N;
        
        Q[k] = S[k];

    }

    int j = 0; // BOT
    double JB = 0; // FLUX
    for (int i = 1; i < N-1; i++){
        int k = j*N+i;

        Aval[k][0] = 0;
        Aval[k][1] = 1/dx2;
        Aval[k][2] = -2/dx2-1/dy2;
        Aval[k][3] = 1/dx2;
        Aval[k][4] = 1/dy2;

        Acoord[k][0] = -1;
        Acoord[k][1] = k - 1;
        Acoord[k][2] = k;
        Acoord[k][3] = k + 1;
        Acoord[k][4] = k + N;

        Q[k] = S[k];
        // printf("k = %d kmax = %d\n",k,kmax);
    }

    j = M-1; // TOP
    double JT = 0; // FLUX
    for (int i = 1; i < N-1; i++){
        int k = j*N+i;

        Aval[k][0] = 1/dy2;
        Aval[k][1] = 1/dx2;
        Aval[k][2] = -2/dx2-1/dy2;
        Aval[k][3] = 1/dx2;
        Aval[k][4] = 0;

        Acoord[k][0] = k - N;
        Acoord[k][1] = k - 1;
        Acoord[k][2] = k;
        Acoord[k][3] = k + 1;
        Acoord[k][4] = -1;


        Q[k] = S[k];
    }

    i = 0; // LEFT
    j = 0; // BOT 
    int k = j*N+i;

    Aval[k][0] = 0;
    Aval[k][1] = 0;
    Aval[k][2] = -1/dx2 - 1/dy2;
    Aval[k][3] = 1/dx2;
    Aval[k][4] = 1/dy2;

    Acoord[k][0] = -1;
    Acoord[k][1] = -1;
    Acoord[k][2] = k;
    Acoord[k][3] = k + 1;
    Acoord[k][4] = k + N;

    Q[k] = S[k]; 

    i = 0; // LEFT
    j = M-1; // TOP 
    k = j*N+i;

    Aval[k][0] = 1/dy2;
    Aval[k][1] = 0;
    Aval[k][2] = -1/dx2 - 1/dy2;
    Aval[k][3] = 1/dx2;
    Aval[k][4] = 0;

    Acoord[k][0] = k - N;
    Acoord[k][1] = -1;
    Acoord[k][2] = k;
    Acoord[k][3] = k + 1;
    Acoord[k][4] = -1;

    Q[k] = S[k]; 

    i = N-1; // RIGHT
    j = M-1; // TOP 
    k = j*N+i;

    Aval[k][0] = 1/dy2;
    Aval[k][1] = 1/dx2;
    Aval[k][2] = -1/dx2 - 1/dy2;
    Aval[k][3] = 0;
    Aval[k][4] = 0;

    Acoord[k][0] = k - N;
    Acoord[k][1] = k - 1;
    Acoord[k][2] = k;
    Acoord[k][3] = -1;
    Acoord[k][4] = -1;

    Q[k] = S[k]; 

    i = N-1; // RIGHT
    j = 0; // BOT 
    k = (j)*M+i;

    Aval[k][0] = 0;
    Aval[k][1] = 1/dx2;
    Aval[k][2] = -1/dx2 - 1/dy2;
    Aval[k][3] = 0;
    Aval[k][4] = 1/dy2;

    Acoord[k][0] = -1;
    Acoord[k][1] = k - 1;
    Acoord[k][2] = k;
    Acoord[k][3] = -1;
    Acoord[k][4] = k + N;

    Q[k] = S[k];     

    // CJ solver



    // CJ solver ends

    FILE *f1 = fopen("testCJ_coord.txt", "w+");
    if (f1 == NULL)
    {
        printf("Error opening file!\n");
        exit(1);
    }

    for (int i = 0; i < kmax; i++){
        fprintf(f1,"%d\t",i);
        for (int j = 0; j < 5; j++){
                fprintf(f1,"%d\t",Acoord[i][j]);
        }
        fprintf(f1,"\n");
    }
    fclose(f1);

    FILE *f2 = fopen("testCJ_val.txt", "w+");
    if (f2 == NULL)
    {
        printf("Error opening file!\n");
        exit(1);
    }

    for (int i = 0; i < kmax; i++){
        fprintf(f2,"%d\t",i);
        for (int j = 0; j < 5; j++){
                fprintf(f2,"%.4f\t",Aval[i][j]);

        }
        fprintf(f2,"\n");
    }
    fclose(f2);

    FILE *f3 = fopen("testCJ_Q.txt", "w+");
    if (f3 == NULL)
    {
        printf("Error opening file!\n");
        exit(1);
    }

    for (int i = 0; i < kmax; i++){
        fprintf(f3,"%d\t",i);
        fprintf(f3,"%12.5f",Q[i]);
        fprintf(f3,"\n");
    }
    fclose(f3);
    

    free(S);
    free(Q);
    for (int i = 0; i < kmax; i++)
        free(Aval[i]);
    free(Aval);

    for (int i = 0; i < kmax; i++)
        free(Acoord[i]);
    free(Acoord);
    
    return 0;
}
1个回答

正如您所指出的,您的矩阵是稀疏的,这意味着非零的数量与零的数量相比很小。有几种格式可以存储这样的矩阵,例如 COO、CSC、CSR 等。这些格式只存储非零条目,并假设每隔一个条目为零。此外,还有求解器和例程(稀疏矩阵向量积)利用这些格式来节省计算时间和内存。我建议对提到的格式进行研究,找出最适合您的情况的格式,然后围绕它构建其他所有内容。