C : Matrix Multiplication stuck if matrix size 4096x4096

356 views Asked by At

Here is the code, matrix multiplication in C, the matrix's fill is integer. Optimization for loop nest and loop unrolling. Run well till N is 7168. For N 8196, insufficient memory.If you run it, just screen, wait till get time result. This is an alarm from my computer (Asus A456UQ) or code is still wrong?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include<time.h>
#include <sys/time.h>
#include<assert.h>

#define mat(m, col, row) \
m.data[row*m.cols + col]

#define N 8192
typedef struct
{
    int cols, rows;
    int *data;
}matrix_t;

void matrix_init(matrix_t *matrix, int cols, int rows)
{
    int i,j;
    matrix->cols = cols;
    matrix->rows = rows;
    matrix->data = malloc(sizeof(int)*cols*rows);

    if (!matrix->data)
    {
        fprintf(stderr, "Insufficient memory for a %dx%d matrix", cols, rows);
        exit(EXIT_FAILURE);
    }

    // 0 fill
    memset(matrix->data, 0, cols*rows);

}

void matrix_print(matrix_t *matrix)
{
    int i, n;

    n = matrix->cols * matrix->rows;

    printf("--M%dx%d--\n", matrix->rows, matrix->cols);
    for (i=0; i<n; i++)
    {
        printf("%d ", matrix->data[i]);

        if ((i+1)%matrix->cols == 0) printf("\n");
    }
}

void free_array(matrix_t *matrix) {
    free(matrix->data);
    matrix->data = NULL;
    matrix->cols = matrix->rows = 0;
}

void mul_matrix(matrix_t A, matrix_t B, matrix_t C){
    int ii, kk,ib,kb,j,i,k;
    int acc00, acc01, acc10,acc11;

    int size = A.cols;
    ib=16;
    kb=16;

    for (ii = 0; ii < size; ii += ib)
    {
        for (kk = 0; kk < size; kk += kb)
        {
            for (j=0; j < size; j += 2)
            {
                for(i = ii; i < ii + ib; i += 2 )
                {
                    if (kk == 0)
                        acc00 = acc01 = acc10 = acc11 = 0;
                    else
                    {
                        acc00 = mat(C,i,j);
                        acc01 = mat(C,i,(j+1));
                        acc10 = mat(C,(i+1),j);
                        acc11 = mat(C,(i+1),(j+1));
                    }
                }
                for (k = kk; k < kk + kb; k++)
                {
                    acc00 += mat(A,i,k) * mat(B,k,j);
                    acc01 += mat(A,i,k) * mat(B,k,j+1);
                    acc10 += mat(A,(i+1),k) * mat(B,k,j);
                    acc11 += mat(A,(i+1),k) * mat(B,k,(j+1));
                }

                mat(C,i,j) = acc00;
                mat(C,i,(j+1)) = acc01;
                mat(C,(i+1),j) = acc10;
                mat(C,(i+1),(j+1)) = acc11;

            }
        }
    }
}
/*
void mul_matrix(matrix_t A, matrix_t B, matrix_t C){
    int i,j,k;
    int t;
    int size = A.cols;


    for (int j=0; j<size; j++) {
       // iterates the j-th row of c
       for (int i=0; i<size; i++) {
           mat(C,i,j)=0;
       }

       // iterates the j-th row of b
       for (int k=0; k<size; k++) {
          t = mat(B,k,j);
          // iterates the j-th row of c
          // iterates the k-th row of a
          for (int i=0; i<size; i++) {
            mat(C,i,j) += mat(A,i,k) * t;
          }
       }
    }

 */

/*

    for (i = 0; i < size; i++){
        for (j = 0; j < size; j++) {
            mat(C,i,j) = 0;
            for (k = 0; k < size; k++)
                mat(C,i,j) += mat(A,i,k) * mat(B, k,j);
        }
    }
}
*/
void sum_matrix (matrix_t A, matrix_t B, matrix_t C){
    int i,j;
    int size = A.cols;

    for(i=0; i< size; i++) {
        for (j=0; j<size; j++)
            mat(C,i,j) = mat(A,i,j) + mat(B, i,j);
    }

}

void sub_matrix (matrix_t A, matrix_t B, matrix_t C){
    int i,j;
    int size = A.cols;

    for(i=0; i< size; i++) {
        for (j=0; j<size; j++)
            mat(C,i,j) = mat(A,i,j) - mat(B, i,j);
    }

}

int main(int argc, char **argv)
{
    matrix_t x;
    matrix_t y;
    matrix_t z;
    matrix_t m1,m2,m3,m4,m5,m6,m7;
    matrix_t a11,a12,a21,a22;
    matrix_t b11,b12,b21,b22;
    matrix_t aresult, bresult;
    matrix_t c11,c12,c21,c22;



    int i,j,k =0;

    int half = N/2;

    matrix_init(&x, N, N);
    matrix_init(&y, N, N);
    matrix_init(&z, N, N);
    //for(i = 0; i<N; i++)
      //  for(j = 0; j<N; j++)
         //   mat(z,i,j) =0;
    //matrix_print(&z);

    matrix_init(&a11, half, half);
    matrix_init(&a12, half, half);
    matrix_init(&a21, half, half);
    matrix_init(&a22, half, half);
    matrix_init(&b11, half, half);
    matrix_init(&b12, half, half);
    matrix_init(&b21, half, half);
    matrix_init(&b22, half, half);

    matrix_init(&aresult,half,half);
    matrix_init(&bresult,half,half);

    matrix_init(&m1, half, half);
    matrix_init(&m2, half, half);
    matrix_init(&m3, half, half);
    matrix_init(&m4, half, half);
    matrix_init(&m5, half, half);
    matrix_init(&m6, half, half);
    matrix_init(&m7, half, half);

    matrix_init(&c11, half, half);
    matrix_init(&c12, half, half);
    matrix_init(&c21, half, half);
    matrix_init(&c22, half, half);


    struct timeval  tv1, tv2;

    //srand((unsigned)time(NULL));


    for(i = 0; i<N; i++)
        for(j = 0; j<N; j++)
            mat(x,i,j) =1;// rand() % 10;

    for(i = 0; i<N; i++)
        for(j = 0; j<N; j++)
            mat(y,i,j) =1;// rand() % 10;




    gettimeofday(&tv1, NULL);


    //dividing the matrices in 4 sub-matrices:
    for(i=0; i<half; i++){
        for (j=0; j<half;j++){
            mat(a11,i,j)= mat(x,i,j);
            mat(a12,i,j)= mat(x,i,j+half);
            mat(a21,i,j)= mat(x,i+half,j);
            mat(a22,i,j)= mat(x,i+half,j+half);

            mat(b11,i,j)= mat(y,i,j);
            mat(b12,i,j)= mat(y,i,j+half);
            mat(b21,i,j)= mat(y,i+half, j);
            mat(b22,i,j)= mat(y, i+half, j+half);
        }
    }
    //matrix_print(&a11);
    //matrix_print(&a12);
    //matrix_print(&a21);
    //matrix_print(&a22);

    // Calculating m1 to m7:
    sum_matrix(a11,a22,aresult);
    sum_matrix(b11,b22,bresult);
    mul_matrix(aresult,bresult,m1);

    sum_matrix(a21,a22,aresult);
    mul_matrix(aresult,b11,m2);

    sub_matrix(b12,b22,aresult);
    mul_matrix(a11,aresult,m3);

    sub_matrix(b21,b11, aresult);
    mul_matrix(a22,aresult,m4);

    sum_matrix(a11,a12,aresult);
    mul_matrix(aresult,b22,m5);

    sub_matrix(a21,a11,aresult);
    sum_matrix(b11,b12,bresult);
    mul_matrix(aresult,bresult,m6);

    sub_matrix(a12,a22,aresult);
    sum_matrix(b21,b22,bresult);
    mul_matrix(aresult,bresult,m7);

    sum_matrix(m1,m4,aresult);
    sub_matrix(aresult,m5,bresult);
    sum_matrix(bresult,m7,c11);

    sum_matrix(m3,m5,c12);
    sum_matrix(m2,m4,c21);
    sub_matrix(m1,m2, aresult);
    sum_matrix(aresult,m3,bresult);
    sum_matrix(bresult,m6,c22);

    //matrix_print(&c11);
    //matrix_print(&c12);
    //matrix_print(&c21);
    //matrix_print(&c22);

    //combine matrix

    for(i=0; i < half; i++){
        for (j=0; j<half;j++){
            mat(z,i,j)= mat(c11,i,j);
            mat(z,(i+half),j)= mat(c12,i,j);
            mat(z,i,(j+half))= mat(c21,i,j);
            mat(z,(i+half),(j+half))= mat(c22,i,j);
       }
    }









    gettimeofday(&tv2, NULL);

    //matrix_print(&x);
    //matrix_print(&y);
    //matrix_print(&z);
    printf ("\n \nTotal time = %f seconds\n",(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + (double) (tv2.tv_sec - tv1.tv_sec));

    free_array(&x);
    free_array(&y);
    free_array(&z);

    free_array(&m1);
    free_array(&m2);
    free_array(&m3);
    free_array(&m4);
    free_array(&m5);
    free_array(&m6);
    free_array(&m7);

    free_array(&a11);
    free_array(&a12);
    free_array(&a21);
    free_array(&a22);
    free_array(&b11);
    free_array(&b12);
    free_array(&b21);
    free_array(&b22);

    free_array(&aresult);
    free_array(&bresult);

    free_array(&c11);
    free_array(&c12);
    free_array(&c21);
    free_array(&c22);
    return 0;
}
0

There are 0 answers