HPC/並列プログラミングポータルでは、HPC(High Performance Computing)プログラミングや並列プログラミングに関する情報を集積・発信しています。

新着トピックス

リスト2 『インテル謹製の数値演算ライブラリ「MKL」を使ってプログラムを高速化』記事内で使用したBLASを使った行列演算プログラム
/* mkl1.c */

#include stdio.h
#include stdlib.h
#include memory.h
#include time.h
#include mkl_cblas.h


#define ITEST_PARAM_N 100
#define ITEST_PARAM_REPEAT 100
#define AT(matrix,i,j) (matrix[i*dim+j])

static unsigned long next_rand = 1;
void print_matrix( double* matrix, int dim );
double my_rand(void);
void prod_matrix_with_blas( double* matrixA, double* matrixB, double* matrixC, int dim );

void cp_matrix( double* matrixA, double* matrixB,int dim );
void mult_matrix( double* matrixA, double* matrixB, double x, int dim );
void mult_matrix_with_blas( double* matrixA, double x, int dim );

int main( int argc, const char **argv) {
  int i, j;
  int rep;
  int dim = ITEST_PARAM_N;
  double* matrixA;
  double* matrixB;
  double* matrixC;
  clock_t clock_start, clock_end;

  clock_start = clock();

  /* allocate matrix */
  matrixA = (double*)malloc( sizeof(double) * dim * dim );
  matrixB = (double*)malloc( sizeof(double) * dim * dim );
  matrixC = (double*)malloc( sizeof(double) * dim * dim );

  if( (matrixA == NULL) ||
      (matrixB == NULL) ||
      (matrixC == NULL) ) {
    printf( "cannot allocate memory. exit.\n");
    free(matrixA);
    free(matrixB);
    free(matrixC);
    return -1;
  }

  /* initialize matrix */
  for( i = 0; i  dim; i++ ) {
    for( j = 0; j  dim; j++ ) {
      AT(matrixA,i,j) = my_rand();
      AT(matrixB,i,j) = my_rand();
    }
  }


  mult_matrix_with_blas( matrixB, 2.0/(double)dim, dim );

#ifdef TEST_SHOW_RESULT
  print_matrix(matrixB, dim);
#endif

  /* calculate C = A*(B/a)^REPEAT */
  for( rep = 0; rep  ITEST_PARAM_REPEAT; rep++ ) {
    prod_matrix_with_blas( matrixC, matrixA, matrixB, dim );
  }

#ifdef TEST_SHOW_RESULT
  print_matrix(matrixC, dim);
#endif

  free(matrixA);
  free(matrixB);
  free(matrixC);

  clock_end = clock();

  printf( "start: %d end: %d elapse: %d.\n", clock_start, clock_end, clock_end - clock_start );
  printf( "elapse time: %lf sec.", ((double)(clock_end - clock_start)) / (double)CLOCKS_PER_SEC );

  return 0;
}

/* A = B; copy matrix B to matrix A */
void cp_matrix( double* matrixA, double* matrixB,int dim ) {
  memcpy( matrixA, matrixB, sizeof(double)*dim*dim );
}

/* A = B * C; matrixA = matrixB * matrixC */
/*
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
                 const enum CBLAS_TRANSPOSE TransB,
                 const int M, const int N, const int K, const double alpha,
                 const double *A, const int lda,
                 const double *B, const int ldb,
                 const double beta, double *C, const int ldc);
A:(M,K)
B:(K,N)
C:(M,N)
C = ALPHA*A*B + BETA*C
LDA,LDB,LDC=DIM
 */

void prod_matrix_with_blas( double* matrixA, double* matrixB, double* matrixC, int dim ) {
  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
              dim, dim, dim, 1.0,
              matrixB, dim,
              matrixC, dim, 0.0, matrixA, dim);
}

/* matrixA = matrixB * x */
void mult_matrix( double* matrixA, double* matrixB, double x, int dim ) {
  int i, j;

  for( i = 0; i  dim; i++ ) {
    for( j = 0; j  dim; j++ ) {
      AT(matrixA,i,j) = AT(matrixB,i,j) * x;
    }
  }
}

/* matrixA = matrixA * x */
/*
  SUBROUTINE DSCAL(N,DA,DX,INCX)
  DX = DA * DX
  DA:scalar
  DX:(N)
  INCX:1
 */
void mult_matrix_with_blas( double* matrixA, double x, int dim ) {
  cblas_dscal( dim*dim, x, matrixA, 1 );
}

void print_matrix( double* matrix, int dim ) {
  int i,j;

  for( i = 0; i  dim; i++ ) {
    for( j = 0; j  dim; j++ ) {
      // output
      printf( "%lf ", AT(matrix,i,j) );
    }
    printf( "\n" );
  }

}

double my_rand(void) {
    next_rand = next_rand * 1103515245 + 12345;
    return (((double)((unsigned)(next_rand/65536) % 32768))/32768);
}