mercredi 24 juin 2020

How can I make my c++ code run faster with eigen library?

I have written a parallelized c++ code, which functions as follows :

  1. There are 75 'w' points, and each of them is sent to one processor.
  2. For each 'w' point, I am defining a matrix. Then I am diagonalizing it. I am using the eigenvectors to compute a particular quantity by summing over the fourth power of each of the matrix elements. And then I average this quantity over 300 iterations of the matrix.

So I am using Eigen package for this calculation, and I compile the code with mpiCC -I eigen -Ofast filename.cpp. For a 512 x 512 matrix, the whole procedure takes 2.5 hours. Currently I need to do the same for a 2748 x 2748 matrix, and it's still going on after approx. 12:30 hrs. Is there anyway I can make the code run faster?

The code is given here for reference :

#include <iostream>
#include <complex>
#include <cmath>
#include<math.h>
#include<stdlib.h>
#include<time.h>
#include<Eigen/Dense>
#include<fstream>
#include<random>
#include "mpi.h"

#define pi 3.14159

using namespace std;
using namespace Eigen;


#define no_of_processor 75 // no of processors used for computing 
#define no_of_disorder_av 300 //300 iterations for each w
#define A_ratio    1      //aspect ratio Ly/Lx
#define Lx        8
#define w_init    0.1    // initial value of potential strength 
#define del_w     0.036    // increment of w in each loop
#define w_loop     75      // no of different w
#define alpha    (sqrt(5.0)-1.0)/(double)2.0

double onsite_pot(int x,int y, int z, double phi, double alpha_0){

 double B11=alpha;
 double B12=alpha;
 double B13=alpha;
 double b1= (double)B11*x+(double)B12*y+(double)B13*z;
 double c11= 1.0-cos(2*M_PI*b1+phi); //printf("%f\n",c1);
 double c12= 1.0+(alpha_0*cos(2*M_PI*b1+phi));
 double c1=c11/c12;

 return c1;
 }

int main(int argc, char *argv[])
{
clock_t begin = clock();

/*golden ratio----------------------------*/

char filename[200];
double t=1.0;
int i,j,k,l,m;//for loops
double alpha_0=0;

int Ly=A_ratio*Lx;
int Lz= A_ratio*Lx;
int A=Lx*Ly;
int V=A*Lz; //size of the matrix   

int numtasks,rank,RC;
RC=MPI_Init(&argc,&argv);
if (RC != 0) {
 printf ("Error starting MPI program. Terminating.\n");
 MPI_Abort(MPI_COMM_WORLD, RC);
 }
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);

sprintf(filename,"IPR3D%dalpha%g.dat",rank+1,alpha_0);
ofstream myfile;
myfile.open(filename, ios::app); //preparing file to write in

int n = w_loop/no_of_processor;
double w=w_init+(double)(n*rank*del_w);
int var_w_loop = 0;
MatrixXcd H(V,V); // matrix getting defined here
MatrixXcd evec(V,V); // matrix for eigenvector
VectorXcd temp(V); // vector for a temporary space used later in calculation
double IPR[V], E_levels[V]; // for average value of the quantity and eigen values.

do{
for(i=0;i<V;i++)
 {
   IPR[i]=E_levels[i]=0.0;
 }

/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*----loop for disorder average---------------------------*/
for (l=0;l<no_of_disorder_av;l++){

 for (i=V; i<V; i++)
 { 
  for (j=V; j<V; j++)
      H(i,j)=0;
 }
double  phi=(2*M_PI*(double)l)/(double)no_of_disorder_av;
//matrix assignment starts
int z=0; 
for (int plane=0; plane<Lz; plane++)
 {
  z += 1 ;
  int y=0;
  int indx1= plane*A  ;  //initial index of each plane
  int indx2= indx1+A-1;  // last index of each plane
  for (int linchain=0; linchain<Ly; linchain++){
      y += 1;
      int x=0;
      int indx3= indx1 + linchain*Lx ;  //initial index of each chain
      int indx4= indx3 + Lx-1  ;  //last index of each chain
      for (int latpoint=0; latpoint<Lx; latpoint++){
          x += 1;
          int indx5= indx3 +latpoint;  //index of each lattice point
          H(indx5,indx5)= 2*w*onsite_pot(x,y,z,phi,alpha_0);  //onsite potential
          if (indx5<indx4){    //hopping inside a chain
             H(indx5,(indx5+1))= t; //printf("%d %d\n",indx5,indx5+1);
             H((indx5+1),indx5)= t;
             }
          if (indx5<=(indx2-Lx)){  //hopping between different chain
             H(indx5,(indx5+Lx))= t; //printf("%d %d\n",indx5,indx5+Lx);
             H((indx5+Lx),indx5)= t;
             printf("%d\n",indx5);
             }
          if (indx5<(V-A)){
             H(indx5,(indx5+A))= t;   //printf("%d %d\n",indx5,indx5+A);// hopping between different plane
             H((indx5+A),indx5)= t;
             }                 
         } //latpoint loop
     }//linchain loop
  }//plane loop


  //PB..............................................
  for (int plane=0; plane<Lz; plane++){
  int indx1= plane*A;  //initial index of each plane
  int indx2 = indx1+A-1 ;//last indx of each plane
  //periodic boundary condition x
    for (int linchain=0; linchain<Ly; linchain++){
    int indx3 = indx1 + linchain*Lx;   // initital index of each chain
    int indx4=indx3+ Lx-1;          //last index of each chain
    H(indx3,indx4)= t; //printf("%d %d\n",indx3,indx4);
    H(indx4,indx3)= t;
     }//linchain loop
 //periodic boundary condition y
    for (int i=0; i<Lx; i++){
    int indx5 = indx1+i;
    int indx6 = indx5+(Ly-1)*Lx;  //printf("%d %d\n",indx5,indx6);
    H(indx5,indx6)=t;
    H(indx6,indx5)=t;
   }
   }//plane loop

 //periodic boundary condition in z
 for (int i=0; i<A; i++){
 int indx1=i ;
 int indx2=(Lz-1)*A+i ;
 H(indx1,indx2)= t; //printf("%d %d\n",indx1,indx2);
 H(indx2,indx1)= t ; 
 }
//matrix assignment ends
/**-------------------------------------------------------*/
double Tr = abs(H.trace());
for(i=0;i<V;i++)
  {
    for(j=0;j<V;j++)
    {
      if(i==j)
      {
      H(i,j) = H(i,j)-(Tr/(double)V);
      }
    }
  }

  SelfAdjointEigenSolver<MatrixXcd> es(H); //defining the diagonalizing function
   double *E = NULL; 
   E = new double[V]; // for the eigenvalues
   for(i=0;i<V;i++)
    {
     E[i]=es.eigenvalues()[i];
     //cout<<"original eigenvalues "<<E[i]<<"\n";
    }
   evec=es.eigenvectors();
   double bandwidth = E[V-1] - E[0];
   for(i=0;i<V;i++)
   E[i]=E[i]/bandwidth;

   for(i=0;i<V;i++)
    {
     E_levels[i] = E_levels[i]+E[i]; //summing over energies for each iteration
    }

   delete[] E;
   E=NULL;
   //main calculation process
   for(i=0;i<V;i++)
   {
     temp = evec.col(i);
     double num=0.0,denom=0.0; 
     for(j=0;j<V;j++)
      {
       num=num+pow(abs(temp(j)),4);
       denom=denom+pow(abs(temp(j)),2);
      }
     IPR[i] = IPR[i]+(num/(denom*denom)); 
   } //calculation ends
   }//no_of_disorder_av loop (l)
   for(i=0; i<V; i++)
  {
    myfile<<w<<"\t"<<(E_levels[i]/(double)no_of_disorder_av)<<"\t" 
    <<(IPR[i]/(double)no_of_disorder_av)<<"\n"; //taking output in file
  }
  var_w_loop++; // counts number of w loop
  w+= del_w; // proceeds to next w

  }while(var_w_loop<n) ;    // w varying do while loop

  MPI_Finalize();

  clock_t end = clock();
  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
  printf("time spent %f s\n\n",time_spent);

  return 0;
 }

Aucun commentaire:

Enregistrer un commentaire