Commit MetaInfo

Revision: 50f6dfa1ce8b7359ed19d60177ebf851710a5b4c (tree)
Time: 2013-10-14 17:17:50
Author: Mikiya Fujii <mikiya.fujii@gmai...>
Committer: Mikiya Fujii

Log Message

trunk.r1540 is merged to branches/fx10. #32094

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/fx10@1541 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -514,6 +514,7 @@ void Cndo2::DoSCF(bool requiresGuess){
514514 double*** diisStoredDensityMatrix = NULL;
515515 double*** diisStoredErrorVect = NULL;
516516 double** diisErrorProducts = NULL;
517+ double** tmpDiisErrorProducts = NULL;
517518 double* diisErrorCoefficients = NULL;
518519
519520 try{
@@ -521,6 +522,7 @@ void Cndo2::DoSCF(bool requiresGuess){
521522 &diisStoredDensityMatrix,
522523 &diisStoredErrorVect,
523524 &diisErrorProducts,
525+ &tmpDiisErrorProducts,
524526 &diisErrorCoefficients);
525527 // calculate electron integral
526528 this->CalcGammaAB(this->gammaAB, *this->molecule);
@@ -590,6 +592,7 @@ void Cndo2::DoSCF(bool requiresGuess){
590592 diisStoredDensityMatrix,
591593 diisStoredErrorVect,
592594 diisErrorProducts,
595+ tmpDiisErrorProducts,
593596 diisErrorCoefficients,
594597 diisError,
595598 hasAppliedDIIS,
@@ -612,6 +615,7 @@ void Cndo2::DoSCF(bool requiresGuess){
612615 &diisStoredDensityMatrix,
613616 &diisStoredErrorVect,
614617 &diisErrorProducts,
618+ &tmpDiisErrorProducts,
615619 &diisErrorCoefficients);
616620
617621 throw ex;
@@ -620,6 +624,7 @@ void Cndo2::DoSCF(bool requiresGuess){
620624 &diisStoredDensityMatrix,
621625 &diisStoredErrorVect,
622626 &diisErrorProducts,
627+ &tmpDiisErrorProducts,
623628 &diisErrorCoefficients);
624629
625630 double ompEndTime = omp_get_wtime();
@@ -750,6 +755,7 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
750755 double**** diisStoredDensityMatrix,
751756 double**** diisStoredErrorVect,
752757 double*** diisErrorProducts,
758+ double*** tmpDiisErrorProducts,
753759 double** diisErrorCoefficients) const{
754760
755761 int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF();
@@ -767,6 +773,9 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
767773 MallocerFreer::GetInstance()->Free<double>(diisErrorProducts,
768774 diisNumErrorVect+1,
769775 diisNumErrorVect+1);
776+ MallocerFreer::GetInstance()->Free<double>(tmpDiisErrorProducts,
777+ diisNumErrorVect+1,
778+ diisNumErrorVect+1);
770779 MallocerFreer::GetInstance()->Free<double>(diisErrorCoefficients,
771780 diisNumErrorVect+1);
772781 }
@@ -775,6 +784,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
775784 double**** diisStoredDensityMatrix,
776785 double**** diisStoredErrorVect,
777786 double*** diisErrorProducts,
787+ double*** tmpDiisErrorProducts,
778788 double** diisErrorCoefficients){
779789
780790 int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF();
@@ -791,6 +801,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
791801 this->molecule->GetTotalNumberAOs(),
792802 this->molecule->GetTotalNumberAOs());
793803 MallocerFreer::GetInstance()->Malloc<double>(diisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1);
804+ MallocerFreer::GetInstance()->Malloc<double>(tmpDiisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1);
794805 MallocerFreer::GetInstance()->Malloc<double>(diisErrorCoefficients, diisNumErrorVect+1);
795806 }
796807 }
@@ -805,6 +816,7 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation,
805816 double*** diisStoredDensityMatrix,
806817 double*** diisStoredErrorVect,
807818 double** diisErrorProducts,
819+ double** tmpDiisErrorProducts,
808820 double* diisErrorCoefficients,
809821 double& diisError,
810822 bool& hasAppliedDIIS,
@@ -873,7 +885,13 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation,
873885 if(diisNumErrorVect <= step && diisEndError<diisError && diisError<diisStartError){
874886 hasAppliedDIIS = true;
875887 try{
876- MolDS_wrappers::Lapack::GetInstance()->Dsysv(diisErrorProducts,
888+#pragma omp parallel for schedule(auto)
889+ for(int i=0; i<diisNumErrorVect+1; i++){
890+ for(int j=0; j<diisNumErrorVect+1; j++){
891+ tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j];
892+ }
893+ }
894+ MolDS_wrappers::Lapack::GetInstance()->Dsysv(tmpDiisErrorProducts,
877895 diisErrorCoefficients,
878896 diisNumErrorVect+1);
879897 }catch(MolDSException ex){
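
The copy into tmpDiisErrorProducts is needed because the reworked Dsysv (see the Lapack.cpp hunks below) now factorizes and overwrites the matrix it is handed, while the stored DIIS error products are reused on later SCF iterations. A minimal sketch of this solve-on-a-scratch-copy pattern, written against plain LAPACKE on contiguous row-major storage rather than the MolDS_wrappers::Lapack wrapper (an assumption made only to keep the example self-contained; names here are illustrative):

#include <lapacke.h>
#include <vector>

// Solve B * c = rhs without destroying B: dsysv factorizes its input in place,
// so the factorization is done on a scratch copy of B.
bool SolveDiisCoefficients(const std::vector<double>& B,  // (n x n), row-major
                           std::vector<double>& c,        // in: rhs, out: solution
                           int n) {
    std::vector<double> tmpB(B);                          // scratch copy, overwritten below
    std::vector<lapack_int> ipiv(n);
    lapack_int info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, 'U', n, 1,
                                    tmpB.data(), n, ipiv.data(), c.data(), 1);
    return info == 0;                                     // B itself is left untouched
}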
@@ -1295,6 +1313,7 @@ double Cndo2::GetMolecularIntegralElement(int moI, int moJ, int moK, int moL,
12951313 void Cndo2::UpdateOldOrbitalElectronPopulation(double** oldOrbitalElectronPopulation,
12961314 double const* const* orbitalElectronPopulation,
12971315 int numberAOs) const{
1316+#pragma omp parallel for schedule(auto)
12981317 for(int i=0; i<numberAOs; i++){
12991318 for(int j=0; j<numberAOs; j++){
13001319 oldOrbitalElectronPopulation[i][j] = orbitalElectronPopulation[i][j];
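
Several dense copy loops in this commit gain the same OpenMP directive; the pattern is simply a row-parallel copy between non-aliasing matrices. A minimal sketch:

#include <omp.h>

// Row-parallel copy, as in UpdateOldOrbitalElectronPopulation above.
void CopyMatrix(double** dst, double const* const* src, int n) {
#pragma omp parallel for schedule(auto)   // each thread copies whole rows
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            dst[i][j] = src[i][j];
        }
    }
}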
@@ -1371,11 +1390,9 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
13711390 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
13721391 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
13731392 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1374- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs);
13751393 MolDS_mpi::AsyncCommunicator asyncCommunicator;
13761394 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1377- &asyncCommunicator,
1378- mPassingTimes) );
1395+ &asyncCommunicator) );
13791396
13801397 MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs);
13811398 for(int A=totalNumberAtoms-1; 0<=A; A--){
@@ -1437,26 +1454,25 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
14371454 } // end of if(mpiRank == calcRank)
14381455
14391456 // set data to gather in mpiHeadRank with asynchronous MPI
1440- int tag = mu;
1441- int source = calcRank;
1442- int dest = mpiHeadRank;
1457+ int tag = mu;
1458+ int source = calcRank;
1459+ int dest = mpiHeadRank;
1460+ double* buff = &fockMatrix[mu][mu];
1461+ MolDS_mpi::molds_mpi_int num = totalNumberAOs-mu;
14431462 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1444- asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu],
1445- totalNumberAOs-mu,
1446- source,
1447- tag);
1463+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
14481464 }
14491465 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1450- asyncCommunicator.SetSentVector(&fockMatrix[mu][mu],
1451- totalNumberAOs-mu,
1452- dest,
1453- tag);
1466+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
14541467 }
14551468 } // end of loop mu parallelized with MPI
14561469 } // end of loop A
14571470 // Delete the communication thread.
1471+ asyncCommunicator.Finalize();
14581472 communicationThread.join();
1459- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank);
1473+ double* buff = &fockMatrix[0][0];
1474+ MolDS_mpi::molds_mpi_int num = totalNumberAOs*totalNumberAOs;
1475+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank);
14601476
14611477 /*
14621478 this->OutputLog("fock matrix\n");
@@ -1570,12 +1586,10 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation,
15701586 const Molecule& molecule) const{
15711587 int totalNumberAtoms = molecule.GetNumberAtoms();
15721588 MallocerFreer::GetInstance()->Initialize<double>(atomicElectronPopulation, totalNumberAtoms);
1573-
1574- int firstAOIndex = 0;
1575- int numberAOs = 0;
1589+#pragma omp parallel for schedule(auto)
15761590 for(int A=0; A<totalNumberAtoms; A++){
1577- firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex();
1578- numberAOs = molecule.GetAtom(A)->GetValenceSize();
1591+ int firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex();
1592+ int numberAOs = molecule.GetAtom(A)->GetValenceSize();
15791593 for(int i=firstAOIndex; i<firstAOIndex+numberAOs; i++){
15801594 atomicElectronPopulation[A] += orbitalElectronPopulation[i][i];
15811595 }
@@ -1591,11 +1605,9 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
15911605 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
15921606 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
15931607 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1594- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
15951608 MolDS_mpi::AsyncCommunicator asyncCommunicator;
15961609 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1597- &asyncCommunicator,
1598- mPassingTimes) );
1610+ &asyncCommunicator) );
15991611
16001612 // This loop (A) is parallelized by MPI
16011613 for(int A=0; A<totalAtomNumber; A++){
@@ -1669,24 +1681,23 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
16691681 } // end of if(mpiRank==calcRank)
16701682
16711683 // set data to gater in mpiHeadRank with asynchronous MPI
1672- int tag = A;
1673- int source = calcRank;
1674- int dest = mpiHeadRank;
1684+ int tag = A;
1685+ int source = calcRank;
1686+ int dest = mpiHeadRank;
1687+ double* buff = &gammaAB[A][A];
1688+ MolDS_mpi::molds_mpi_int num = totalAtomNumber-A;
16751689 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1676- asyncCommunicator.SetRecvedVector(&gammaAB[A][A],
1677- totalAtomNumber-A,
1678- source,
1679- tag);
1690+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
16801691 }
16811692 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1682- asyncCommunicator.SetSentVector(&gammaAB[A][A],
1683- totalAtomNumber-A,
1684- dest,
1685- tag);
1693+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
16861694 }
16871695 } // end of loop A prallelized by MPI
1696+ asyncCommunicator.Finalize();
16881697 communicationThread.join();
1689- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank);
1698+ double* buff = &gammaAB[0][0];
1699+ MolDS_mpi::molds_mpi_int num = totalAtomNumber*totalAtomNumber;
1700+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank);
16901701
16911702 #pragma omp parallel for schedule(auto)
16921703 for(int A=0; A<totalAtomNumber; A++){
@@ -1792,12 +1803,9 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
17921803 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
17931804 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
17941805 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1795- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
1796- mPassingTimes *= CartesianType_end;
17971806 MolDS_mpi::AsyncCommunicator asyncCommunicator;
17981807 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1799- &asyncCommunicator,
1800- mPassingTimes) );
1808+ &asyncCommunicator) );
18011809
18021810 // This loop (A and mu) is parallelized by MPI
18031811 for(int A=0; A<totalAtomNumber; A++){
@@ -1836,43 +1844,32 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
18361844 } // end lof if(mpiRank == calcRank)
18371845
18381846 // set data to gater in mpiHeadRank with asynchronous MPI
1839- int tagX = A* CartesianType_end + XAxis;
1840- int tagY = A* CartesianType_end + YAxis;
1841- int tagZ = A* CartesianType_end + ZAxis;
1842- int source = calcRank;
1843- int dest = mpiHeadRank;
1847+ int tagX = A* CartesianType_end + XAxis;
1848+ int tagY = A* CartesianType_end + YAxis;
1849+ int tagZ = A* CartesianType_end + ZAxis;
1850+ int source = calcRank;
1851+ int dest = mpiHeadRank;
1852+ double* buffX = &cartesianMatrix[XAxis][firstAOIndexA][0];
1853+ double* buffY = &cartesianMatrix[YAxis][firstAOIndexA][0];
1854+ double* buffZ = &cartesianMatrix[ZAxis][firstAOIndexA][0];
1855+ MolDS_mpi::molds_mpi_int num = numValenceAOsA*totalAONumber;
18441856 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1845- asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
1846- numValenceAOsA*totalAONumber,
1847- source,
1848- tagX);
1849- asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
1850- numValenceAOsA*totalAONumber,
1851- source,
1852- tagY);
1853- asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
1854- numValenceAOsA*totalAONumber,
1855- source,
1856- tagZ);
1857+ asyncCommunicator.SetRecvedMessage(buffX, num, source, tagX);
1858+ asyncCommunicator.SetRecvedMessage(buffY, num, source, tagY);
1859+ asyncCommunicator.SetRecvedMessage(buffZ, num, source, tagZ);
18571860 }
18581861 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1859- asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
1860- numValenceAOsA*totalAONumber,
1861- dest,
1862- tagX);
1863- asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
1864- numValenceAOsA*totalAONumber,
1865- dest,
1866- tagY);
1867- asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
1868- numValenceAOsA*totalAONumber,
1869- dest,
1870- tagZ);
1862+ asyncCommunicator.SetSentMessage(buffX, num, dest, tagX);
1863+ asyncCommunicator.SetSentMessage(buffY, num, dest, tagY);
1864+ asyncCommunicator.SetSentMessage(buffZ, num, dest, tagZ);
18711865 }
18721866 } // end of loop for int A with MPI
18731867 // Delete the communication thread.
1868+ asyncCommunicator.Finalize();
18741869 communicationThread.join();
1875- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, mpiHeadRank);
1870+ double* buff = &cartesianMatrix[0][0][0];
1871+ MolDS_mpi::molds_mpi_int num = CartesianType_end*totalAONumber*totalAONumber;
1872+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank);
18761873
18771874 /*
18781875 // communication to collect all matrix data on head-rank
@@ -3908,11 +3905,9 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
39083905 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
39093906 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
39103907 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3911- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
39123908 MolDS_mpi::AsyncCommunicator asyncCommunicator;
39133909 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3914- &asyncCommunicator,
3915- mPassingTimes) );
3910+ &asyncCommunicator) );
39163911
39173912 MallocerFreer::GetInstance()->Initialize<double>(overlapAOs,
39183913 totalAONumber,
@@ -3976,24 +3971,23 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
39763971 } // end of if(mpiRank == calcRnak)
39773972
39783973 // set data to gather in mpiHeadRank with asynchronous MPI
3979- int tag = A;
3980- int source = calcRank;
3981- int dest = mpiHeadRank;
3974+ int tag = A;
3975+ int source = calcRank;
3976+ int dest = mpiHeadRank;
3977+ double* buff = overlapAOs[firstAOIndexA];
3978+ MolDS_mpi::molds_mpi_int num = totalAONumber*numValenceAOs;
39823979 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
3983- asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA],
3984- totalAONumber*numValenceAOs,
3985- source,
3986- tag);
3980+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
39873981 }
39883982 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
3989- asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA],
3990- totalAONumber*numValenceAOs,
3991- dest,
3992- tag);
3983+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
39933984 }
39943985 } // end of loop A parallelized with MPI
3986+ asyncCommunicator.Finalize();
39953987 communicationThread.join();
3996- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank);
3988+ double* buff = &overlapAOs[0][0];
3989+ MolDS_mpi::molds_mpi_int num = totalAONumber*totalAONumber;
3990+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank);
39973991
39983992 #pragma omp parallel for schedule(auto)
39993993 for(int mu=0; mu<totalAONumber; mu++){
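
Every hunk in Cndo2.cpp above follows the same communication pattern: a dedicated thread runs AsyncCommunicator::Run, each MPI rank queues SetSentMessage or SetRecvedMessage for the rows it computed, Finalize() then announces that nothing further will be queued before the thread is joined, and the assembled matrix is finally broadcast from the head rank. A condensed, self-contained sketch of the underlying gather-then-broadcast idea, using plain blocking MPI instead of the asynchronous communicator (a simplification for illustration; the function and the placeholder work are hypothetical):

#include <mpi.h>
#include <vector>

// matrix holds an n x n row-major array; each rank computes the upper-triangular
// part of its rows, the head rank collects them, and the result is broadcast.
void GatherRowsThenBroadcast(std::vector<double>& matrix, int n, MPI_Comm comm) {
    int rank, size, head = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    for (int mu = 0; mu < n; mu++) {
        int calcRank = mu % size;                 // same round-robin mapping as the diff
        double* row   = &matrix[mu * n + mu];     // upper-triangular tail of row mu
        int num       = n - mu;
        if (rank == calcRank) {
            for (int nu = mu; nu < n; nu++)       // placeholder for the real integrals
                matrix[mu * n + nu] = mu + nu;
        }
        if (rank == head && rank != calcRank)
            MPI_Recv(row, num, MPI_DOUBLE, calcRank, mu, comm, MPI_STATUS_IGNORE);
        if (rank != head && rank == calcRank)
            MPI_Send(row, num, MPI_DOUBLE, head, mu, comm);
    }
    MPI_Bcast(matrix.data(), n * n, MPI_DOUBLE, head, comm);
}

The real code overlaps these transfers with computation by pushing the messages onto the AsyncCommunicator queue and letting the communication thread perform them in the background.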
--- a/src/cndo/Cndo2.h
+++ b/src/cndo/Cndo2.h
@@ -485,6 +485,7 @@ private:
485485 double*** diisStoredDensityMatrix,
486486 double*** diisStoredErrorVect,
487487 double** diisErrorProducts,
488+ double** tmpDiisErrorProducts,
488489 double* diisErrorCoefficients,
489490 double& diisError,
490491 bool& hasAppliedDIIS,
@@ -510,11 +511,13 @@ private:
510511 double**** diisStoredDensityMatrix,
511512 double**** diisStoredErrorVect,
512513 double*** diisErrorProducts,
514+ double*** tmpDiisErrorProducts,
513515 double** diisErrorCoefficients) const;
514516 void MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
515517 double**** diisStoredDensityMatrix,
516518 double**** diisStoredErrorVect,
517519 double*** diisErrorProducts,
520+ double*** tmpDiisErrorProducts,
518521 double** diisErrorCoefficients);
519522 };
520523
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -3443,11 +3443,9 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34433443 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
34443444 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
34453445 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3446- int mPassingTimes = totalNumberAtoms-1;
34473446 MolDS_mpi::AsyncCommunicator asyncCommunicator;
34483447 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3449- &asyncCommunicator,
3450- mPassingTimes) );
3448+ &asyncCommunicator) );
34513449 #ifdef MOLDS_DBG
34523450 if(twoElecTwoCore == NULL){
34533451 throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix);
@@ -3515,9 +3513,10 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
35153513 OrbitalType twoElecLimit = dxy;
35163514 int numBuff = (twoElecLimit+1)*twoElecLimit/2;
35173515 int num = (totalNumberAtoms-b)*numBuff*numBuff;
3518- asyncCommunicator.SetBroadcastedVector(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank);
3516+ asyncCommunicator.SetBroadcastedMessage(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank);
35193517 }
35203518 } // end of loop a parallelized with MPI
3519+ asyncCommunicator.Finalize();
35213520 communicationThread.join();
35223521
35233522 #pragma omp parallel for schedule(auto)
--- a/src/mpi/AsyncCommunicator.cpp
+++ b/src/mpi/AsyncCommunicator.cpp
@@ -35,8 +35,15 @@
3535 #include"AsyncCommunicator.h"
3636 using namespace std;
3737 namespace MolDS_mpi{
38-AsyncCommunicator::AsyncCommunicator(){}
38+AsyncCommunicator::AsyncCommunicator(){
39+ this->hasAllMessagesSet=false;
40+}
3941 AsyncCommunicator::~AsyncCommunicator(){}
42+void AsyncCommunicator::Finalize(){
43+ boost::mutex::scoped_lock lk(this->stateGuard);
44+ this->hasAllMessagesSet = true;
45+ this->stateChange.notify_all();
46+}
4047 }
4148
4249
--- a/src/mpi/AsyncCommunicator.h
+++ b/src/mpi/AsyncCommunicator.h
@@ -28,29 +28,28 @@ class AsyncCommunicator{
2828 public:
2929 AsyncCommunicator();
3030 ~AsyncCommunicator();
31- template<typename T> void Run(int passingTimes){
31+ template<typename T> void Run(){
3232 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
33- while(0<passingTimes){
34- sleep(0.1);
33+ while(true){
3534 boost::mutex::scoped_lock lk(this->stateGuard);
3635 try{
37- DataInfo dInfo = this->dataQueue.FrontPop();
38- if(dInfo.mpiFuncType == MolDS_base::Send){
39- MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest,
40- dInfo.tag,
41- reinterpret_cast<T*>(dInfo.vectorPtr),
42- dInfo.num);
36+ MessageInfo mInfo = this->messageQueue.FrontPop();
37+ if(mInfo.mpiFuncType == MolDS_base::Send){
38+ MolDS_mpi::MpiProcess::GetInstance()->Send(mInfo.dest,
39+ mInfo.tag,
40+ reinterpret_cast<T*>(mInfo.vectorPtr),
41+ mInfo.num);
4342 }
44- else if(dInfo.mpiFuncType == MolDS_base::Recv){
45- MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source,
46- dInfo.tag,
47- reinterpret_cast<T*>(dInfo.vectorPtr),
48- dInfo.num);
43+ else if(mInfo.mpiFuncType == MolDS_base::Recv){
44+ MolDS_mpi::MpiProcess::GetInstance()->Recv(mInfo.source,
45+ mInfo.tag,
46+ reinterpret_cast<T*>(mInfo.vectorPtr),
47+ mInfo.num);
4948 }
50- else if(dInfo.mpiFuncType == MolDS_base::Broadcast){
51- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr),
52- dInfo.num,
53- dInfo.source);
49+ else if(mInfo.mpiFuncType == MolDS_base::Broadcast){
50+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(mInfo.vectorPtr),
51+ mInfo.num,
52+ mInfo.source);
5453 }
5554 else{
5655 std::stringstream ss;
@@ -59,10 +58,12 @@ public:
5958 throw ex;
6059 }
6160 this->stateChange.notify_all();
62- passingTimes--;
6361 }
6462 catch(MolDS_base::MolDSException ex){
65- if(ex.HasKey(MolDS_base::EmptyQueue)){
63+ if(ex.HasKey(MolDS_base::EmptyQueue && this->hasAllMessagesSet)){
64+ break;
65+ }
66+ else if(ex.HasKey(MolDS_base::EmptyQueue && !this->hasAllMessagesSet)){
6667 this->stateChange.wait(lk);
6768 continue;
6869 }
@@ -73,51 +74,54 @@ public:
7374 }
7475 }
7576
76- template<typename T> void SetSentVector(T* vector,
77- molds_mpi_int num,
78- int dest,
79- int tag){
77+ template<typename T> void SetSentMessage(T* vector,
78+ molds_mpi_int num,
79+ int dest,
80+ int tag){
8081 int source = NON_USED;
8182 MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send;
82- this->SetVector(vector, num, source, dest, tag, mpiFuncType);
83+ this->SetMessage(vector, num, source, dest, tag, mpiFuncType);
8384 }
8485
85- template<typename T> void SetRecvedVector(T* vector,
86- molds_mpi_int num,
87- int source,
88- int tag){
86+ template<typename T> void SetRecvedMessage(T* vector,
87+ molds_mpi_int num,
88+ int source,
89+ int tag){
8990 int dest = NON_USED;
9091 MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv;
91- this->SetVector(vector, num, source, dest, tag, mpiFuncType);
92+ this->SetMessage(vector, num, source, dest, tag, mpiFuncType);
9293 }
9394
94- template<typename T> void SetBroadcastedVector(T* vector, molds_mpi_int num, int root){
95+ template<typename T> void SetBroadcastedMessage(T* vector, molds_mpi_int num, int root){
9596 int source = root;
9697 int dest = NON_USED;
9798 int tag = NON_USED;
9899 MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast;
99- this->SetVector(vector, num, source, dest, tag, mpiFuncType);
100+ this->SetMessage(vector, num, source, dest, tag, mpiFuncType);
100101 }
101102
103+ void Finalize();
104+
102105 private:
103- struct DataInfo{intptr_t vectorPtr;
104- molds_mpi_int num;
105- int source;
106- int dest;
107- int tag;
108- MolDS_base::MpiFunctionType mpiFuncType;};
106+ struct MessageInfo{intptr_t vectorPtr;
107+ molds_mpi_int num;
108+ int source;
109+ int dest;
110+ int tag;
111+ MolDS_base::MpiFunctionType mpiFuncType;};
109112 boost::mutex stateGuard;
110113 boost::condition stateChange;
111- MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue;
112- template<typename T> void SetVector(T* vector,
113- molds_mpi_int num,
114- int source,
115- int dest,
116- int tag,
117- MolDS_base::MpiFunctionType mpiFuncType){
114+ bool hasAllMessagesSet;
115+ MolDS_base_containers::ThreadSafeQueue<MessageInfo> messageQueue;
116+ template<typename T> void SetMessage(T* vector,
117+ molds_mpi_int num,
118+ int source,
119+ int dest,
120+ int tag,
121+ MolDS_base::MpiFunctionType mpiFuncType){
118122 boost::mutex::scoped_lock lk(this->stateGuard);
119- DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType};
120- this->dataQueue.Push(dInfo);
123+ MessageInfo mInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType};
124+ this->messageQueue.Push(mInfo);
121125 this->stateChange.notify_all();
122126 }
123127 };
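
The Run() loop no longer counts down a precomputed passingTimes. Instead, producers queue messages and finally call Finalize(); the communication thread keeps draining the queue, sleeps on the condition variable while the queue is empty, and exits only once the queue is empty and hasAllMessagesSet is true. A minimal sketch of that shutdown protocol using the standard library instead of Boost (a simplification; the real worker dispatches an MPI Send, Recv, or Broadcast for each queued message):

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>

class MessagePump {
public:
    void Push(std::function<void()> msg) {        // like SetSentMessage/SetRecvedMessage
        std::lock_guard<std::mutex> lk(guard);
        queue.push(std::move(msg));
        stateChange.notify_all();
    }
    void Finalize() {                             // "no more messages will be queued"
        std::lock_guard<std::mutex> lk(guard);
        hasAllMessagesSet = true;
        stateChange.notify_all();
    }
    void Run() {                                  // body of the communication thread
        std::unique_lock<std::mutex> lk(guard);
        while (true) {
            if (!queue.empty()) {
                std::function<void()> msg = std::move(queue.front());
                queue.pop();
                lk.unlock();
                msg();                            // e.g. an MPI send/recv/broadcast
                lk.lock();
            } else if (hasAllMessagesSet) {
                break;                            // queue drained and Finalize() seen
            } else {
                stateChange.wait(lk);             // wait for Push() or Finalize()
            }
        }
    }
private:
    std::mutex guard;
    std::condition_variable stateChange;
    std::queue<std::function<void()>> queue;
    bool hasAllMessagesSet = false;
};

Because the exit test is only reached when the queue is empty, every message pushed before Finalize() is still delivered; a typical caller spawns a thread on Run(), pushes its messages during the computation loop, calls Finalize(), and joins the thread, which is exactly the sequence the Cndo2, Mndo, and ZindoS hunks follow.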
--- a/src/mpi/MpiProcess.cpp
+++ b/src/mpi/MpiProcess.cpp
@@ -97,22 +97,6 @@ MpiProcess* MpiProcess::GetInstance(){
9797
9898 void MpiProcess::Barrier(){this->communicator->barrier();}
9999
100-int MpiProcess::GetMessagePassingTimes(molds_mpi_int num)const{
101- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
102- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
103- int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
104- int calcTimes = num/mpiSize;
105- if(mpiRank < num%mpiSize){calcTimes+=1;}
106- int mpiPassingTimes;
107- if(mpiRank == mpiHeadRank){
108- mpiPassingTimes = num - calcTimes;
109- }
110- else{
111- mpiPassingTimes = calcTimes;
112- }
113- return mpiPassingTimes;
114-}
115-
116100 void MpiProcess::SetMessages(){
117101 this->errorMessageSplitMessageElemLimNegative
118102 = "Error in mpi::MpiProcess::SplitMessage2Chunks: elementsLimit is negative. \nelementsLimit=";
--- a/src/mpi/MpiProcess.h
+++ b/src/mpi/MpiProcess.h
@@ -114,7 +114,6 @@ public:
114114 MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num);
115115 }
116116 void Barrier();
117- int GetMessagePassingTimes(molds_mpi_int num) const;
118117 private:
119118 static MpiProcess* mpiProcess;
120119 MpiProcess();
@@ -142,25 +141,25 @@ private:
142141 int tagBase = origianlTag*numChunks;
143142 if(elementsLimit < 0){
144143 std::stringstream ss;
145- ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << endl;
144+ ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << std::endl;
146145 MolDS_base::MolDSException ex(ss.str());
147146 throw ex;
148147 }
149148 if(numChunks < 0){
150149 std::stringstream ss;
151- ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << endl;
150+ ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << std::endl;
152151 MolDS_base::MolDSException ex(ss.str());
153152 throw ex;
154153 }
155154 if(remaining < 0){
156155 std::stringstream ss;
157- ss << this->errorMessageSplitMessageRemainingNegative << remaining << endl;
156+ ss << this->errorMessageSplitMessageRemainingNegative << remaining << std::endl;
158157 MolDS_base::MolDSException ex(ss.str());
159158 throw ex;
160159 }
161160 if(tagBase < 0){
162161 std::stringstream ss;
163- ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << endl;
162+ ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << std::endl;
164163 MolDS_base::MolDSException ex(ss.str());
165164 throw ex;
166165 }
--- a/src/wrappers/Lapack.cpp
+++ b/src/wrappers/Lapack.cpp
@@ -36,7 +36,7 @@
3636 #ifdef __INTEL_COMPILER
3737 #include"mkl.h"
3838 #elif defined __FCC_VERSION
39- #include"fj_lapack.h"
39+ #include"lapacke.h"
4040 #else
4141 #if ( __WORDSIZE == 32 )
4242 #else
@@ -100,18 +100,10 @@ void Lapack::DeleteInstance(){
100100 * ***/
101101 molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){
102102 molds_lapack_int info = 0;
103- molds_lapack_int k = 0;
104- molds_lapack_int lwork;
105- molds_lapack_int liwork;
106- char job;
107103 char uplo = 'U';
108104 molds_lapack_int lda = size;
109- double* convertedMatrix;
110- double* tempEigenValues;
111- double* work;
112- molds_lapack_int* iwork;
113-
114105 // set job type
106+ char job;
115107 if(calcEigenVectors){
116108 job = 'V';
117109 }
@@ -119,88 +111,49 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa
119111 job = 'N';
120112 }
121113
122- // calc. lwork and liwork
123- if(size < 1 ){
124- stringstream ss;
125- ss << errorMessageDsyevdSize;
126- MolDSException ex(ss.str());
127- ex.SetKeyValue<int>(LapackInfo, info);
128- throw ex;
129- }
130- else if(size == 1){
131- lwork = 1;
132- liwork = 1;
133- }
134- else if(1 < size && job == 'N'){
135- lwork = 2*size + 1;
136- liwork = 2;
137- }
138- else{
139- // calc. k
140- double temp = log((double)size)/log(2.0);
141- if( (double)((molds_lapack_int)temp) < temp ){
142- k = (molds_lapack_int)temp + 1;
143- }
144- else{
145- k = (molds_lapack_int)temp;
146- }
147- lwork = 3*size*size + (5+2*k)*size + 1;
148- liwork = 5*size + 3;
149- }
150-
151- // malloc
152- work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 );
153- iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 );
154- convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 );
155- tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 );
156-
157- for(molds_lapack_int i = 0; i < size; i++){
158- for(molds_lapack_int j = i; j < size; j++){
159- convertedMatrix[i+j*size] = matrix[i][j];
160- }
161- }
162-
163114 // call Lapack
164115 #ifdef __INTEL_COMPILER
165- dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info);
116+ info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues);
166117 #elif defined __FCC_VERSION
167- molds_lapack_int jobLen=1;
168- molds_lapack_int uploLen=1;
169- dsyevd_(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info, jobLen, uploLen);
118+ info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues);
170119 #else
171- info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork);
120+ info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues);
172121 #endif
173122
174- for(molds_lapack_int i = 0; i < size; i++){
175- for(molds_lapack_int j = 0; j < size; j++){
176- matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector
177- //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector
123+ // make i-th row i-the eigenvector
124+ double** tmpMatrix=NULL;
125+ try{
126+ MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, size, size);
127+ for(molds_lapack_int i = 0; i < size; i++){
128+ for(molds_lapack_int j = 0; j < size; j++){
129+ tmpMatrix[j][i] = matrix[i][j];
130+ }
178131 }
132+ for(molds_lapack_int i = 0; i < size; i++){
133+ for(molds_lapack_int j = 0; j < size; j++){
134+ matrix[i][j] = tmpMatrix[i][j];
135+ }
136+ }
137+ }
138+ catch(MolDSException ex){
139+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size);
140+ throw ex;
179141 }
142+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size);
180143
144+ // adjust phase of eigenvectors
181145 for(molds_lapack_int i=0;i<size;i++){
182- double temp = 0.0;
146+ double tmp = 0.0;
183147 for(molds_lapack_int j=0;j<size;j++){
184- temp += matrix[i][j];
148+ tmp += matrix[i][j];
185149 }
186- if(temp<0){
150+ if(tmp<0){
187151 for(molds_lapack_int j=0;j<size;j++){
188152 matrix[i][j]*=-1.0;
189153 }
190154 }
191155 }
192156
193- for(molds_lapack_int i = 0; i < size; i++){
194- eigenValues[i] = tempEigenValues[i];
195- }
196- //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info);
197-
198- // free
199- MOLDS_LAPACK_free(work);
200- MOLDS_LAPACK_free(iwork);
201- MOLDS_LAPACK_free(convertedMatrix);
202- MOLDS_LAPACK_free(tempEigenValues);
203-
204157 if(info != 0){
205158 stringstream ss;
206159 ss << errorMessageDsyevdInfo;
@@ -216,21 +169,15 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa
216169 *
217170 * "matrix*X=b" is solved, then we get X by this method.
218171 * The X will be stored in b.
172+ * The matrix will be overwriten by this method.
219173 *
220174 */
221-molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){
175+molds_lapack_int Lapack::Dsysv(double** matrix, double* b, molds_lapack_int size){
222176 molds_lapack_int info = 0;
223- molds_lapack_int lwork;
224- char uplo = 'U';
225-#ifdef __FCC_VERSION
226- molds_lapack_int uploLen=1;
227-#endif
228- molds_lapack_int lda = size;
229- molds_lapack_int ldb = size;
177+ char uplo = 'U';
230178 molds_lapack_int nrhs = 1;
231- double* convertedMatrix;
232- double* work;
233- double* tempB;
179+ molds_lapack_int lda = size;
180+ molds_lapack_int ldb = nrhs;
234181 molds_lapack_int* ipiv;
235182
236183 if(size < 1 ){
@@ -241,54 +188,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap
241188
242189 // malloc
243190 ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 );
244- convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 );
245- tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 );
246-
247- for(molds_lapack_int i = 0; i < size; i++){
248- for(molds_lapack_int j = i; j < size; j++){
249- convertedMatrix[i+j*size] = matrix[i][j];
250- }
251- }
252- for(molds_lapack_int i = 0; i < size; i++){
253- tempB[i] = b[i];
254- }
255191
256- // calc. lwork
257- double blockSize=0.0;
258-#pragma omp critical
259- {
260- lwork = -1;
261- double tempWork[3]={0.0, 0.0, 0.0};
262192 #ifdef __INTEL_COMPILER
263- dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info);
193+ info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb);
264194 #elif defined __FCC_VERSION
265- dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info, uploLen);
195+ info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb);
266196 #else
267- info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork);
197+ info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb);
268198 #endif
269- blockSize = tempWork[0]/size;
270- }
271- info = 0;
272- lwork = blockSize*size;
273- work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 );
274-
275- // call Lapack
276-#ifdef __INTEL_COMPILER
277- dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info);
278-#elif defined __FCC_VERSION
279- dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info, uploLen);
280-#else
281- info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork);
282-#endif
283- for(molds_lapack_int i = 0; i < size; i++){
284- b[i] = tempB[i];
285- }
286199
287200 // free
288- MOLDS_LAPACK_free(convertedMatrix);
289201 MOLDS_LAPACK_free(ipiv);
290- MOLDS_LAPACK_free(work);
291- MOLDS_LAPACK_free(tempB);
292202
293203 if(info != 0){
294204 stringstream ss;
@@ -304,17 +214,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap
304214 /***
305215 *
306216 * "matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1) is solved, then we get X[i] by this method.
307- * The X[i] will be stored in b[i].
308- * b[i][j] is j-th element of i-th solution, b[i].
217+ * The X[i] will be stored in b[i], namely
218+ * the b[i][j] will be j-th element of i-th solution, b[i].
219+ * Besides, the matrix will be overwriten by this method.
309220 *
310221 */
311-molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{
222+molds_lapack_int Lapack::Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{
312223 molds_lapack_int info = 0;
313224 char trans = 'N';
314225 molds_lapack_int lda = size;
315- molds_lapack_int ldb = size;
316- double* convertedMatrix;
317- double* convertedB;
226+ molds_lapack_int ldb = nrhs;
227+ double* tmpB;
318228 molds_lapack_int* ipiv;
319229
320230 if(size < 1 ){
@@ -323,48 +233,39 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l
323233 throw MolDSException(ss.str());
324234 }
325235
326-
327236 try{
328237 // malloc
329238 ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 );
330- convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 );
331- convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 );
332- for(molds_lapack_int i = 0; i < size; i++){
333- for(molds_lapack_int j = 0; j < size; j++){
334- convertedMatrix[i+j*size] = matrix[i][j];
335- }
336- }
239+ tmpB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*nrhs, 16 );
240+ // matrix b should be transposed
337241 for(molds_lapack_int i = 0; i < nrhs; i++){
338242 for(molds_lapack_int j = 0; j < size; j++){
339- convertedB[j+i*size] = b[i][j];
243+ tmpB[j*nrhs+i] = b[i][j];
340244 }
341245 }
342- this->Dgetrf(convertedMatrix, ipiv, size, size);
246+ this->Dgetrf(&matrix[0][0], ipiv, size, size);
343247 #ifdef __INTEL_COMPILER
344- dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info);
248+ info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb);
345249 #elif defined __FCC_VERSION
346- molds_lapack_int transLen=1;
347- dgetrs_(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info, transLen);
250+ info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb);
348251 #else
349- info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb);
252+ info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb);
350253 #endif
351254 for(molds_lapack_int i = 0; i < nrhs; i++){
352255 for(molds_lapack_int j = 0; j < size; j++){
353- b[i][j] = convertedB[j+i*size];
256+ b[i][j] = tmpB[j*nrhs+i];
354257 }
355258 }
356259 }
357260 catch(MolDSException ex){
358261 // free
359- MOLDS_LAPACK_free(convertedMatrix);
360- MOLDS_LAPACK_free(convertedB);
262+ MOLDS_LAPACK_free(tmpB);
361263 MOLDS_LAPACK_free(ipiv);
362264 throw ex;
363265 }
364266 // free
365- MOLDS_LAPACK_free(convertedMatrix);
366- MOLDS_LAPACK_free(convertedB);
367267 MOLDS_LAPACK_free(ipiv);
268+ MOLDS_LAPACK_free(tmpB);
368269
369270 if(info != 0){
370271 stringstream ss;
@@ -379,43 +280,31 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l
379280 // Argument "matrix" will be LU-decomposed.
380281 molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
381282 molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 );
382- this->Dgetrf(matrix, ipiv, sizeM, sizeN);
283+ this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN);
383284 MOLDS_LAPACK_free(ipiv);
384285 molds_lapack_int info = 0;
385286 return info;
386287 }
387288
388-// Argument "matrix" is sizeM*sizeN matrix.
289+// Argument "matrix" is sizeM*sizeN matrix in Row-major (C/C++ style)
389290 // Argument "matrix" will be LU-decomposed.
390291 molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
391- double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 );
392- for(molds_lapack_int i=0; i<sizeM; i++){
393- for(molds_lapack_int j=0; j<sizeN; j++){
394- convertedMatrix[i+j*sizeM] = matrix[i][j];
395- }
396- }
397- this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN);
398- for(molds_lapack_int i=0; i<sizeM; i++){
399- for(molds_lapack_int j=0; j<sizeN; j++){
400- matrix[i][j] = convertedMatrix[i+j*sizeM];
401- }
402- }
403- MOLDS_LAPACK_free(convertedMatrix);
292+ this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN);
404293 molds_lapack_int info = 0;
405294 return info;
406295 }
407296
408297 // Argument "matrix" is sizeM*sizeN matrix.
409-// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type).
298+// The each element of "matrix" should be stored in 1-dimensional vecotre with Row major (C/C++ style).
410299 molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
411300 molds_lapack_int info = 0;
412301 molds_lapack_int lda = sizeM;
413302 #ifdef __INTEL_COMPILER
414- dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info);
303+ info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
415304 #elif defined __FCC_VERSION
416- dgetrf_(&sizeM, &sizeN, matrix, &lda, ipiv, &info);
305+ info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
417306 #else
418- info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
307+ info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
419308 #endif
420309 if(info != 0){
421310 stringstream ss;
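
All of the Lapack.cpp hunks above drop the hand-rolled column-major conversion buffers and workspace queries in favor of the high-level LAPACKE row-major entry points, passing &matrix[0][0] directly; this relies on the double** matrices being backed by a single contiguous block. A minimal standalone sketch of the dsyevd call in that style (the wrapper-free function here is illustrative):

#include <lapacke.h>
#include <vector>

// Row-major LAPACKE_dsyevd on a contiguous n x n buffer: no manual column-major
// conversion and no workspace query, since LAPACKE handles both internally.
// On success the eigenvalues in w are ascending and the eigenvectors sit in the
// columns of the (logical) matrix; the hunk above then transposes so that row i
// holds eigenvector i.
bool SymmetricEigen(std::vector<double>& a, std::vector<double>& w, int n) {
    lapack_int info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, 'V', 'U', n,
                                     a.data(), n, w.data());
    return info == 0;
}

Dsysv and Dgetrf/Dgetrs in the hunks above follow the same recipe with LAPACKE_dsysv, LAPACKE_dgetrf, and LAPACKE_dgetrs, which is why their double const* const* arguments become mutable double**: the high-level calls overwrite the caller's matrix in place.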
--- a/src/wrappers/Lapack.h
+++ b/src/wrappers/Lapack.h
@@ -31,8 +31,8 @@ public:
3131 static Lapack* GetInstance();
3232 static void DeleteInstance();
3333 molds_lapack_int Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors);
34- molds_lapack_int Dsysv(double const* const* matrix, double* b, molds_lapack_int size);
35- molds_lapack_int Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const;
34+ molds_lapack_int Dsysv(double** matrix, double* b, molds_lapack_int size);
35+ molds_lapack_int Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const;
3636 molds_lapack_int Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const;
3737 molds_lapack_int Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const;
3838 private:
--- a/src/zindo/ZindoS.cpp
+++ b/src/zindo/ZindoS.cpp
@@ -2353,11 +2353,9 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{
23532353 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
23542354 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
23552355 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
2356- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(this->matrixCISdimension);
23572356 MolDS_mpi::AsyncCommunicator asyncCommunicator;
23582357 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
2359- &asyncCommunicator,
2360- mPassingTimes) );
2358+ &asyncCommunicator) );
23612359
23622360 // this loop-a is MPI-parallelized
23632361 for(int k=this->matrixCISdimension-1; 0<=k; k--){
@@ -2418,12 +2416,13 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{
24182416 int num = this->matrixCISdimension - k;
24192417 double* buff = &this->matrixCIS[k][k];
24202418 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
2421- asyncCommunicator.SetRecvedVector(buff, num, source, tag);
2419+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
24222420 }
24232421 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
2424- asyncCommunicator.SetSentVector(buff, num, dest, tag);
2422+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
24252423 }
24262424 } // end of k-loop which is MPI-parallelized
2425+ asyncCommunicator.Finalize();
24272426 communicationThread.join();
24282427 // Broadcast data to all rank
24292428 for(int k=0; k<this->matrixCISdimension; k++){
@@ -3339,11 +3338,9 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo
33393338 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
33403339 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
33413340 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3342- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize);
33433341 MolDS_mpi::AsyncCommunicator asyncCommunicator;
33443342 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3345- &asyncCommunicator,
3346- mPassingTimes) );
3343+ &asyncCommunicator) );
33473344 // this loop-i is MPI-parallelized
33483345 for(int i=nonRedundantQIndecesSize-1; 0<=i; i--){
33493346 int calcRank = i%mpiSize;
@@ -3376,12 +3373,13 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo
33763373 int num = nonRedundantQIndecesSize - i;
33773374 double* buff = &gammaNRMinusKNR[i][i];
33783375 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
3379- asyncCommunicator.SetRecvedVector(buff, num, source, tag);
3376+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
33803377 }
33813378 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
3382- asyncCommunicator.SetSentVector(buff, num, dest, tag);
3379+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
33833380 }
33843381 } // end of loop-i parallelized with MPI
3382+ asyncCommunicator.Finalize();
33853383 communicationThread.join();
33863384 // broadcast data to all rank
33873385 for(int i=0; i<nonRedundantQIndecesSize; i++){
@@ -3403,11 +3401,9 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv,
34033401 int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
34043402 int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
34053403 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3406- int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize);
34073404 MolDS_mpi::AsyncCommunicator asyncCommunicator;
34083405 boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3409- &asyncCommunicator,
3410- mPassingTimes) );
3406+ &asyncCommunicator) );
34113407 // this loop-i is MPI-parallelized
34123408 for(int i=0; i<nonRedundantQIndecesSize; i++){
34133409 int calcRank = i%mpiSize;
@@ -3440,12 +3436,13 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv,
34403436 int num = redundantQIndecesSize;
34413437 double* buff = &kRDagerGammaRInv[i][0];
34423438 if(mpiRank == mpiHeadRank && mpiRank != calcRank){
3443- asyncCommunicator.SetRecvedVector(buff, num, source, tag);
3439+ asyncCommunicator.SetRecvedMessage(buff, num, source, tag);
34443440 }
34453441 if(mpiRank != mpiHeadRank && mpiRank == calcRank){
3446- asyncCommunicator.SetSentVector(buff, num, dest, tag);
3442+ asyncCommunicator.SetSentMessage(buff, num, dest, tag);
34473443 }
34483444 } // end of loop-i parallelized with MPI
3445+ asyncCommunicator.Finalize();
34493446 communicationThread.join();
34503447 // broadcast data to all rank
34513448 for(int i=0; i<nonRedundantQIndecesSize; i++){