Revision | 50f6dfa1ce8b7359ed19d60177ebf851710a5b4c (tree) |
---|---|
Time | 2013-10-14 17:17:50 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Committer | Mikiya Fujii |
trunk.r1540 is merged to branches/fx10. #32094
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/fx10@1541 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -514,6 +514,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
514 | 514 | double*** diisStoredDensityMatrix = NULL; |
515 | 515 | double*** diisStoredErrorVect = NULL; |
516 | 516 | double** diisErrorProducts = NULL; |
517 | + double** tmpDiisErrorProducts = NULL; | |
517 | 518 | double* diisErrorCoefficients = NULL; |
518 | 519 | |
519 | 520 | try{ |
@@ -521,6 +522,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
521 | 522 | &diisStoredDensityMatrix, |
522 | 523 | &diisStoredErrorVect, |
523 | 524 | &diisErrorProducts, |
525 | + &tmpDiisErrorProducts, | |
524 | 526 | &diisErrorCoefficients); |
525 | 527 | // calculate electron integral |
526 | 528 | this->CalcGammaAB(this->gammaAB, *this->molecule); |
@@ -590,6 +592,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
590 | 592 | diisStoredDensityMatrix, |
591 | 593 | diisStoredErrorVect, |
592 | 594 | diisErrorProducts, |
595 | + tmpDiisErrorProducts, | |
593 | 596 | diisErrorCoefficients, |
594 | 597 | diisError, |
595 | 598 | hasAppliedDIIS, |
@@ -612,6 +615,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
612 | 615 | &diisStoredDensityMatrix, |
613 | 616 | &diisStoredErrorVect, |
614 | 617 | &diisErrorProducts, |
618 | + &tmpDiisErrorProducts, | |
615 | 619 | &diisErrorCoefficients); |
616 | 620 | |
617 | 621 | throw ex; |
@@ -620,6 +624,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
620 | 624 | &diisStoredDensityMatrix, |
621 | 625 | &diisStoredErrorVect, |
622 | 626 | &diisErrorProducts, |
627 | + &tmpDiisErrorProducts, | |
623 | 628 | &diisErrorCoefficients); |
624 | 629 | |
625 | 630 | double ompEndTime = omp_get_wtime(); |
@@ -750,6 +755,7 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
750 | 755 | double**** diisStoredDensityMatrix, |
751 | 756 | double**** diisStoredErrorVect, |
752 | 757 | double*** diisErrorProducts, |
758 | + double*** tmpDiisErrorProducts, | |
753 | 759 | double** diisErrorCoefficients) const{ |
754 | 760 | |
755 | 761 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -767,6 +773,9 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
767 | 773 | MallocerFreer::GetInstance()->Free<double>(diisErrorProducts, |
768 | 774 | diisNumErrorVect+1, |
769 | 775 | diisNumErrorVect+1); |
776 | + MallocerFreer::GetInstance()->Free<double>(tmpDiisErrorProducts, | |
777 | + diisNumErrorVect+1, | |
778 | + diisNumErrorVect+1); | |
770 | 779 | MallocerFreer::GetInstance()->Free<double>(diisErrorCoefficients, |
771 | 780 | diisNumErrorVect+1); |
772 | 781 | } |
@@ -775,6 +784,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
775 | 784 | double**** diisStoredDensityMatrix, |
776 | 785 | double**** diisStoredErrorVect, |
777 | 786 | double*** diisErrorProducts, |
787 | + double*** tmpDiisErrorProducts, | |
778 | 788 | double** diisErrorCoefficients){ |
779 | 789 | |
780 | 790 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -791,6 +801,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
791 | 801 | this->molecule->GetTotalNumberAOs(), |
792 | 802 | this->molecule->GetTotalNumberAOs()); |
793 | 803 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); |
804 | + MallocerFreer::GetInstance()->Malloc<double>(tmpDiisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); | |
794 | 805 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorCoefficients, diisNumErrorVect+1); |
795 | 806 | } |
796 | 807 | } |
@@ -805,6 +816,7 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
805 | 816 | double*** diisStoredDensityMatrix, |
806 | 817 | double*** diisStoredErrorVect, |
807 | 818 | double** diisErrorProducts, |
819 | + double** tmpDiisErrorProducts, | |
808 | 820 | double* diisErrorCoefficients, |
809 | 821 | double& diisError, |
810 | 822 | bool& hasAppliedDIIS, |
@@ -873,7 +885,13 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
873 | 885 | if(diisNumErrorVect <= step && diisEndError<diisError && diisError<diisStartError){ |
874 | 886 | hasAppliedDIIS = true; |
875 | 887 | try{ |
876 | - MolDS_wrappers::Lapack::GetInstance()->Dsysv(diisErrorProducts, | |
888 | +#pragma omp parallel for schedule(auto) | |
889 | + for(int i=0; i<diisNumErrorVect+1; i++){ | |
890 | + for(int j=0; j<diisNumErrorVect+1; j++){ | |
891 | + tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j]; | |
892 | + } | |
893 | + } | |
894 | + MolDS_wrappers::Lapack::GetInstance()->Dsysv(tmpDiisErrorProducts, | |
877 | 895 | diisErrorCoefficients, |
878 | 896 | diisNumErrorVect+1); |
879 | 897 | }catch(MolDSException ex){ |
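The scratch matrix tmpDiisErrorProducts introduced above is needed because the reworked Lapack::Dsysv (see the Lapack hunks further down) now factorizes its input in place, while the accumulated DIIS error-product matrix has to survive into later SCF iterations. A minimal sketch of the pattern, reusing the array names from this hunk (allocation is assumed to happen elsewhere):

```cpp
#include <omp.h>

// Copy the persistent DIIS B-matrix into a scratch buffer before handing it
// to a solver that destroys its input.
void PrepareDiisScratch(double const* const* diisErrorProducts,
                        double**             tmpDiisErrorProducts,
                        int                  diisNumErrorVect){
#pragma omp parallel for schedule(auto)
   for(int i=0; i<diisNumErrorVect+1; i++){
      for(int j=0; j<diisNumErrorVect+1; j++){
         tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j];
      }
   }
   // tmpDiisErrorProducts, not diisErrorProducts, is then passed to
   // MolDS_wrappers::Lapack::Dsysv, which overwrites it with the factorization.
}
```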
@@ -1295,6 +1313,7 @@ double Cndo2::GetMolecularIntegralElement(int moI, int moJ, int moK, int moL, | ||
1295 | 1313 | void Cndo2::UpdateOldOrbitalElectronPopulation(double** oldOrbitalElectronPopulation, |
1296 | 1314 | double const* const* orbitalElectronPopulation, |
1297 | 1315 | int numberAOs) const{ |
1316 | +#pragma omp parallel for schedule(auto) | |
1298 | 1317 | for(int i=0; i<numberAOs; i++){ |
1299 | 1318 | for(int j=0; j<numberAOs; j++){ |
1300 | 1319 | oldOrbitalElectronPopulation[i][j] = orbitalElectronPopulation[i][j]; |
@@ -1371,11 +1390,9 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1371 | 1390 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1372 | 1391 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1373 | 1392 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1374 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs); | |
1375 | 1393 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1376 | 1394 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1377 | - &asyncCommunicator, | |
1378 | - mPassingTimes) ); | |
1395 | + &asyncCommunicator) ); | |
1379 | 1396 | |
1380 | 1397 | MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs); |
1381 | 1398 | for(int A=totalNumberAtoms-1; 0<=A; A--){ |
@@ -1437,26 +1454,25 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1437 | 1454 | } // end of if(mpiRank == calcRank) |
1438 | 1455 | |
1439 | 1456 | // set data to gather in mpiHeadRank with asynchronous MPI |
1440 | - int tag = mu; | |
1441 | - int source = calcRank; | |
1442 | - int dest = mpiHeadRank; | |
1457 | + int tag = mu; | |
1458 | + int source = calcRank; | |
1459 | + int dest = mpiHeadRank; | |
1460 | + double* buff = &fockMatrix[mu][mu]; | |
1461 | + MolDS_mpi::molds_mpi_int num = totalNumberAOs-mu; | |
1443 | 1462 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1444 | - asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu], | |
1445 | - totalNumberAOs-mu, | |
1446 | - source, | |
1447 | - tag); | |
1463 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
1448 | 1464 | } |
1449 | 1465 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1450 | - asyncCommunicator.SetSentVector(&fockMatrix[mu][mu], | |
1451 | - totalNumberAOs-mu, | |
1452 | - dest, | |
1453 | - tag); | |
1466 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
1454 | 1467 | } |
1455 | 1468 | } // end of loop mu parallelized with MPI |
1456 | 1469 | } // end of loop A |
1457 | 1470 | // Delete the communication thread. |
1471 | + asyncCommunicator.Finalize(); | |
1458 | 1472 | communicationThread.join(); |
1459 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank); | |
1473 | + double* buff = &fockMatrix[0][0]; | |
1474 | + MolDS_mpi::molds_mpi_int num = totalNumberAOs*totalNumberAOs; | |
1475 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1460 | 1476 | |
1461 | 1477 | /* |
1462 | 1478 | this->OutputLog("fock matrix\n"); |
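This hunk also shows the communication pattern the commit adopts throughout: the background thread runs AsyncCommunicator::Run&lt;double&gt;() with no precomputed message count, each rank enqueues its transfers with SetSentMessage/SetRecvedMessage, and Finalize() lets the worker drain the queue and exit before the thread is joined. A condensed sketch of that pattern, reusing the names from the diff (the GatherRows wrapper and the round-robin calcRank decomposition are illustrative assumptions, and the closing broadcast assumes the matrix rows are stored contiguously):

```cpp
// MolDS headers (AsyncCommunicator.h, MpiProcess.h) are assumed to be included.
#include <boost/thread.hpp>
#include <boost/bind.hpp>

void GatherRows(double** matrix, int totalRows,
                int mpiRank, int mpiHeadRank, int mpiSize){
   MolDS_mpi::AsyncCommunicator asyncCommunicator;
   boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
                                                  &asyncCommunicator) );
   for(int mu=0; mu<totalRows; mu++){
      int calcRank = mu%mpiSize;            // assumed round-robin work decomposition
      // ... rank calcRank fills matrix[mu][mu..totalRows-1] here ...
      double* buff = &matrix[mu][mu];
      MolDS_mpi::molds_mpi_int num = totalRows - mu;
      if(mpiRank == mpiHeadRank && mpiRank != calcRank){
         asyncCommunicator.SetRecvedMessage(buff, num, calcRank, mu);   // tag = mu
      }
      if(mpiRank != mpiHeadRank && mpiRank == calcRank){
         asyncCommunicator.SetSentMessage(buff, num, mpiHeadRank, mu);
      }
   }
   asyncCommunicator.Finalize();    // no further messages will be queued
   communicationThread.join();      // worker exits once the queue is drained
   MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&matrix[0][0],
                                                   (MolDS_mpi::molds_mpi_int)totalRows*totalRows,
                                                   mpiHeadRank);
}
```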
@@ -1570,12 +1586,10 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation, | ||
1570 | 1586 | const Molecule& molecule) const{ |
1571 | 1587 | int totalNumberAtoms = molecule.GetNumberAtoms(); |
1572 | 1588 | MallocerFreer::GetInstance()->Initialize<double>(atomicElectronPopulation, totalNumberAtoms); |
1573 | - | |
1574 | - int firstAOIndex = 0; | |
1575 | - int numberAOs = 0; | |
1589 | +#pragma omp parallel for schedule(auto) | |
1576 | 1590 | for(int A=0; A<totalNumberAtoms; A++){ |
1577 | - firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex(); | |
1578 | - numberAOs = molecule.GetAtom(A)->GetValenceSize(); | |
1591 | + int firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex(); | |
1592 | + int numberAOs = molecule.GetAtom(A)->GetValenceSize(); | |
1579 | 1593 | for(int i=firstAOIndex; i<firstAOIndex+numberAOs; i++){ |
1580 | 1594 | atomicElectronPopulation[A] += orbitalElectronPopulation[i][i]; |
1581 | 1595 | } |
@@ -1591,11 +1605,9 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ | ||
1591 | 1605 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1592 | 1606 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1593 | 1607 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1594 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1595 | 1608 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1596 | 1609 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1597 | - &asyncCommunicator, | |
1598 | - mPassingTimes) ); | |
1610 | + &asyncCommunicator) ); | |
1599 | 1611 | |
1600 | 1612 | // This loop (A) is parallelized by MPI |
1601 | 1613 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1669,24 +1681,23 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ | ||
1669 | 1681 | } // end of if(mpiRank==calcRank) |
1670 | 1682 | |
1671 | 1683 | // set data to gather in mpiHeadRank with asynchronous MPI |
1672 | - int tag = A; | |
1673 | - int source = calcRank; | |
1674 | - int dest = mpiHeadRank; | |
1684 | + int tag = A; | |
1685 | + int source = calcRank; | |
1686 | + int dest = mpiHeadRank; | |
1687 | + double* buff = &gammaAB[A][A]; | |
1688 | + MolDS_mpi::molds_mpi_int num = totalAtomNumber-A; | |
1675 | 1689 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1676 | - asyncCommunicator.SetRecvedVector(&gammaAB[A][A], | |
1677 | - totalAtomNumber-A, | |
1678 | - source, | |
1679 | - tag); | |
1690 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
1680 | 1691 | } |
1681 | 1692 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1682 | - asyncCommunicator.SetSentVector(&gammaAB[A][A], | |
1683 | - totalAtomNumber-A, | |
1684 | - dest, | |
1685 | - tag); | |
1693 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
1686 | 1694 | } |
1687 | 1695 | } // end of loop A parallelized by MPI |
1696 | + asyncCommunicator.Finalize(); | |
1688 | 1697 | communicationThread.join(); |
1689 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank); | |
1698 | + double* buff = &gammaAB[0][0]; | |
1699 | + MolDS_mpi::molds_mpi_int num = totalAtomNumber*totalAtomNumber; | |
1700 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1690 | 1701 | |
1691 | 1702 | #pragma omp parallel for schedule(auto) |
1692 | 1703 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1792,12 +1803,9 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1792 | 1803 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1793 | 1804 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1794 | 1805 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1795 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1796 | - mPassingTimes *= CartesianType_end; | |
1797 | 1806 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1798 | 1807 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1799 | - &asyncCommunicator, | |
1800 | - mPassingTimes) ); | |
1808 | + &asyncCommunicator) ); | |
1801 | 1809 | |
1802 | 1810 | // This loop (A and mu) is parallelized by MPI |
1803 | 1811 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1836,43 +1844,32 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1836 | 1844 | } // end of if(mpiRank == calcRank) |
1837 | 1845 | |
1838 | 1846 | // set data to gather in mpiHeadRank with asynchronous MPI |
1839 | - int tagX = A* CartesianType_end + XAxis; | |
1840 | - int tagY = A* CartesianType_end + YAxis; | |
1841 | - int tagZ = A* CartesianType_end + ZAxis; | |
1842 | - int source = calcRank; | |
1843 | - int dest = mpiHeadRank; | |
1847 | + int tagX = A* CartesianType_end + XAxis; | |
1848 | + int tagY = A* CartesianType_end + YAxis; | |
1849 | + int tagZ = A* CartesianType_end + ZAxis; | |
1850 | + int source = calcRank; | |
1851 | + int dest = mpiHeadRank; | |
1852 | + double* buffX = &cartesianMatrix[XAxis][firstAOIndexA][0]; | |
1853 | + double* buffY = &cartesianMatrix[YAxis][firstAOIndexA][0]; | |
1854 | + double* buffZ = &cartesianMatrix[ZAxis][firstAOIndexA][0]; | |
1855 | + MolDS_mpi::molds_mpi_int num = numValenceAOsA*totalAONumber; | |
1844 | 1856 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1845 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1846 | - numValenceAOsA*totalAONumber, | |
1847 | - source, | |
1848 | - tagX); | |
1849 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1850 | - numValenceAOsA*totalAONumber, | |
1851 | - source, | |
1852 | - tagY); | |
1853 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1854 | - numValenceAOsA*totalAONumber, | |
1855 | - source, | |
1856 | - tagZ); | |
1857 | + asyncCommunicator.SetRecvedMessage(buffX, num, source, tagX); | |
1858 | + asyncCommunicator.SetRecvedMessage(buffY, num, source, tagY); | |
1859 | + asyncCommunicator.SetRecvedMessage(buffZ, num, source, tagZ); | |
1857 | 1860 | } |
1858 | 1861 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1859 | - asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1860 | - numValenceAOsA*totalAONumber, | |
1861 | - dest, | |
1862 | - tagX); | |
1863 | - asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1864 | - numValenceAOsA*totalAONumber, | |
1865 | - dest, | |
1866 | - tagY); | |
1867 | - asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1868 | - numValenceAOsA*totalAONumber, | |
1869 | - dest, | |
1870 | - tagZ); | |
1862 | + asyncCommunicator.SetSentMessage(buffX, num, dest, tagX); | |
1863 | + asyncCommunicator.SetSentMessage(buffY, num, dest, tagY); | |
1864 | + asyncCommunicator.SetSentMessage(buffZ, num, dest, tagZ); | |
1871 | 1865 | } |
1872 | 1866 | } // end of loop for int A with MPI |
1873 | 1867 | // Delete the communication thread. |
1868 | + asyncCommunicator.Finalize(); | |
1874 | 1869 | communicationThread.join(); |
1875 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, mpiHeadRank); | |
1870 | + double* buff = &cartesianMatrix[0][0][0]; | |
1871 | + MolDS_mpi::molds_mpi_int num = CartesianType_end*totalAONumber*totalAONumber; | |
1872 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1876 | 1873 | |
1877 | 1874 | /* |
1878 | 1875 | // communication to collect all matrix data on head-rank |
@@ -3908,11 +3905,9 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3908 | 3905 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3909 | 3906 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3910 | 3907 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3911 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
3912 | 3908 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3913 | 3909 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3914 | - &asyncCommunicator, | |
3915 | - mPassingTimes) ); | |
3910 | + &asyncCommunicator) ); | |
3916 | 3911 | |
3917 | 3912 | MallocerFreer::GetInstance()->Initialize<double>(overlapAOs, |
3918 | 3913 | totalAONumber, |
@@ -3976,24 +3971,23 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3976 | 3971 | } // end of if(mpiRank == calcRank) |
3977 | 3972 | |
3978 | 3973 | // set data to gather in mpiHeadRank with asynchronous MPI |
3979 | - int tag = A; | |
3980 | - int source = calcRank; | |
3981 | - int dest = mpiHeadRank; | |
3974 | + int tag = A; | |
3975 | + int source = calcRank; | |
3976 | + int dest = mpiHeadRank; | |
3977 | + double* buff = overlapAOs[firstAOIndexA]; | |
3978 | + MolDS_mpi::molds_mpi_int num = totalAONumber*numValenceAOs; | |
3982 | 3979 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3983 | - asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA], | |
3984 | - totalAONumber*numValenceAOs, | |
3985 | - source, | |
3986 | - tag); | |
3980 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3987 | 3981 | } |
3988 | 3982 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3989 | - asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA], | |
3990 | - totalAONumber*numValenceAOs, | |
3991 | - dest, | |
3992 | - tag); | |
3983 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3993 | 3984 | } |
3994 | 3985 | } // end of loop A parallelized with MPI |
3986 | + asyncCommunicator.Finalize(); | |
3995 | 3987 | communicationThread.join(); |
3996 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank); | |
3988 | + double* buff = &overlapAOs[0][0]; | |
3989 | + MolDS_mpi::molds_mpi_int num = totalAONumber*totalAONumber; | |
3990 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
3997 | 3991 | |
3998 | 3992 | #pragma omp parallel for schedule(auto) |
3999 | 3993 | for(int mu=0; mu<totalAONumber; mu++){ |
@@ -485,6 +485,7 @@ private: | ||
485 | 485 | double*** diisStoredDensityMatrix, |
486 | 486 | double*** diisStoredErrorVect, |
487 | 487 | double** diisErrorProducts, |
488 | + double** tmpDiisErrorProducts, | |
488 | 489 | double* diisErrorCoefficients, |
489 | 490 | double& diisError, |
490 | 491 | bool& hasAppliedDIIS, |
@@ -510,11 +511,13 @@ private: | ||
510 | 511 | double**** diisStoredDensityMatrix, |
511 | 512 | double**** diisStoredErrorVect, |
512 | 513 | double*** diisErrorProducts, |
514 | + double*** tmpDiisErrorProducts, | |
513 | 515 | double** diisErrorCoefficients) const; |
514 | 516 | void MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, |
515 | 517 | double**** diisStoredDensityMatrix, |
516 | 518 | double**** diisStoredErrorVect, |
517 | 519 | double*** diisErrorProducts, |
520 | + double*** tmpDiisErrorProducts, | |
518 | 521 | double** diisErrorCoefficients); |
519 | 522 | }; |
520 | 523 |
@@ -3443,11 +3443,9 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3443 | 3443 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3444 | 3444 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3445 | 3445 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3446 | - int mPassingTimes = totalNumberAtoms-1; | |
3447 | 3446 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3448 | 3447 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3449 | - &asyncCommunicator, | |
3450 | - mPassingTimes) ); | |
3448 | + &asyncCommunicator) ); | |
3451 | 3449 | #ifdef MOLDS_DBG |
3452 | 3450 | if(twoElecTwoCore == NULL){ |
3453 | 3451 | throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix); |
@@ -3515,9 +3513,10 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3515 | 3513 | OrbitalType twoElecLimit = dxy; |
3516 | 3514 | int numBuff = (twoElecLimit+1)*twoElecLimit/2; |
3517 | 3515 | int num = (totalNumberAtoms-b)*numBuff*numBuff; |
3518 | - asyncCommunicator.SetBroadcastedVector(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank); | |
3516 | + asyncCommunicator.SetBroadcastedMessage(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank); | |
3519 | 3517 | } |
3520 | 3518 | } // end of loop a parallelized with MPI |
3519 | + asyncCommunicator.Finalize(); | |
3521 | 3520 | communicationThread.join(); |
3522 | 3521 | |
3523 | 3522 | #pragma omp parallel for schedule(auto) |
@@ -35,8 +35,15 @@ | ||
35 | 35 | #include"AsyncCommunicator.h" |
36 | 36 | using namespace std; |
37 | 37 | namespace MolDS_mpi{ |
38 | -AsyncCommunicator::AsyncCommunicator(){} | |
38 | +AsyncCommunicator::AsyncCommunicator(){ | |
39 | + this->hasAllMessagesSet=false; | |
40 | +} | |
39 | 41 | AsyncCommunicator::~AsyncCommunicator(){} |
42 | +void AsyncCommunicator::Finalize(){ | |
43 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
44 | + this->hasAllMessagesSet = true; | |
45 | + this->stateChange.notify_all(); | |
46 | +} | |
40 | 47 | } |
41 | 48 | |
42 | 49 |
@@ -28,29 +28,28 @@ class AsyncCommunicator{ | ||
28 | 28 | public: |
29 | 29 | AsyncCommunicator(); |
30 | 30 | ~AsyncCommunicator(); |
31 | - template<typename T> void Run(int passingTimes){ | |
31 | + template<typename T> void Run(){ | |
32 | 32 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
33 | - while(0<passingTimes){ | |
34 | - sleep(0.1); | |
33 | + while(true){ | |
35 | 34 | boost::mutex::scoped_lock lk(this->stateGuard); |
36 | 35 | try{ |
37 | - DataInfo dInfo = this->dataQueue.FrontPop(); | |
38 | - if(dInfo.mpiFuncType == MolDS_base::Send){ | |
39 | - MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest, | |
40 | - dInfo.tag, | |
41 | - reinterpret_cast<T*>(dInfo.vectorPtr), | |
42 | - dInfo.num); | |
36 | + MessageInfo mInfo = this->messageQueue.FrontPop(); | |
37 | + if(mInfo.mpiFuncType == MolDS_base::Send){ | |
38 | + MolDS_mpi::MpiProcess::GetInstance()->Send(mInfo.dest, | |
39 | + mInfo.tag, | |
40 | + reinterpret_cast<T*>(mInfo.vectorPtr), | |
41 | + mInfo.num); | |
43 | 42 | } |
44 | - else if(dInfo.mpiFuncType == MolDS_base::Recv){ | |
45 | - MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source, | |
46 | - dInfo.tag, | |
47 | - reinterpret_cast<T*>(dInfo.vectorPtr), | |
48 | - dInfo.num); | |
43 | + else if(mInfo.mpiFuncType == MolDS_base::Recv){ | |
44 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(mInfo.source, | |
45 | + mInfo.tag, | |
46 | + reinterpret_cast<T*>(mInfo.vectorPtr), | |
47 | + mInfo.num); | |
49 | 48 | } |
50 | - else if(dInfo.mpiFuncType == MolDS_base::Broadcast){ | |
51 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr), | |
52 | - dInfo.num, | |
53 | - dInfo.source); | |
49 | + else if(mInfo.mpiFuncType == MolDS_base::Broadcast){ | |
50 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(mInfo.vectorPtr), | |
51 | + mInfo.num, | |
52 | + mInfo.source); | |
54 | 53 | } |
55 | 54 | else{ |
56 | 55 | std::stringstream ss; |
@@ -59,10 +58,12 @@ public: | ||
59 | 58 | throw ex; |
60 | 59 | } |
61 | 60 | this->stateChange.notify_all(); |
62 | - passingTimes--; | |
63 | 61 | } |
64 | 62 | catch(MolDS_base::MolDSException ex){ |
65 | - if(ex.HasKey(MolDS_base::EmptyQueue)){ | |
63 | + if(ex.HasKey(MolDS_base::EmptyQueue) && this->hasAllMessagesSet){ | 
64 | + break; | |
65 | + } | |
66 | + else if(ex.HasKey(MolDS_base::EmptyQueue) && !this->hasAllMessagesSet){ | 
66 | 67 | this->stateChange.wait(lk); |
67 | 68 | continue; |
68 | 69 | } |
@@ -73,51 +74,54 @@ public: | ||
73 | 74 | } |
74 | 75 | } |
75 | 76 | |
76 | - template<typename T> void SetSentVector(T* vector, | |
77 | - molds_mpi_int num, | |
78 | - int dest, | |
79 | - int tag){ | |
77 | + template<typename T> void SetSentMessage(T* vector, | |
78 | + molds_mpi_int num, | |
79 | + int dest, | |
80 | + int tag){ | |
80 | 81 | int source = NON_USED; |
81 | 82 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send; |
82 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
83 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
83 | 84 | } |
84 | 85 | |
85 | - template<typename T> void SetRecvedVector(T* vector, | |
86 | - molds_mpi_int num, | |
87 | - int source, | |
88 | - int tag){ | |
86 | + template<typename T> void SetRecvedMessage(T* vector, | |
87 | + molds_mpi_int num, | |
88 | + int source, | |
89 | + int tag){ | |
89 | 90 | int dest = NON_USED; |
90 | 91 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv; |
91 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
92 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
92 | 93 | } |
93 | 94 | |
94 | - template<typename T> void SetBroadcastedVector(T* vector, molds_mpi_int num, int root){ | |
95 | + template<typename T> void SetBroadcastedMessage(T* vector, molds_mpi_int num, int root){ | |
95 | 96 | int source = root; |
96 | 97 | int dest = NON_USED; |
97 | 98 | int tag = NON_USED; |
98 | 99 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast; |
99 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
100 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
100 | 101 | } |
101 | 102 | |
103 | + void Finalize(); | |
104 | + | |
102 | 105 | private: |
103 | - struct DataInfo{intptr_t vectorPtr; | |
104 | - molds_mpi_int num; | |
105 | - int source; | |
106 | - int dest; | |
107 | - int tag; | |
108 | - MolDS_base::MpiFunctionType mpiFuncType;}; | |
106 | + struct MessageInfo{intptr_t vectorPtr; | |
107 | + molds_mpi_int num; | |
108 | + int source; | |
109 | + int dest; | |
110 | + int tag; | |
111 | + MolDS_base::MpiFunctionType mpiFuncType;}; | |
109 | 112 | boost::mutex stateGuard; |
110 | 113 | boost::condition stateChange; |
111 | - MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue; | |
112 | - template<typename T> void SetVector(T* vector, | |
113 | - molds_mpi_int num, | |
114 | - int source, | |
115 | - int dest, | |
116 | - int tag, | |
117 | - MolDS_base::MpiFunctionType mpiFuncType){ | |
114 | + bool hasAllMessagesSet; | |
115 | + MolDS_base_containers::ThreadSafeQueue<MessageInfo> messageQueue; | |
116 | + template<typename T> void SetMessage(T* vector, | |
117 | + molds_mpi_int num, | |
118 | + int source, | |
119 | + int dest, | |
120 | + int tag, | |
121 | + MolDS_base::MpiFunctionType mpiFuncType){ | |
118 | 122 | boost::mutex::scoped_lock lk(this->stateGuard); |
119 | - DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType}; | |
120 | - this->dataQueue.Push(dInfo); | |
123 | + MessageInfo mInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType}; | |
124 | + this->messageQueue.Push(mInfo); | |
121 | 125 | this->stateChange.notify_all(); |
122 | 126 | } |
123 | 127 | }; |
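The shutdown handshake added here works as follows: Run() now loops until the queue is empty and Finalize() has raised hasAllMessagesSet; an empty queue before Finalize() just puts the worker to sleep on stateChange. A self-contained sketch of the same protocol with Boost primitives (illustrative names, not the MolDS API):

```cpp
#include <queue>
#include <boost/thread.hpp>

// Producer/consumer queue that terminates cleanly: Pop() returns false only
// after Finalize() has been called and every queued item has been consumed.
class FinalizableQueue{
public:
   FinalizableQueue() : finished(false) {}
   void Push(int v){
      boost::mutex::scoped_lock lk(this->guard);
      this->items.push(v);
      this->changed.notify_all();
   }
   void Finalize(){
      boost::mutex::scoped_lock lk(this->guard);
      this->finished = true;
      this->changed.notify_all();
   }
   bool Pop(int& v){
      boost::mutex::scoped_lock lk(this->guard);
      while(this->items.empty() && !this->finished){
         this->changed.wait(lk);               // queue empty, but more may come
      }
      if(this->items.empty()){ return false; } // finished and fully drained
      v = this->items.front();
      this->items.pop();
      return true;
   }
private:
   std::queue<int>           items;
   boost::mutex              guard;
   boost::condition_variable changed;
   bool                      finished;
};
```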
@@ -97,22 +97,6 @@ MpiProcess* MpiProcess::GetInstance(){ | ||
97 | 97 | |
98 | 98 | void MpiProcess::Barrier(){this->communicator->barrier();} |
99 | 99 | |
100 | -int MpiProcess::GetMessagePassingTimes(molds_mpi_int num)const{ | |
101 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
102 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
103 | - int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
104 | - int calcTimes = num/mpiSize; | |
105 | - if(mpiRank < num%mpiSize){calcTimes+=1;} | |
106 | - int mpiPassingTimes; | |
107 | - if(mpiRank == mpiHeadRank){ | |
108 | - mpiPassingTimes = num - calcTimes; | |
109 | - } | |
110 | - else{ | |
111 | - mpiPassingTimes = calcTimes; | |
112 | - } | |
113 | - return mpiPassingTimes; | |
114 | -} | |
115 | - | |
116 | 100 | void MpiProcess::SetMessages(){ |
117 | 101 | this->errorMessageSplitMessageElemLimNegative |
118 | 102 | = "Error in mpi::MpiProcess::SplitMessage2Chunks: elementsLimit is negative. \nelementsLimit="; |
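For reference, the helper removed here predicted how many point-to-point messages each rank would handle, so that the old Run(passingTimes) knew when to stop; with the Finalize() handshake that count is no longer needed. A sketch of what it computed, with a worked example (values assumed):

```cpp
// Reproduction of the deleted bookkeeping, for illustration only.
int MessagePassingTimes(int num, int mpiSize, int mpiRank, int mpiHeadRank){
   int calcTimes = num/mpiSize;                    // items this rank computes
   if(mpiRank < num%mpiSize){ calcTimes += 1; }
   // The head rank receives one message per item it does not compute itself;
   // every other rank sends one message per item it does compute.
   return (mpiRank == mpiHeadRank) ? num - calcTimes : calcTimes;
}
// Example: num=10, mpiSize=4, mpiHeadRank=0 gives
//   rank 0 -> 10-3 = 7 receives, rank 1 -> 3 sends, ranks 2 and 3 -> 2 sends each.
```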
@@ -114,7 +114,6 @@ public: | ||
114 | 114 | MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num); |
115 | 115 | } |
116 | 116 | void Barrier(); |
117 | - int GetMessagePassingTimes(molds_mpi_int num) const; | |
118 | 117 | private: |
119 | 118 | static MpiProcess* mpiProcess; |
120 | 119 | MpiProcess(); |
@@ -142,25 +141,25 @@ private: | ||
142 | 141 | int tagBase = origianlTag*numChunks; |
143 | 142 | if(elementsLimit < 0){ |
144 | 143 | std::stringstream ss; |
145 | - ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << endl; | |
144 | + ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << std::endl; | |
146 | 145 | MolDS_base::MolDSException ex(ss.str()); |
147 | 146 | throw ex; |
148 | 147 | } |
149 | 148 | if(numChunks < 0){ |
150 | 149 | std::stringstream ss; |
151 | - ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << endl; | |
150 | + ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << std::endl; | |
152 | 151 | MolDS_base::MolDSException ex(ss.str()); |
153 | 152 | throw ex; |
154 | 153 | } |
155 | 154 | if(remaining < 0){ |
156 | 155 | std::stringstream ss; |
157 | - ss << this->errorMessageSplitMessageRemainingNegative << remaining << endl; | |
156 | + ss << this->errorMessageSplitMessageRemainingNegative << remaining << std::endl; | |
158 | 157 | MolDS_base::MolDSException ex(ss.str()); |
159 | 158 | throw ex; |
160 | 159 | } |
161 | 160 | if(tagBase < 0){ |
162 | 161 | std::stringstream ss; |
163 | - ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << endl; | |
162 | + ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << std::endl; | |
164 | 163 | MolDS_base::MolDSException ex(ss.str()); |
165 | 164 | throw ex; |
166 | 165 | } |
@@ -36,7 +36,7 @@ | ||
36 | 36 | #ifdef __INTEL_COMPILER |
37 | 37 | #include"mkl.h" |
38 | 38 | #elif defined __FCC_VERSION |
39 | - #include"fj_lapack.h" | |
39 | + #include"lapacke.h" | |
40 | 40 | #else |
41 | 41 | #if ( __WORDSIZE == 32 ) |
42 | 42 | #else |
@@ -100,18 +100,10 @@ void Lapack::DeleteInstance(){ | ||
100 | 100 | * ***/ |
101 | 101 | molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){ |
102 | 102 | molds_lapack_int info = 0; |
103 | - molds_lapack_int k = 0; | |
104 | - molds_lapack_int lwork; | |
105 | - molds_lapack_int liwork; | |
106 | - char job; | |
107 | 103 | char uplo = 'U'; |
108 | 104 | molds_lapack_int lda = size; |
109 | - double* convertedMatrix; | |
110 | - double* tempEigenValues; | |
111 | - double* work; | |
112 | - molds_lapack_int* iwork; | |
113 | - | |
114 | 105 | // set job type |
106 | + char job; | |
115 | 107 | if(calcEigenVectors){ |
116 | 108 | job = 'V'; |
117 | 109 | } |
@@ -119,88 +111,49 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
119 | 111 | job = 'N'; |
120 | 112 | } |
121 | 113 | |
122 | - // calc. lwork and liwork | |
123 | - if(size < 1 ){ | |
124 | - stringstream ss; | |
125 | - ss << errorMessageDsyevdSize; | |
126 | - MolDSException ex(ss.str()); | |
127 | - ex.SetKeyValue<int>(LapackInfo, info); | |
128 | - throw ex; | |
129 | - } | |
130 | - else if(size == 1){ | |
131 | - lwork = 1; | |
132 | - liwork = 1; | |
133 | - } | |
134 | - else if(1 < size && job == 'N'){ | |
135 | - lwork = 2*size + 1; | |
136 | - liwork = 2; | |
137 | - } | |
138 | - else{ | |
139 | - // calc. k | |
140 | - double temp = log((double)size)/log(2.0); | |
141 | - if( (double)((molds_lapack_int)temp) < temp ){ | |
142 | - k = (molds_lapack_int)temp + 1; | |
143 | - } | |
144 | - else{ | |
145 | - k = (molds_lapack_int)temp; | |
146 | - } | |
147 | - lwork = 3*size*size + (5+2*k)*size + 1; | |
148 | - liwork = 5*size + 3; | |
149 | - } | |
150 | - | |
151 | - // malloc | |
152 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
153 | - iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
154 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
155 | - tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
156 | - | |
157 | - for(molds_lapack_int i = 0; i < size; i++){ | |
158 | - for(molds_lapack_int j = i; j < size; j++){ | |
159 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
160 | - } | |
161 | - } | |
162 | - | |
163 | 114 | // call Lapack |
164 | 115 | #ifdef __INTEL_COMPILER |
165 | - dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info); | |
116 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
166 | 117 | #elif defined __FCC_VERSION |
167 | - molds_lapack_int jobLen=1; | |
168 | - molds_lapack_int uploLen=1; | |
169 | - dsyevd_(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info, jobLen, uploLen); | |
118 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
170 | 119 | #else |
171 | - info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
120 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
172 | 121 | #endif |
173 | 122 | |
174 | - for(molds_lapack_int i = 0; i < size; i++){ | |
175 | - for(molds_lapack_int j = 0; j < size; j++){ | |
176 | - matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector | |
177 | - //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector | |
123 | + // make the i-th row the i-th eigenvector | 
124 | + double** tmpMatrix=NULL; | |
125 | + try{ | |
126 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, size, size); | |
127 | + for(molds_lapack_int i = 0; i < size; i++){ | |
128 | + for(molds_lapack_int j = 0; j < size; j++){ | |
129 | + tmpMatrix[j][i] = matrix[i][j]; | |
130 | + } | |
178 | 131 | } |
132 | + for(molds_lapack_int i = 0; i < size; i++){ | |
133 | + for(molds_lapack_int j = 0; j < size; j++){ | |
134 | + matrix[i][j] = tmpMatrix[i][j]; | |
135 | + } | |
136 | + } | |
137 | + } | |
138 | + catch(MolDSException ex){ | |
139 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
140 | + throw ex; | |
179 | 141 | } |
142 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
180 | 143 | |
144 | + // adjust phase of eigenvectors | |
181 | 145 | for(molds_lapack_int i=0;i<size;i++){ |
182 | - double temp = 0.0; | |
146 | + double tmp = 0.0; | |
183 | 147 | for(molds_lapack_int j=0;j<size;j++){ |
184 | - temp += matrix[i][j]; | |
148 | + tmp += matrix[i][j]; | |
185 | 149 | } |
186 | - if(temp<0){ | |
150 | + if(tmp<0){ | |
187 | 151 | for(molds_lapack_int j=0;j<size;j++){ |
188 | 152 | matrix[i][j]*=-1.0; |
189 | 153 | } |
190 | 154 | } |
191 | 155 | } |
192 | 156 | |
193 | - for(molds_lapack_int i = 0; i < size; i++){ | |
194 | - eigenValues[i] = tempEigenValues[i]; | |
195 | - } | |
196 | - //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); | |
197 | - | |
198 | - // free | |
199 | - MOLDS_LAPACK_free(work); | |
200 | - MOLDS_LAPACK_free(iwork); | |
201 | - MOLDS_LAPACK_free(convertedMatrix); | |
202 | - MOLDS_LAPACK_free(tempEigenValues); | |
203 | - | |
204 | 157 | if(info != 0){ |
205 | 158 | stringstream ss; |
206 | 159 | ss << errorMessageDsyevdInfo; |
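The rewritten Dsyevd drops the hand-rolled workspace sizing and lets LAPACKE manage it internally. A standalone sketch of the call this hunk relies on, assuming a LAPACKE installation with lapacke.h on the include path; with LAPACK_ROW_MAJOR and job='V' the eigenvectors come back in the columns of the array, which is why the code above transposes afterwards so that row i holds the i-th eigenvector.

```cpp
#include <cstdio>
#include <lapacke.h>

// Diagonalize a small symmetric matrix; eigenvalues land in w in ascending
// order, eigenvectors in the columns of a (row-major storage).
int main(){
   double a[3*3] = { 2.0, -1.0,  0.0,
                    -1.0,  2.0, -1.0,
                     0.0, -1.0,  2.0 };
   double w[3];
   lapack_int info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, 'V', 'U', 3, a, 3, w);
   if(info != 0){ return static_cast<int>(info); }
   for(int i=0; i<3; i++){ std::printf("lambda[%d] = %f\n", i, w[i]); }
   return 0;
}
```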
@@ -216,21 +169,15 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
216 | 169 | * |
217 | 170 | * "matrix*X=b" is solved, then we get X by this method. |
218 | 171 | * The X will be stored in b. |
172 | + * The matrix will be overwritten by this method. | 
219 | 173 | * |
220 | 174 | */ |
221 | -molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){ | |
175 | +molds_lapack_int Lapack::Dsysv(double** matrix, double* b, molds_lapack_int size){ | |
222 | 176 | molds_lapack_int info = 0; |
223 | - molds_lapack_int lwork; | |
224 | - char uplo = 'U'; | |
225 | -#ifdef __FCC_VERSION | |
226 | - molds_lapack_int uploLen=1; | |
227 | -#endif | |
228 | - molds_lapack_int lda = size; | |
229 | - molds_lapack_int ldb = size; | |
177 | + char uplo = 'U'; | |
230 | 178 | molds_lapack_int nrhs = 1; |
231 | - double* convertedMatrix; | |
232 | - double* work; | |
233 | - double* tempB; | |
179 | + molds_lapack_int lda = size; | |
180 | + molds_lapack_int ldb = nrhs; | |
234 | 181 | molds_lapack_int* ipiv; |
235 | 182 | |
236 | 183 | if(size < 1 ){ |
@@ -241,54 +188,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
241 | 188 | |
242 | 189 | // malloc |
243 | 190 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
244 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
245 | - tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
246 | - | |
247 | - for(molds_lapack_int i = 0; i < size; i++){ | |
248 | - for(molds_lapack_int j = i; j < size; j++){ | |
249 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
250 | - } | |
251 | - } | |
252 | - for(molds_lapack_int i = 0; i < size; i++){ | |
253 | - tempB[i] = b[i]; | |
254 | - } | |
255 | 191 | |
256 | - // calc. lwork | |
257 | - double blockSize=0.0; | |
258 | -#pragma omp critical | |
259 | - { | |
260 | - lwork = -1; | |
261 | - double tempWork[3]={0.0, 0.0, 0.0}; | |
262 | 192 | #ifdef __INTEL_COMPILER |
263 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
193 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
264 | 194 | #elif defined __FCC_VERSION |
265 | - dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info, uploLen); | |
195 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
266 | 196 | #else |
267 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
197 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
268 | 198 | #endif |
269 | - blockSize = tempWork[0]/size; | |
270 | - } | |
271 | - info = 0; | |
272 | - lwork = blockSize*size; | |
273 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
274 | - | |
275 | - // call Lapack | |
276 | -#ifdef __INTEL_COMPILER | |
277 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info); | |
278 | -#elif defined __FCC_VERSION | |
279 | - dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info, uploLen); | |
280 | -#else | |
281 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
282 | -#endif | |
283 | - for(molds_lapack_int i = 0; i < size; i++){ | |
284 | - b[i] = tempB[i]; | |
285 | - } | |
286 | 199 | |
287 | 200 | // free |
288 | - MOLDS_LAPACK_free(convertedMatrix); | |
289 | 201 | MOLDS_LAPACK_free(ipiv); |
290 | - MOLDS_LAPACK_free(work); | |
291 | - MOLDS_LAPACK_free(tempB); | |
292 | 202 | |
293 | 203 | if(info != 0){ |
294 | 204 | stringstream ss; |
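The new Dsysv likewise forwards straight to LAPACKE in row-major layout; ldb is nrhs (here 1) and the coefficient matrix is factorized in place, which is exactly why DoDIIS above now passes the tmpDiisErrorProducts copy. A standalone sketch, again assuming a LAPACKE installation:

```cpp
#include <cstdio>
#include <lapacke.h>

// Solve the symmetric system A*x = b; A is overwritten by its factorization
// and b is overwritten by the solution x.
int main(){
   double a[2*2] = { 4.0, 1.0,
                     1.0, 3.0 };
   double b[2]   = { 1.0, 2.0 };
   lapack_int ipiv[2];
   lapack_int info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, 'U', 2, 1, a, 2, ipiv, b, 1);
   if(info != 0){ return static_cast<int>(info); }
   std::printf("x = (%f, %f)\n", b[0], b[1]);
   return 0;
}
```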
@@ -304,17 +214,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
304 | 214 | /*** |
305 | 215 | * |
306 | 216 | * "matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1) is solved, then we get X[i] by this method. |
307 | - * The X[i] will be stored in b[i]. | |
308 | - * b[i][j] is j-th element of i-th solution, b[i]. | |
217 | + * The X[i] will be stored in b[i], namely | |
218 | + * b[i][j] will be the j-th element of the i-th solution, b[i]. | 
219 | + * Besides, the matrix will be overwritten by this method. | 
309 | 220 | * |
310 | 221 | */ |
311 | -molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
222 | +molds_lapack_int Lapack::Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
312 | 223 | molds_lapack_int info = 0; |
313 | 224 | char trans = 'N'; |
314 | 225 | molds_lapack_int lda = size; |
315 | - molds_lapack_int ldb = size; | |
316 | - double* convertedMatrix; | |
317 | - double* convertedB; | |
226 | + molds_lapack_int ldb = nrhs; | |
227 | + double* tmpB; | |
318 | 228 | molds_lapack_int* ipiv; |
319 | 229 | |
320 | 230 | if(size < 1 ){ |
@@ -323,48 +233,39 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
323 | 233 | throw MolDSException(ss.str()); |
324 | 234 | } |
325 | 235 | |
326 | - | |
327 | 236 | try{ |
328 | 237 | // malloc |
329 | 238 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
330 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
331 | - convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 ); | |
332 | - for(molds_lapack_int i = 0; i < size; i++){ | |
333 | - for(molds_lapack_int j = 0; j < size; j++){ | |
334 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
335 | - } | |
336 | - } | |
239 | + tmpB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*nrhs, 16 ); | |
240 | + // matrix b should be transposed | |
337 | 241 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
338 | 242 | for(molds_lapack_int j = 0; j < size; j++){ |
339 | - convertedB[j+i*size] = b[i][j]; | |
243 | + tmpB[j*nrhs+i] = b[i][j]; | |
340 | 244 | } |
341 | 245 | } |
342 | - this->Dgetrf(convertedMatrix, ipiv, size, size); | |
246 | + this->Dgetrf(&matrix[0][0], ipiv, size, size); | |
343 | 247 | #ifdef __INTEL_COMPILER |
344 | - dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info); | |
248 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
345 | 249 | #elif defined __FCC_VERSION |
346 | - molds_lapack_int transLen=1; | |
347 | - dgetrs_(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info, transLen); | |
250 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
348 | 251 | #else |
349 | - info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
252 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
350 | 253 | #endif |
351 | 254 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
352 | 255 | for(molds_lapack_int j = 0; j < size; j++){ |
353 | - b[i][j] = convertedB[j+i*size]; | |
256 | + b[i][j] = tmpB[j*nrhs+i]; | |
354 | 257 | } |
355 | 258 | } |
356 | 259 | } |
357 | 260 | catch(MolDSException ex){ |
358 | 261 | // free |
359 | - MOLDS_LAPACK_free(convertedMatrix); | |
360 | - MOLDS_LAPACK_free(convertedB); | |
262 | + MOLDS_LAPACK_free(tmpB); | |
361 | 263 | MOLDS_LAPACK_free(ipiv); |
362 | 264 | throw ex; |
363 | 265 | } |
364 | 266 | // free |
365 | - MOLDS_LAPACK_free(convertedMatrix); | |
366 | - MOLDS_LAPACK_free(convertedB); | |
367 | 267 | MOLDS_LAPACK_free(ipiv); |
268 | + MOLDS_LAPACK_free(tmpB); | |
368 | 269 | |
369 | 270 | if(info != 0){ |
370 | 271 | stringstream ss; |
@@ -379,43 +280,31 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
379 | 280 | // Argument "matrix" will be LU-decomposed. |
380 | 281 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
381 | 282 | molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); |
382 | - this->Dgetrf(matrix, ipiv, sizeM, sizeN); | |
283 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
383 | 284 | MOLDS_LAPACK_free(ipiv); |
384 | 285 | molds_lapack_int info = 0; |
385 | 286 | return info; |
386 | 287 | } |
387 | 288 | |
388 | -// Argument "matrix" is sizeM*sizeN matrix. | |
289 | +// Argument "matrix" is a sizeM*sizeN matrix in row-major (C/C++ style) order. | 
389 | 290 | // Argument "matrix" will be LU-decomposed. |
390 | 291 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
391 | - double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
392 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
393 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
394 | - convertedMatrix[i+j*sizeM] = matrix[i][j]; | |
395 | - } | |
396 | - } | |
397 | - this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN); | |
398 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
399 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
400 | - matrix[i][j] = convertedMatrix[i+j*sizeM]; | |
401 | - } | |
402 | - } | |
403 | - MOLDS_LAPACK_free(convertedMatrix); | |
292 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
404 | 293 | molds_lapack_int info = 0; |
405 | 294 | return info; |
406 | 295 | } |
407 | 296 | |
408 | 297 | // Argument "matrix" is sizeM*sizeN matrix. |
409 | -// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type). | |
298 | +// Each element of "matrix" should be stored in a 1-dimensional vector with row-major (C/C++ style) ordering. | 
410 | 299 | molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
411 | 300 | molds_lapack_int info = 0; |
412 | 301 | molds_lapack_int lda = sizeM; |
413 | 302 | #ifdef __INTEL_COMPILER |
414 | - dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info); | |
303 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
415 | 304 | #elif defined __FCC_VERSION |
416 | - dgetrf_(&sizeM, &sizeN, matrix, &lda, ipiv, &info); | |
305 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
417 | 306 | #else |
418 | - info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
307 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
419 | 308 | #endif |
420 | 309 | if(info != 0){ |
421 | 310 | stringstream ss; |
@@ -31,8 +31,8 @@ public: | ||
31 | 31 | static Lapack* GetInstance(); |
32 | 32 | static void DeleteInstance(); |
33 | 33 | molds_lapack_int Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors); |
34 | - molds_lapack_int Dsysv(double const* const* matrix, double* b, molds_lapack_int size); | |
35 | - molds_lapack_int Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
34 | + molds_lapack_int Dsysv(double** matrix, double* b, molds_lapack_int size); | |
35 | + molds_lapack_int Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
36 | 36 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
37 | 37 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
38 | 38 | private: |
@@ -2353,11 +2353,9 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{ | ||
2353 | 2353 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
2354 | 2354 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
2355 | 2355 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
2356 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(this->matrixCISdimension); | |
2357 | 2356 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
2358 | 2357 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
2359 | - &asyncCommunicator, | |
2360 | - mPassingTimes) ); | |
2358 | + &asyncCommunicator) ); | |
2361 | 2359 | |
2362 | 2360 | // this loop-a is MPI-parallelized |
2363 | 2361 | for(int k=this->matrixCISdimension-1; 0<=k; k--){ |
@@ -2418,12 +2416,13 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{ | ||
2418 | 2416 | int num = this->matrixCISdimension - k; |
2419 | 2417 | double* buff = &this->matrixCIS[k][k]; |
2420 | 2418 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
2421 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
2419 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
2422 | 2420 | } |
2423 | 2421 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
2424 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
2422 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
2425 | 2423 | } |
2426 | 2424 | } // end of k-loop which is MPI-parallelized |
2425 | + asyncCommunicator.Finalize(); | |
2427 | 2426 | communicationThread.join(); |
2428 | 2427 | // Broadcast data to all rank |
2429 | 2428 | for(int k=0; k<this->matrixCISdimension; k++){ |
@@ -3339,11 +3338,9 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo | ||
3339 | 3338 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3340 | 3339 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3341 | 3340 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3342 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize); | |
3343 | 3341 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3344 | 3342 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3345 | - &asyncCommunicator, | |
3346 | - mPassingTimes) ); | |
3343 | + &asyncCommunicator) ); | |
3347 | 3344 | // this loop-i is MPI-parallelized |
3348 | 3345 | for(int i=nonRedundantQIndecesSize-1; 0<=i; i--){ |
3349 | 3346 | int calcRank = i%mpiSize; |
@@ -3376,12 +3373,13 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo | ||
3376 | 3373 | int num = nonRedundantQIndecesSize - i; |
3377 | 3374 | double* buff = &gammaNRMinusKNR[i][i]; |
3378 | 3375 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3379 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
3376 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3380 | 3377 | } |
3381 | 3378 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3382 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
3379 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3383 | 3380 | } |
3384 | 3381 | } // end of loop-i parallelized with MPI |
3382 | + asyncCommunicator.Finalize(); | |
3385 | 3383 | communicationThread.join(); |
3386 | 3384 | // broadcast data to all rank |
3387 | 3385 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |
@@ -3403,11 +3401,9 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv, | ||
3403 | 3401 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3404 | 3402 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3405 | 3403 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3406 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize); | |
3407 | 3404 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3408 | 3405 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3409 | - &asyncCommunicator, | |
3410 | - mPassingTimes) ); | |
3406 | + &asyncCommunicator) ); | |
3411 | 3407 | // this loop-i is MPI-parallelized |
3412 | 3408 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |
3413 | 3409 | int calcRank = i%mpiSize; |
@@ -3440,12 +3436,13 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv, | ||
3440 | 3436 | int num = redundantQIndecesSize; |
3441 | 3437 | double* buff = &kRDagerGammaRInv[i][0]; |
3442 | 3438 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3443 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
3439 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3444 | 3440 | } |
3445 | 3441 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3446 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
3442 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3447 | 3443 | } |
3448 | 3444 | } // end of loop-i parallelized with MPI |
3445 | + asyncCommunicator.Finalize(); | |
3449 | 3446 | communicationThread.join(); |
3450 | 3447 | // broadcast data to all rank |
3451 | 3448 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |