Revision | 50f6dfa1ce8b7359ed19d60177ebf851710a5b4c (tree) |
---|---|
Time | 2013-10-14 17:17:50 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Committer | Mikiya Fujii |
trunk.r1540 is merged to branches/fx10. #32094
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/fx10@1541 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -514,6 +514,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
514 | 514 | double*** diisStoredDensityMatrix = NULL; |
515 | 515 | double*** diisStoredErrorVect = NULL; |
516 | 516 | double** diisErrorProducts = NULL; |
517 | + double** tmpDiisErrorProducts = NULL; | |
517 | 518 | double* diisErrorCoefficients = NULL; |
518 | 519 | |
519 | 520 | try{ |
@@ -521,6 +522,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
521 | 522 | &diisStoredDensityMatrix, |
522 | 523 | &diisStoredErrorVect, |
523 | 524 | &diisErrorProducts, |
525 | + &tmpDiisErrorProducts, | |
524 | 526 | &diisErrorCoefficients); |
525 | 527 | // calculate electron integral |
526 | 528 | this->CalcGammaAB(this->gammaAB, *this->molecule); |
@@ -590,6 +592,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
590 | 592 | diisStoredDensityMatrix, |
591 | 593 | diisStoredErrorVect, |
592 | 594 | diisErrorProducts, |
595 | + tmpDiisErrorProducts, | |
593 | 596 | diisErrorCoefficients, |
594 | 597 | diisError, |
595 | 598 | hasAppliedDIIS, |
@@ -612,6 +615,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
612 | 615 | &diisStoredDensityMatrix, |
613 | 616 | &diisStoredErrorVect, |
614 | 617 | &diisErrorProducts, |
618 | + &tmpDiisErrorProducts, | |
615 | 619 | &diisErrorCoefficients); |
616 | 620 | |
617 | 621 | throw ex; |
@@ -620,6 +624,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
620 | 624 | &diisStoredDensityMatrix, |
621 | 625 | &diisStoredErrorVect, |
622 | 626 | &diisErrorProducts, |
627 | + &tmpDiisErrorProducts, | |
623 | 628 | &diisErrorCoefficients); |
624 | 629 | |
625 | 630 | double ompEndTime = omp_get_wtime(); |
@@ -750,6 +755,7 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
750 | 755 | double**** diisStoredDensityMatrix, |
751 | 756 | double**** diisStoredErrorVect, |
752 | 757 | double*** diisErrorProducts, |
758 | + double*** tmpDiisErrorProducts, | |
753 | 759 | double** diisErrorCoefficients) const{ |
754 | 760 | |
755 | 761 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -767,6 +773,9 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
767 | 773 | MallocerFreer::GetInstance()->Free<double>(diisErrorProducts, |
768 | 774 | diisNumErrorVect+1, |
769 | 775 | diisNumErrorVect+1); |
776 | + MallocerFreer::GetInstance()->Free<double>(tmpDiisErrorProducts, | |
777 | + diisNumErrorVect+1, | |
778 | + diisNumErrorVect+1); | |
770 | 779 | MallocerFreer::GetInstance()->Free<double>(diisErrorCoefficients, |
771 | 780 | diisNumErrorVect+1); |
772 | 781 | } |
@@ -775,6 +784,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
775 | 784 | double**** diisStoredDensityMatrix, |
776 | 785 | double**** diisStoredErrorVect, |
777 | 786 | double*** diisErrorProducts, |
787 | + double*** tmpDiisErrorProducts, | |
778 | 788 | double** diisErrorCoefficients){ |
779 | 789 | |
780 | 790 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -791,6 +801,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
791 | 801 | this->molecule->GetTotalNumberAOs(), |
792 | 802 | this->molecule->GetTotalNumberAOs()); |
793 | 803 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); |
804 | + MallocerFreer::GetInstance()->Malloc<double>(tmpDiisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); | |
794 | 805 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorCoefficients, diisNumErrorVect+1); |
795 | 806 | } |
796 | 807 | } |
@@ -805,6 +816,7 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
805 | 816 | double*** diisStoredDensityMatrix, |
806 | 817 | double*** diisStoredErrorVect, |
807 | 818 | double** diisErrorProducts, |
819 | + double** tmpDiisErrorProducts, | |
808 | 820 | double* diisErrorCoefficients, |
809 | 821 | double& diisError, |
810 | 822 | bool& hasAppliedDIIS, |
@@ -873,7 +885,13 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
873 | 885 | if(diisNumErrorVect <= step && diisEndError<diisError && diisError<diisStartError){ |
874 | 886 | hasAppliedDIIS = true; |
875 | 887 | try{ |
876 | - MolDS_wrappers::Lapack::GetInstance()->Dsysv(diisErrorProducts, | |
888 | +#pragma omp parallel for schedule(auto) | |
889 | + for(int i=0; i<diisNumErrorVect+1; i++){ | |
890 | + for(int j=0; j<diisNumErrorVect+1; j++){ | |
891 | + tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j]; | |
892 | + } | |
893 | + } | |
894 | + MolDS_wrappers::Lapack::GetInstance()->Dsysv(tmpDiisErrorProducts, | |
877 | 895 | diisErrorCoefficients, |
878 | 896 | diisNumErrorVect+1); |
879 | 897 | }catch(MolDSException ex){ |
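The scratch matrix tmpDiisErrorProducts introduced above is needed because the reworked Lapack::Dsysv (see the Lapack hunks further down) now factorizes its input in place, while the accumulated DIIS error-product matrix has to survive into later SCF iterations. A minimal sketch of the pattern, reusing the array names from this hunk (allocation is assumed to happen elsewhere):

```cpp
#include <omp.h>

// Copy the persistent DIIS B-matrix into a scratch buffer before handing it
// to a solver that destroys its input.
void PrepareDiisScratch(double const* const* diisErrorProducts,
                        double**             tmpDiisErrorProducts,
                        int                  diisNumErrorVect){
#pragma omp parallel for schedule(auto)
   for(int i=0; i<diisNumErrorVect+1; i++){
      for(int j=0; j<diisNumErrorVect+1; j++){
         tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j];
      }
   }
   // tmpDiisErrorProducts, not diisErrorProducts, is then passed to
   // MolDS_wrappers::Lapack::Dsysv, which overwrites it with the factorization.
}
```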
@@ -1295,6 +1313,7 @@ double Cndo2::GetMolecularIntegralElement(int moI, int moJ, int moK, int moL, | ||
1295 | 1313 | void Cndo2::UpdateOldOrbitalElectronPopulation(double** oldOrbitalElectronPopulation, |
1296 | 1314 | double const* const* orbitalElectronPopulation, |
1297 | 1315 | int numberAOs) const{ |
1316 | +#pragma omp parallel for schedule(auto) | |
1298 | 1317 | for(int i=0; i<numberAOs; i++){ |
1299 | 1318 | for(int j=0; j<numberAOs; j++){ |
1300 | 1319 | oldOrbitalElectronPopulation[i][j] = orbitalElectronPopulation[i][j]; |
@@ -1371,11 +1390,9 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1371 | 1390 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1372 | 1391 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1373 | 1392 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1374 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs); | |
1375 | 1393 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1376 | 1394 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1377 | - &asyncCommunicator, | |
1378 | - mPassingTimes) ); | |
1395 | + &asyncCommunicator) ); | |
1379 | 1396 | |
1380 | 1397 | MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs); |
1381 | 1398 | for(int A=totalNumberAtoms-1; 0<=A; A--){ |
@@ -1437,26 +1454,25 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1437 | 1454 | } // end of if(mpiRank == calcRank) |
1438 | 1455 | |
1439 | 1456 | // set data to gather in mpiHeadRank with asynchronous MPI |
1440 | - int tag = mu; | |
1441 | - int source = calcRank; | |
1442 | - int dest = mpiHeadRank; | |
1457 | + int tag = mu; | |
1458 | + int source = calcRank; | |
1459 | + int dest = mpiHeadRank; | |
1460 | + double* buff = &fockMatrix[mu][mu]; | |
1461 | + MolDS_mpi::molds_mpi_int num = totalNumberAOs-mu; | |
1443 | 1462 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1444 | - asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu], | |
1445 | - totalNumberAOs-mu, | |
1446 | - source, | |
1447 | - tag); | |
1463 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
1448 | 1464 | } |
1449 | 1465 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1450 | - asyncCommunicator.SetSentVector(&fockMatrix[mu][mu], | |
1451 | - totalNumberAOs-mu, | |
1452 | - dest, | |
1453 | - tag); | |
1466 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
1454 | 1467 | } |
1455 | 1468 | } // end of loop mu parallelized with MPI |
1456 | 1469 | } // end of loop A |
1457 | 1470 | // Delete the communication thread. |
1471 | + asyncCommunicator.Finalize(); | |
1458 | 1472 | communicationThread.join(); |
1459 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank); | |
1473 | + double* buff = &fockMatrix[0][0]; | |
1474 | + MolDS_mpi::molds_mpi_int num = totalNumberAOs*totalNumberAOs; | |
1475 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1460 | 1476 | |
1461 | 1477 | /* |
1462 | 1478 | this->OutputLog("fock matrix\n"); |
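This hunk also shows the communication pattern the commit adopts throughout: the background thread runs AsyncCommunicator::Run&lt;double&gt;() with no precomputed message count, each rank enqueues its transfers with SetSentMessage/SetRecvedMessage, and Finalize() lets the worker drain the queue and exit before the thread is joined. A condensed sketch of that pattern, reusing the names from the diff (the GatherRows wrapper and the round-robin calcRank decomposition are illustrative assumptions, and the closing broadcast assumes the matrix rows are stored contiguously):

```cpp
// MolDS headers (AsyncCommunicator.h, MpiProcess.h) are assumed to be included.
#include <boost/thread.hpp>
#include <boost/bind.hpp>

void GatherRows(double** matrix, int totalRows,
                int mpiRank, int mpiHeadRank, int mpiSize){
   MolDS_mpi::AsyncCommunicator asyncCommunicator;
   boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
                                                  &asyncCommunicator) );
   for(int mu=0; mu<totalRows; mu++){
      int calcRank = mu%mpiSize;            // assumed round-robin work decomposition
      // ... rank calcRank fills matrix[mu][mu..totalRows-1] here ...
      double* buff = &matrix[mu][mu];
      MolDS_mpi::molds_mpi_int num = totalRows - mu;
      if(mpiRank == mpiHeadRank && mpiRank != calcRank){
         asyncCommunicator.SetRecvedMessage(buff, num, calcRank, mu);   // tag = mu
      }
      if(mpiRank != mpiHeadRank && mpiRank == calcRank){
         asyncCommunicator.SetSentMessage(buff, num, mpiHeadRank, mu);
      }
   }
   asyncCommunicator.Finalize();    // no further messages will be queued
   communicationThread.join();      // worker exits once the queue is drained
   MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&matrix[0][0],
                                                   (MolDS_mpi::molds_mpi_int)totalRows*totalRows,
                                                   mpiHeadRank);
}
```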
@@ -1570,12 +1586,10 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation, | ||
1570 | 1586 | const Molecule& molecule) const{ |
1571 | 1587 | int totalNumberAtoms = molecule.GetNumberAtoms(); |
1572 | 1588 | MallocerFreer::GetInstance()->Initialize<double>(atomicElectronPopulation, totalNumberAtoms); |
1573 | - | |
1574 | - int firstAOIndex = 0; | |
1575 | - int numberAOs = 0; | |
1589 | +#pragma omp parallel for schedule(auto) | |
1576 | 1590 | for(int A=0; A<totalNumberAtoms; A++){ |
1577 | - firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex(); | |
1578 | - numberAOs = molecule.GetAtom(A)->GetValenceSize(); | |
1591 | + int firstAOIndex = molecule.GetAtom(A)->GetFirstAOIndex(); | |
1592 | + int numberAOs = molecule.GetAtom(A)->GetValenceSize(); | |
1579 | 1593 | for(int i=firstAOIndex; i<firstAOIndex+numberAOs; i++){ |
1580 | 1594 | atomicElectronPopulation[A] += orbitalElectronPopulation[i][i]; |
1581 | 1595 | } |
@@ -1591,11 +1605,9 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ | ||
1591 | 1605 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1592 | 1606 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1593 | 1607 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1594 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1595 | 1608 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1596 | 1609 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1597 | - &asyncCommunicator, | |
1598 | - mPassingTimes) ); | |
1610 | + &asyncCommunicator) ); | |
1599 | 1611 | |
1600 | 1612 | // This loop (A) is parallelized by MPI |
1601 | 1613 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1669,24 +1681,23 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ | ||
1669 | 1681 | } // end of if(mpiRank==calcRank) |
1670 | 1682 | |
1671 | 1683 | // set data to gather in mpiHeadRank with asynchronous MPI |
1672 | - int tag = A; | |
1673 | - int source = calcRank; | |
1674 | - int dest = mpiHeadRank; | |
1684 | + int tag = A; | |
1685 | + int source = calcRank; | |
1686 | + int dest = mpiHeadRank; | |
1687 | + double* buff = &gammaAB[A][A]; | |
1688 | + MolDS_mpi::molds_mpi_int num = totalAtomNumber-A; | |
1675 | 1689 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1676 | - asyncCommunicator.SetRecvedVector(&gammaAB[A][A], | |
1677 | - totalAtomNumber-A, | |
1678 | - source, | |
1679 | - tag); | |
1690 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
1680 | 1691 | } |
1681 | 1692 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1682 | - asyncCommunicator.SetSentVector(&gammaAB[A][A], | |
1683 | - totalAtomNumber-A, | |
1684 | - dest, | |
1685 | - tag); | |
1693 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
1686 | 1694 | } |
1687 | 1695 | } // end of loop A parallelized by MPI |
1696 | + asyncCommunicator.Finalize(); | |
1688 | 1697 | communicationThread.join(); |
1689 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank); | |
1698 | + double* buff = &gammaAB[0][0]; | |
1699 | + MolDS_mpi::molds_mpi_int num = totalAtomNumber*totalAtomNumber; | |
1700 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1690 | 1701 | |
1691 | 1702 | #pragma omp parallel for schedule(auto) |
1692 | 1703 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1792,12 +1803,9 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1792 | 1803 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
1793 | 1804 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
1794 | 1805 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1795 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1796 | - mPassingTimes *= CartesianType_end; | |
1797 | 1806 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
1798 | 1807 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
1799 | - &asyncCommunicator, | |
1800 | - mPassingTimes) ); | |
1808 | + &asyncCommunicator) ); | |
1801 | 1809 | |
1802 | 1810 | // This loop (A and mu) is parallelized by MPI |
1803 | 1811 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1836,43 +1844,32 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1836 | 1844 | } // end of if(mpiRank == calcRank) |
1837 | 1845 | |
1838 | 1846 | // set data to gather in mpiHeadRank with asynchronous MPI |
1839 | - int tagX = A* CartesianType_end + XAxis; | |
1840 | - int tagY = A* CartesianType_end + YAxis; | |
1841 | - int tagZ = A* CartesianType_end + ZAxis; | |
1842 | - int source = calcRank; | |
1843 | - int dest = mpiHeadRank; | |
1847 | + int tagX = A* CartesianType_end + XAxis; | |
1848 | + int tagY = A* CartesianType_end + YAxis; | |
1849 | + int tagZ = A* CartesianType_end + ZAxis; | |
1850 | + int source = calcRank; | |
1851 | + int dest = mpiHeadRank; | |
1852 | + double* buffX = &cartesianMatrix[XAxis][firstAOIndexA][0]; | |
1853 | + double* buffY = &cartesianMatrix[YAxis][firstAOIndexA][0]; | |
1854 | + double* buffZ = &cartesianMatrix[ZAxis][firstAOIndexA][0]; | |
1855 | + MolDS_mpi::molds_mpi_int num = numValenceAOsA*totalAONumber; | |
1844 | 1856 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
1845 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1846 | - numValenceAOsA*totalAONumber, | |
1847 | - source, | |
1848 | - tagX); | |
1849 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1850 | - numValenceAOsA*totalAONumber, | |
1851 | - source, | |
1852 | - tagY); | |
1853 | - asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1854 | - numValenceAOsA*totalAONumber, | |
1855 | - source, | |
1856 | - tagZ); | |
1857 | + asyncCommunicator.SetRecvedMessage(buffX, num, source, tagX); | |
1858 | + asyncCommunicator.SetRecvedMessage(buffY, num, source, tagY); | |
1859 | + asyncCommunicator.SetRecvedMessage(buffZ, num, source, tagZ); | |
1857 | 1860 | } |
1858 | 1861 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
1859 | - asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1860 | - numValenceAOsA*totalAONumber, | |
1861 | - dest, | |
1862 | - tagX); | |
1863 | - asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1864 | - numValenceAOsA*totalAONumber, | |
1865 | - dest, | |
1866 | - tagY); | |
1867 | - asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1868 | - numValenceAOsA*totalAONumber, | |
1869 | - dest, | |
1870 | - tagZ); | |
1862 | + asyncCommunicator.SetSentMessage(buffX, num, dest, tagX); | |
1863 | + asyncCommunicator.SetSentMessage(buffY, num, dest, tagY); | |
1864 | + asyncCommunicator.SetSentMessage(buffZ, num, dest, tagZ); | |
1871 | 1865 | } |
1872 | 1866 | } // end of loop for int A with MPI |
1873 | 1867 | // Delete the communication thread. |
1868 | + asyncCommunicator.Finalize(); | |
1874 | 1869 | communicationThread.join(); |
1875 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, mpiHeadRank); | |
1870 | + double* buff = &cartesianMatrix[0][0][0]; | |
1871 | + MolDS_mpi::molds_mpi_int num = CartesianType_end*totalAONumber*totalAONumber; | |
1872 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
1876 | 1873 | |
1877 | 1874 | /* |
1878 | 1875 | // communication to collect all matrix data on head-rank |
@@ -3908,11 +3905,9 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3908 | 3905 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3909 | 3906 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3910 | 3907 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3911 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
3912 | 3908 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3913 | 3909 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3914 | - &asyncCommunicator, | |
3915 | - mPassingTimes) ); | |
3910 | + &asyncCommunicator) ); | |
3916 | 3911 | |
3917 | 3912 | MallocerFreer::GetInstance()->Initialize<double>(overlapAOs, |
3918 | 3913 | totalAONumber, |
@@ -3976,24 +3971,23 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3976 | 3971 | } // end of if(mpiRank == calcRank) |
3977 | 3972 | |
3978 | 3973 | // set data to gather in mpiHeadRank with asynchronous MPI |
3979 | - int tag = A; | |
3980 | - int source = calcRank; | |
3981 | - int dest = mpiHeadRank; | |
3974 | + int tag = A; | |
3975 | + int source = calcRank; | |
3976 | + int dest = mpiHeadRank; | |
3977 | + double* buff = overlapAOs[firstAOIndexA]; | |
3978 | + MolDS_mpi::molds_mpi_int num = totalAONumber*numValenceAOs; | |
3982 | 3979 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3983 | - asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA], | |
3984 | - totalAONumber*numValenceAOs, | |
3985 | - source, | |
3986 | - tag); | |
3980 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3987 | 3981 | } |
3988 | 3982 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3989 | - asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA], | |
3990 | - totalAONumber*numValenceAOs, | |
3991 | - dest, | |
3992 | - tag); | |
3983 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3993 | 3984 | } |
3994 | 3985 | } // end of loop A parallelized with MPI |
3986 | + asyncCommunicator.Finalize(); | |
3995 | 3987 | communicationThread.join(); |
3996 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank); | |
3988 | + double* buff = &overlapAOs[0][0]; | |
3989 | + MolDS_mpi::molds_mpi_int num = totalAONumber*totalAONumber; | |
3990 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(buff, num, mpiHeadRank); | |
3997 | 3991 | |
3998 | 3992 | #pragma omp parallel for schedule(auto) |
3999 | 3993 | for(int mu=0; mu<totalAONumber; mu++){ |
@@ -485,6 +485,7 @@ private: | ||
485 | 485 | double*** diisStoredDensityMatrix, |
486 | 486 | double*** diisStoredErrorVect, |
487 | 487 | double** diisErrorProducts, |
488 | + double** tmpDiisErrorProducts, | |
488 | 489 | double* diisErrorCoefficients, |
489 | 490 | double& diisError, |
490 | 491 | bool& hasAppliedDIIS, |
@@ -510,11 +511,13 @@ private: | ||
510 | 511 | double**** diisStoredDensityMatrix, |
511 | 512 | double**** diisStoredErrorVect, |
512 | 513 | double*** diisErrorProducts, |
514 | + double*** tmpDiisErrorProducts, | |
513 | 515 | double** diisErrorCoefficients) const; |
514 | 516 | void MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, |
515 | 517 | double**** diisStoredDensityMatrix, |
516 | 518 | double**** diisStoredErrorVect, |
517 | 519 | double*** diisErrorProducts, |
520 | + double*** tmpDiisErrorProducts, | |
518 | 521 | double** diisErrorCoefficients); |
519 | 522 | }; |
520 | 523 |
@@ -3443,11 +3443,9 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3443 | 3443 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3444 | 3444 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3445 | 3445 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3446 | - int mPassingTimes = totalNumberAtoms-1; | |
3447 | 3446 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3448 | 3447 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3449 | - &asyncCommunicator, | |
3450 | - mPassingTimes) ); | |
3448 | + &asyncCommunicator) ); | |
3451 | 3449 | #ifdef MOLDS_DBG |
3452 | 3450 | if(twoElecTwoCore == NULL){ |
3453 | 3451 | throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix); |
@@ -3515,9 +3513,10 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3515 | 3513 | OrbitalType twoElecLimit = dxy; |
3516 | 3514 | int numBuff = (twoElecLimit+1)*twoElecLimit/2; |
3517 | 3515 | int num = (totalNumberAtoms-b)*numBuff*numBuff; |
3518 | - asyncCommunicator.SetBroadcastedVector(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank); | |
3516 | + asyncCommunicator.SetBroadcastedMessage(&this->twoElecTwoCoreMpiBuff[a][b][0][0], num, calcRank); | |
3519 | 3517 | } |
3520 | 3518 | } // end of loop a parallelized with MPI |
3519 | + asyncCommunicator.Finalize(); | |
3521 | 3520 | communicationThread.join(); |
3522 | 3521 | |
3523 | 3522 | #pragma omp parallel for schedule(auto) |
@@ -35,8 +35,15 @@ | ||
35 | 35 | #include"AsyncCommunicator.h" |
36 | 36 | using namespace std; |
37 | 37 | namespace MolDS_mpi{ |
38 | -AsyncCommunicator::AsyncCommunicator(){} | |
38 | +AsyncCommunicator::AsyncCommunicator(){ | |
39 | + this->hasAllMessagesSet=false; | |
40 | +} | |
39 | 41 | AsyncCommunicator::~AsyncCommunicator(){} |
42 | +void AsyncCommunicator::Finalize(){ | |
43 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
44 | + this->hasAllMessagesSet = true; | |
45 | + this->stateChange.notify_all(); | |
46 | +} | |
40 | 47 | } |
41 | 48 | |
42 | 49 |
@@ -28,29 +28,28 @@ class AsyncCommunicator{ | ||
28 | 28 | public: |
29 | 29 | AsyncCommunicator(); |
30 | 30 | ~AsyncCommunicator(); |
31 | - template<typename T> void Run(int passingTimes){ | |
31 | + template<typename T> void Run(){ | |
32 | 32 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
33 | - while(0<passingTimes){ | |
34 | - sleep(0.1); | |
33 | + while(true){ | |
35 | 34 | boost::mutex::scoped_lock lk(this->stateGuard); |
36 | 35 | try{ |
37 | - DataInfo dInfo = this->dataQueue.FrontPop(); | |
38 | - if(dInfo.mpiFuncType == MolDS_base::Send){ | |
39 | - MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest, | |
40 | - dInfo.tag, | |
41 | - reinterpret_cast<T*>(dInfo.vectorPtr), | |
42 | - dInfo.num); | |
36 | + MessageInfo mInfo = this->messageQueue.FrontPop(); | |
37 | + if(mInfo.mpiFuncType == MolDS_base::Send){ | |
38 | + MolDS_mpi::MpiProcess::GetInstance()->Send(mInfo.dest, | |
39 | + mInfo.tag, | |
40 | + reinterpret_cast<T*>(mInfo.vectorPtr), | |
41 | + mInfo.num); | |
43 | 42 | } |
44 | - else if(dInfo.mpiFuncType == MolDS_base::Recv){ | |
45 | - MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source, | |
46 | - dInfo.tag, | |
47 | - reinterpret_cast<T*>(dInfo.vectorPtr), | |
48 | - dInfo.num); | |
43 | + else if(mInfo.mpiFuncType == MolDS_base::Recv){ | |
44 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(mInfo.source, | |
45 | + mInfo.tag, | |
46 | + reinterpret_cast<T*>(mInfo.vectorPtr), | |
47 | + mInfo.num); | |
49 | 48 | } |
50 | - else if(dInfo.mpiFuncType == MolDS_base::Broadcast){ | |
51 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr), | |
52 | - dInfo.num, | |
53 | - dInfo.source); | |
49 | + else if(mInfo.mpiFuncType == MolDS_base::Broadcast){ | |
50 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(mInfo.vectorPtr), | |
51 | + mInfo.num, | |
52 | + mInfo.source); | |
54 | 53 | } |
55 | 54 | else{ |
56 | 55 | std::stringstream ss; |
@@ -59,10 +58,12 @@ public: | ||
59 | 58 | throw ex; |
60 | 59 | } |
61 | 60 | this->stateChange.notify_all(); |
62 | - passingTimes--; | |
63 | 61 | } |
64 | 62 | catch(MolDS_base::MolDSException ex){ |
65 | - if(ex.HasKey(MolDS_base::EmptyQueue)){ | |
63 | + if(ex.HasKey(MolDS_base::EmptyQueue) && this->hasAllMessagesSet){ | 
64 | + break; | |
65 | + } | |
66 | + else if(ex.HasKey(MolDS_base::EmptyQueue) && !this->hasAllMessagesSet){ | 
66 | 67 | this->stateChange.wait(lk); |
67 | 68 | continue; |
68 | 69 | } |
@@ -73,51 +74,54 @@ public: | ||
73 | 74 | } |
74 | 75 | } |
75 | 76 | |
76 | - template<typename T> void SetSentVector(T* vector, | |
77 | - molds_mpi_int num, | |
78 | - int dest, | |
79 | - int tag){ | |
77 | + template<typename T> void SetSentMessage(T* vector, | |
78 | + molds_mpi_int num, | |
79 | + int dest, | |
80 | + int tag){ | |
80 | 81 | int source = NON_USED; |
81 | 82 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send; |
82 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
83 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
83 | 84 | } |
84 | 85 | |
85 | - template<typename T> void SetRecvedVector(T* vector, | |
86 | - molds_mpi_int num, | |
87 | - int source, | |
88 | - int tag){ | |
86 | + template<typename T> void SetRecvedMessage(T* vector, | |
87 | + molds_mpi_int num, | |
88 | + int source, | |
89 | + int tag){ | |
89 | 90 | int dest = NON_USED; |
90 | 91 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv; |
91 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
92 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
92 | 93 | } |
93 | 94 | |
94 | - template<typename T> void SetBroadcastedVector(T* vector, molds_mpi_int num, int root){ | |
95 | + template<typename T> void SetBroadcastedMessage(T* vector, molds_mpi_int num, int root){ | |
95 | 96 | int source = root; |
96 | 97 | int dest = NON_USED; |
97 | 98 | int tag = NON_USED; |
98 | 99 | MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast; |
99 | - this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
100 | + this->SetMessage(vector, num, source, dest, tag, mpiFuncType); | |
100 | 101 | } |
101 | 102 | |
103 | + void Finalize(); | |
104 | + | |
102 | 105 | private: |
103 | - struct DataInfo{intptr_t vectorPtr; | |
104 | - molds_mpi_int num; | |
105 | - int source; | |
106 | - int dest; | |
107 | - int tag; | |
108 | - MolDS_base::MpiFunctionType mpiFuncType;}; | |
106 | + struct MessageInfo{intptr_t vectorPtr; | |
107 | + molds_mpi_int num; | |
108 | + int source; | |
109 | + int dest; | |
110 | + int tag; | |
111 | + MolDS_base::MpiFunctionType mpiFuncType;}; | |
109 | 112 | boost::mutex stateGuard; |
110 | 113 | boost::condition stateChange; |
111 | - MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue; | |
112 | - template<typename T> void SetVector(T* vector, | |
113 | - molds_mpi_int num, | |
114 | - int source, | |
115 | - int dest, | |
116 | - int tag, | |
117 | - MolDS_base::MpiFunctionType mpiFuncType){ | |
114 | + bool hasAllMessagesSet; | |
115 | + MolDS_base_containers::ThreadSafeQueue<MessageInfo> messageQueue; | |
116 | + template<typename T> void SetMessage(T* vector, | |
117 | + molds_mpi_int num, | |
118 | + int source, | |
119 | + int dest, | |
120 | + int tag, | |
121 | + MolDS_base::MpiFunctionType mpiFuncType){ | |
118 | 122 | boost::mutex::scoped_lock lk(this->stateGuard); |
119 | - DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType}; | |
120 | - this->dataQueue.Push(dInfo); | |
123 | + MessageInfo mInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType}; | |
124 | + this->messageQueue.Push(mInfo); | |
121 | 125 | this->stateChange.notify_all(); |
122 | 126 | } |
123 | 127 | }; |
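The shutdown handshake added here works as follows: Run() now loops until the queue is empty and Finalize() has raised hasAllMessagesSet; an empty queue before Finalize() just puts the worker to sleep on stateChange. A self-contained sketch of the same protocol with Boost primitives (illustrative names, not the MolDS API):

```cpp
#include <queue>
#include <boost/thread.hpp>

// Producer/consumer queue that terminates cleanly: Pop() returns false only
// after Finalize() has been called and every queued item has been consumed.
class FinalizableQueue{
public:
   FinalizableQueue() : finished(false) {}
   void Push(int v){
      boost::mutex::scoped_lock lk(this->guard);
      this->items.push(v);
      this->changed.notify_all();
   }
   void Finalize(){
      boost::mutex::scoped_lock lk(this->guard);
      this->finished = true;
      this->changed.notify_all();
   }
   bool Pop(int& v){
      boost::mutex::scoped_lock lk(this->guard);
      while(this->items.empty() && !this->finished){
         this->changed.wait(lk);               // queue empty, but more may come
      }
      if(this->items.empty()){ return false; } // finished and fully drained
      v = this->items.front();
      this->items.pop();
      return true;
   }
private:
   std::queue<int>           items;
   boost::mutex              guard;
   boost::condition_variable changed;
   bool                      finished;
};
```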
@@ -97,22 +97,6 @@ MpiProcess* MpiProcess::GetInstance(){ | ||
97 | 97 | |
98 | 98 | void MpiProcess::Barrier(){this->communicator->barrier();} |
99 | 99 | |
100 | -int MpiProcess::GetMessagePassingTimes(molds_mpi_int num)const{ | |
101 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
102 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
103 | - int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
104 | - int calcTimes = num/mpiSize; | |
105 | - if(mpiRank < num%mpiSize){calcTimes+=1;} | |
106 | - int mpiPassingTimes; | |
107 | - if(mpiRank == mpiHeadRank){ | |
108 | - mpiPassingTimes = num - calcTimes; | |
109 | - } | |
110 | - else{ | |
111 | - mpiPassingTimes = calcTimes; | |
112 | - } | |
113 | - return mpiPassingTimes; | |
114 | -} | |
115 | - | |
116 | 100 | void MpiProcess::SetMessages(){ |
117 | 101 | this->errorMessageSplitMessageElemLimNegative |
118 | 102 | = "Error in mpi::MpiProcess::SplitMessage2Chunks: elementsLimit is negative. \nelementsLimit="; |
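For reference, the helper removed here predicted how many point-to-point messages each rank would handle, so that the old Run(passingTimes) knew when to stop; with the Finalize() handshake that count is no longer needed. A sketch of what it computed, with a worked example (values assumed):

```cpp
// Reproduction of the deleted bookkeeping, for illustration only.
int MessagePassingTimes(int num, int mpiSize, int mpiRank, int mpiHeadRank){
   int calcTimes = num/mpiSize;                    // items this rank computes
   if(mpiRank < num%mpiSize){ calcTimes += 1; }
   // The head rank receives one message per item it does not compute itself;
   // every other rank sends one message per item it does compute.
   return (mpiRank == mpiHeadRank) ? num - calcTimes : calcTimes;
}
// Example: num=10, mpiSize=4, mpiHeadRank=0 gives
//   rank 0 -> 10-3 = 7 receives, rank 1 -> 3 sends, ranks 2 and 3 -> 2 sends each.
```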
@@ -114,7 +114,6 @@ public: | ||
114 | 114 | MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num); |
115 | 115 | } |
116 | 116 | void Barrier(); |
117 | - int GetMessagePassingTimes(molds_mpi_int num) const; | |
118 | 117 | private: |
119 | 118 | static MpiProcess* mpiProcess; |
120 | 119 | MpiProcess(); |
@@ -142,25 +141,25 @@ private: | ||
142 | 141 | int tagBase = origianlTag*numChunks; |
143 | 142 | if(elementsLimit < 0){ |
144 | 143 | std::stringstream ss; |
145 | - ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << endl; | |
144 | + ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << std::endl; | |
146 | 145 | MolDS_base::MolDSException ex(ss.str()); |
147 | 146 | throw ex; |
148 | 147 | } |
149 | 148 | if(numChunks < 0){ |
150 | 149 | std::stringstream ss; |
151 | - ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << endl; | |
150 | + ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << std::endl; | |
152 | 151 | MolDS_base::MolDSException ex(ss.str()); |
153 | 152 | throw ex; |
154 | 153 | } |
155 | 154 | if(remaining < 0){ |
156 | 155 | std::stringstream ss; |
157 | - ss << this->errorMessageSplitMessageRemainingNegative << remaining << endl; | |
156 | + ss << this->errorMessageSplitMessageRemainingNegative << remaining << std::endl; | |
158 | 157 | MolDS_base::MolDSException ex(ss.str()); |
159 | 158 | throw ex; |
160 | 159 | } |
161 | 160 | if(tagBase < 0){ |
162 | 161 | std::stringstream ss; |
163 | - ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << endl; | |
162 | + ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << std::endl; | |
164 | 163 | MolDS_base::MolDSException ex(ss.str()); |
165 | 164 | throw ex; |
166 | 165 | } |
@@ -36,7 +36,7 @@ | ||
36 | 36 | #ifdef __INTEL_COMPILER |
37 | 37 | #include"mkl.h" |
38 | 38 | #elif defined __FCC_VERSION |
39 | - #include"fj_lapack.h" | |
39 | + #include"lapacke.h" | |
40 | 40 | #else |
41 | 41 | #if ( __WORDSIZE == 32 ) |
42 | 42 | #else |
@@ -100,18 +100,10 @@ void Lapack::DeleteInstance(){ | ||
100 | 100 | * ***/ |
101 | 101 | molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){ |
102 | 102 | molds_lapack_int info = 0; |
103 | - molds_lapack_int k = 0; | |
104 | - molds_lapack_int lwork; | |
105 | - molds_lapack_int liwork; | |
106 | - char job; | |
107 | 103 | char uplo = 'U'; |
108 | 104 | molds_lapack_int lda = size; |
109 | - double* convertedMatrix; | |
110 | - double* tempEigenValues; | |
111 | - double* work; | |
112 | - molds_lapack_int* iwork; | |
113 | - | |
114 | 105 | // set job type |
106 | + char job; | |
115 | 107 | if(calcEigenVectors){ |
116 | 108 | job = 'V'; |
117 | 109 | } |
@@ -119,88 +111,49 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
119 | 111 | job = 'N'; |
120 | 112 | } |
121 | 113 | |
122 | - // calc. lwork and liwork | |
123 | - if(size < 1 ){ | |
124 | - stringstream ss; | |
125 | - ss << errorMessageDsyevdSize; | |
126 | - MolDSException ex(ss.str()); | |
127 | - ex.SetKeyValue<int>(LapackInfo, info); | |
128 | - throw ex; | |
129 | - } | |
130 | - else if(size == 1){ | |
131 | - lwork = 1; | |
132 | - liwork = 1; | |
133 | - } | |
134 | - else if(1 < size && job == 'N'){ | |
135 | - lwork = 2*size + 1; | |
136 | - liwork = 2; | |
137 | - } | |
138 | - else{ | |
139 | - // calc. k | |
140 | - double temp = log((double)size)/log(2.0); | |
141 | - if( (double)((molds_lapack_int)temp) < temp ){ | |
142 | - k = (molds_lapack_int)temp + 1; | |
143 | - } | |
144 | - else{ | |
145 | - k = (molds_lapack_int)temp; | |
146 | - } | |
147 | - lwork = 3*size*size + (5+2*k)*size + 1; | |
148 | - liwork = 5*size + 3; | |
149 | - } | |
150 | - | |
151 | - // malloc | |
152 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
153 | - iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
154 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
155 | - tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
156 | - | |
157 | - for(molds_lapack_int i = 0; i < size; i++){ | |
158 | - for(molds_lapack_int j = i; j < size; j++){ | |
159 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
160 | - } | |
161 | - } | |
162 | - | |
163 | 114 | // call Lapack |
164 | 115 | #ifdef __INTEL_COMPILER |
165 | - dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info); | |
116 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
166 | 117 | #elif defined __FCC_VERSION |
167 | - molds_lapack_int jobLen=1; | |
168 | - molds_lapack_int uploLen=1; | |
169 | - dsyevd_(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info, jobLen, uploLen); | |
118 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
170 | 119 | #else |
171 | - info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
120 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
172 | 121 | #endif |
173 | 122 | |
174 | - for(molds_lapack_int i = 0; i < size; i++){ | |
175 | - for(molds_lapack_int j = 0; j < size; j++){ | |
176 | - matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector | |
177 | - //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector | |
123 | + // make the i-th row the i-th eigenvector | 
124 | + double** tmpMatrix=NULL; | |
125 | + try{ | |
126 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, size, size); | |
127 | + for(molds_lapack_int i = 0; i < size; i++){ | |
128 | + for(molds_lapack_int j = 0; j < size; j++){ | |
129 | + tmpMatrix[j][i] = matrix[i][j]; | |
130 | + } | |
178 | 131 | } |
132 | + for(molds_lapack_int i = 0; i < size; i++){ | |
133 | + for(molds_lapack_int j = 0; j < size; j++){ | |
134 | + matrix[i][j] = tmpMatrix[i][j]; | |
135 | + } | |
136 | + } | |
137 | + } | |
138 | + catch(MolDSException ex){ | |
139 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
140 | + throw ex; | |
179 | 141 | } |
142 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
180 | 143 | |
144 | + // adjust phase of eigenvectors | |
181 | 145 | for(molds_lapack_int i=0;i<size;i++){ |
182 | - double temp = 0.0; | |
146 | + double tmp = 0.0; | |
183 | 147 | for(molds_lapack_int j=0;j<size;j++){ |
184 | - temp += matrix[i][j]; | |
148 | + tmp += matrix[i][j]; | |
185 | 149 | } |
186 | - if(temp<0){ | |
150 | + if(tmp<0){ | |
187 | 151 | for(molds_lapack_int j=0;j<size;j++){ |
188 | 152 | matrix[i][j]*=-1.0; |
189 | 153 | } |
190 | 154 | } |
191 | 155 | } |
192 | 156 | |
193 | - for(molds_lapack_int i = 0; i < size; i++){ | |
194 | - eigenValues[i] = tempEigenValues[i]; | |
195 | - } | |
196 | - //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); | |
197 | - | |
198 | - // free | |
199 | - MOLDS_LAPACK_free(work); | |
200 | - MOLDS_LAPACK_free(iwork); | |
201 | - MOLDS_LAPACK_free(convertedMatrix); | |
202 | - MOLDS_LAPACK_free(tempEigenValues); | |
203 | - | |
204 | 157 | if(info != 0){ |
205 | 158 | stringstream ss; |
206 | 159 | ss << errorMessageDsyevdInfo; |
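The rewritten Dsyevd drops the hand-rolled workspace sizing and lets LAPACKE manage it internally. A standalone sketch of the call this hunk relies on, assuming a LAPACKE installation with lapacke.h on the include path; with LAPACK_ROW_MAJOR and job='V' the eigenvectors come back in the columns of the array, which is why the code above transposes afterwards so that row i holds the i-th eigenvector.

```cpp
#include <cstdio>
#include <lapacke.h>

// Diagonalize a small symmetric matrix; eigenvalues land in w in ascending
// order, eigenvectors in the columns of a (row-major storage).
int main(){
   double a[3*3] = { 2.0, -1.0,  0.0,
                    -1.0,  2.0, -1.0,
                     0.0, -1.0,  2.0 };
   double w[3];
   lapack_int info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, 'V', 'U', 3, a, 3, w);
   if(info != 0){ return static_cast<int>(info); }
   for(int i=0; i<3; i++){ std::printf("lambda[%d] = %f\n", i, w[i]); }
   return 0;
}
```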
@@ -216,21 +169,15 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
216 | 169 | * |
217 | 170 | * "matrix*X=b" is solved, then we get X by this method. |
218 | 171 | * The X will be stored in b. |
172 | + * The matrix will be overwritten by this method. | 
219 | 173 | * |
220 | 174 | */ |
221 | -molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){ | |
175 | +molds_lapack_int Lapack::Dsysv(double** matrix, double* b, molds_lapack_int size){ | |
222 | 176 | molds_lapack_int info = 0; |
223 | - molds_lapack_int lwork; | |
224 | - char uplo = 'U'; | |
225 | -#ifdef __FCC_VERSION | |
226 | - molds_lapack_int uploLen=1; | |
227 | -#endif | |
228 | - molds_lapack_int lda = size; | |
229 | - molds_lapack_int ldb = size; | |
177 | + char uplo = 'U'; | |
230 | 178 | molds_lapack_int nrhs = 1; |
231 | - double* convertedMatrix; | |
232 | - double* work; | |
233 | - double* tempB; | |
179 | + molds_lapack_int lda = size; | |
180 | + molds_lapack_int ldb = nrhs; | |
234 | 181 | molds_lapack_int* ipiv; |
235 | 182 | |
236 | 183 | if(size < 1 ){ |
@@ -241,54 +188,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
241 | 188 | |
242 | 189 | // malloc |
243 | 190 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
244 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
245 | - tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
246 | - | |
247 | - for(molds_lapack_int i = 0; i < size; i++){ | |
248 | - for(molds_lapack_int j = i; j < size; j++){ | |
249 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
250 | - } | |
251 | - } | |
252 | - for(molds_lapack_int i = 0; i < size; i++){ | |
253 | - tempB[i] = b[i]; | |
254 | - } | |
255 | 191 | |
256 | - // calc. lwork | |
257 | - double blockSize=0.0; | |
258 | -#pragma omp critical | |
259 | - { | |
260 | - lwork = -1; | |
261 | - double tempWork[3]={0.0, 0.0, 0.0}; | |
262 | 192 | #ifdef __INTEL_COMPILER |
263 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
193 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
264 | 194 | #elif defined __FCC_VERSION |
265 | - dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info, uploLen); | |
195 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
266 | 196 | #else |
267 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
197 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
268 | 198 | #endif |
269 | - blockSize = tempWork[0]/size; | |
270 | - } | |
271 | - info = 0; | |
272 | - lwork = blockSize*size; | |
273 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
274 | - | |
275 | - // call Lapack | |
276 | -#ifdef __INTEL_COMPILER | |
277 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info); | |
278 | -#elif defined __FCC_VERSION | |
279 | - dsysv_(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info, uploLen); | |
280 | -#else | |
281 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
282 | -#endif | |
283 | - for(molds_lapack_int i = 0; i < size; i++){ | |
284 | - b[i] = tempB[i]; | |
285 | - } | |
286 | 199 | |
287 | 200 | // free |
288 | - MOLDS_LAPACK_free(convertedMatrix); | |
289 | 201 | MOLDS_LAPACK_free(ipiv); |
290 | - MOLDS_LAPACK_free(work); | |
291 | - MOLDS_LAPACK_free(tempB); | |
292 | 202 | |
293 | 203 | if(info != 0){ |
294 | 204 | stringstream ss; |
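The new Dsysv likewise forwards straight to LAPACKE in row-major layout; ldb is nrhs (here 1) and the coefficient matrix is factorized in place, which is exactly why DoDIIS above now passes the tmpDiisErrorProducts copy. A standalone sketch, again assuming a LAPACKE installation:

```cpp
#include <cstdio>
#include <lapacke.h>

// Solve the symmetric system A*x = b; A is overwritten by its factorization
// and b is overwritten by the solution x.
int main(){
   double a[2*2] = { 4.0, 1.0,
                     1.0, 3.0 };
   double b[2]   = { 1.0, 2.0 };
   lapack_int ipiv[2];
   lapack_int info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, 'U', 2, 1, a, 2, ipiv, b, 1);
   if(info != 0){ return static_cast<int>(info); }
   std::printf("x = (%f, %f)\n", b[0], b[1]);
   return 0;
}
```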
@@ -304,17 +214,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
304 | 214 | /*** |
305 | 215 | * |
306 | 216 | * "matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1) is solved, then we get X[i] by this method. |
307 | - * The X[i] will be stored in b[i]. | |
308 | - * b[i][j] is j-th element of i-th solution, b[i]. | |
217 | + * The X[i] will be stored in b[i], namely | |
218 | + * b[i][j] will be the j-th element of the i-th solution, b[i]. | 
219 | + * Besides, the matrix will be overwritten by this method. | 
309 | 220 | * |
310 | 221 | */ |
311 | -molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
222 | +molds_lapack_int Lapack::Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
312 | 223 | molds_lapack_int info = 0; |
313 | 224 | char trans = 'N'; |
314 | 225 | molds_lapack_int lda = size; |
315 | - molds_lapack_int ldb = size; | |
316 | - double* convertedMatrix; | |
317 | - double* convertedB; | |
226 | + molds_lapack_int ldb = nrhs; | |
227 | + double* tmpB; | |
318 | 228 | molds_lapack_int* ipiv; |
319 | 229 | |
320 | 230 | if(size < 1 ){ |
@@ -323,48 +233,39 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
323 | 233 | throw MolDSException(ss.str()); |
324 | 234 | } |
325 | 235 | |
326 | - | |
327 | 236 | try{ |
328 | 237 | // malloc |
329 | 238 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
330 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
331 | - convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 ); | |
332 | - for(molds_lapack_int i = 0; i < size; i++){ | |
333 | - for(molds_lapack_int j = 0; j < size; j++){ | |
334 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
335 | - } | |
336 | - } | |
239 | + tmpB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*nrhs, 16 ); | |
240 | + // matrix b should be transposed | |
337 | 241 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
338 | 242 | for(molds_lapack_int j = 0; j < size; j++){ |
339 | - convertedB[j+i*size] = b[i][j]; | |
243 | + tmpB[j*nrhs+i] = b[i][j]; | |
340 | 244 | } |
341 | 245 | } |
342 | - this->Dgetrf(convertedMatrix, ipiv, size, size); | |
246 | + this->Dgetrf(&matrix[0][0], ipiv, size, size); | |
343 | 247 | #ifdef __INTEL_COMPILER |
344 | - dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info); | |
248 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
345 | 249 | #elif defined __FCC_VERSION |
346 | - molds_lapack_int transLen=1; | |
347 | - dgetrs_(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info, transLen); | |
250 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
348 | 251 | #else |
349 | - info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
252 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
350 | 253 | #endif |
351 | 254 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
352 | 255 | for(molds_lapack_int j = 0; j < size; j++){ |
353 | - b[i][j] = convertedB[j+i*size]; | |
256 | + b[i][j] = tmpB[j*nrhs+i]; | |
354 | 257 | } |
355 | 258 | } |
356 | 259 | } |
357 | 260 | catch(MolDSException ex){ |
358 | 261 | // free |
359 | - MOLDS_LAPACK_free(convertedMatrix); | |
360 | - MOLDS_LAPACK_free(convertedB); | |
262 | + MOLDS_LAPACK_free(tmpB); | |
361 | 263 | MOLDS_LAPACK_free(ipiv); |
362 | 264 | throw ex; |
363 | 265 | } |
364 | 266 | // free |
365 | - MOLDS_LAPACK_free(convertedMatrix); | |
366 | - MOLDS_LAPACK_free(convertedB); | |
367 | 267 | MOLDS_LAPACK_free(ipiv); |
268 | + MOLDS_LAPACK_free(tmpB); | |
368 | 269 | |
369 | 270 | if(info != 0){ |
370 | 271 | stringstream ss; |
@@ -379,43 +280,31 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
379 | 280 | // Argument "matrix" will be LU-decomposed. |
380 | 281 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
381 | 282 | molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); |
382 | - this->Dgetrf(matrix, ipiv, sizeM, sizeN); | |
283 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
383 | 284 | MOLDS_LAPACK_free(ipiv); |
384 | 285 | molds_lapack_int info = 0; |
385 | 286 | return info; |
386 | 287 | } |
387 | 288 | |
388 | -// Argument "matrix" is sizeM*sizeN matrix. | |
289 | +// Argument "matrix" is a sizeM*sizeN matrix in row-major (C/C++ style) order. | 
389 | 290 | // Argument "matrix" will be LU-decomposed. |
390 | 291 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
391 | - double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
392 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
393 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
394 | - convertedMatrix[i+j*sizeM] = matrix[i][j]; | |
395 | - } | |
396 | - } | |
397 | - this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN); | |
398 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
399 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
400 | - matrix[i][j] = convertedMatrix[i+j*sizeM]; | |
401 | - } | |
402 | - } | |
403 | - MOLDS_LAPACK_free(convertedMatrix); | |
292 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
404 | 293 | molds_lapack_int info = 0; |
405 | 294 | return info; |
406 | 295 | } |
407 | 296 | |
408 | 297 | // Argument "matrix" is sizeM*sizeN matrix. |
409 | -// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type). | |
298 | +// Each element of "matrix" should be stored in a 1-dimensional vector with row-major (C/C++ style) ordering. | 
410 | 299 | molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
411 | 300 | molds_lapack_int info = 0; |
412 | 301 | molds_lapack_int lda = sizeM; |
413 | 302 | #ifdef __INTEL_COMPILER |
414 | - dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info); | |
303 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
415 | 304 | #elif defined __FCC_VERSION |
416 | - dgetrf_(&sizeM, &sizeN, matrix, &lda, ipiv, &info); | |
305 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
417 | 306 | #else |
418 | - info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
307 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
419 | 308 | #endif |
420 | 309 | if(info != 0){ |
421 | 310 | stringstream ss; |
@@ -31,8 +31,8 @@ public: | ||
31 | 31 | static Lapack* GetInstance(); |
32 | 32 | static void DeleteInstance(); |
33 | 33 | molds_lapack_int Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors); |
34 | - molds_lapack_int Dsysv(double const* const* matrix, double* b, molds_lapack_int size); | |
35 | - molds_lapack_int Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
34 | + molds_lapack_int Dsysv(double** matrix, double* b, molds_lapack_int size); | |
35 | + molds_lapack_int Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
36 | 36 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
37 | 37 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
38 | 38 | private: |
@@ -2353,11 +2353,9 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{ | ||
2353 | 2353 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
2354 | 2354 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
2355 | 2355 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
2356 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(this->matrixCISdimension); | |
2357 | 2356 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
2358 | 2357 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
2359 | - &asyncCommunicator, | |
2360 | - mPassingTimes) ); | |
2358 | + &asyncCommunicator) ); | |
2361 | 2359 | |
2362 | 2360 | // this loop-a is MPI-parallelized |
2363 | 2361 | for(int k=this->matrixCISdimension-1; 0<=k; k--){ |
@@ -2418,12 +2416,13 @@ void ZindoS::CalcCISMatrix(double** matrixCIS) const{ | ||
2418 | 2416 | int num = this->matrixCISdimension - k; |
2419 | 2417 | double* buff = &this->matrixCIS[k][k]; |
2420 | 2418 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
2421 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
2419 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
2422 | 2420 | } |
2423 | 2421 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
2424 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
2422 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
2425 | 2423 | } |
2426 | 2424 | } // end of k-loop which is MPI-parallelized |
2425 | + asyncCommunicator.Finalize(); | |
2427 | 2426 | communicationThread.join(); |
2428 | 2427 | // Broadcast data to all rank |
2429 | 2428 | for(int k=0; k<this->matrixCISdimension; k++){ |
@@ -3339,11 +3338,9 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo | ||
3339 | 3338 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3340 | 3339 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3341 | 3340 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3342 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize); | |
3343 | 3341 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3344 | 3342 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3345 | - &asyncCommunicator, | |
3346 | - mPassingTimes) ); | |
3343 | + &asyncCommunicator) ); | |
3347 | 3344 | // this loop-i is MPI-parallelized |
3348 | 3345 | for(int i=nonRedundantQIndecesSize-1; 0<=i; i--){ |
3349 | 3346 | int calcRank = i%mpiSize; |
@@ -3376,12 +3373,13 @@ void ZindoS::CalcGammaNRMinusKNRMatrix(double** gammaNRMinusKNR, const vector<Mo | ||
3376 | 3373 | int num = nonRedundantQIndecesSize - i; |
3377 | 3374 | double* buff = &gammaNRMinusKNR[i][i]; |
3378 | 3375 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3379 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
3376 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3380 | 3377 | } |
3381 | 3378 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3382 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
3379 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3383 | 3380 | } |
3384 | 3381 | } // end of loop-i parallelized with MPI |
3382 | + asyncCommunicator.Finalize(); | |
3385 | 3383 | communicationThread.join(); |
3386 | 3384 | // broadcast data to all rank |
3387 | 3385 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |
@@ -3403,11 +3401,9 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv, | ||
3403 | 3401 | int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); |
3404 | 3402 | int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); |
3405 | 3403 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
3406 | - int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(nonRedundantQIndecesSize); | |
3407 | 3404 | MolDS_mpi::AsyncCommunicator asyncCommunicator; |
3408 | 3405 | boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, |
3409 | - &asyncCommunicator, | |
3410 | - mPassingTimes) ); | |
3406 | + &asyncCommunicator) ); | |
3411 | 3407 | // this loop-i is MPI-parallelized |
3412 | 3408 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |
3413 | 3409 | int calcRank = i%mpiSize; |
@@ -3440,12 +3436,13 @@ void ZindoS::CalcKRDagerGammaRInvMatrix(double** kRDagerGammaRInv, | ||
3440 | 3436 | int num = redundantQIndecesSize; |
3441 | 3437 | double* buff = &kRDagerGammaRInv[i][0]; |
3442 | 3438 | if(mpiRank == mpiHeadRank && mpiRank != calcRank){ |
3443 | - asyncCommunicator.SetRecvedVector(buff, num, source, tag); | |
3439 | + asyncCommunicator.SetRecvedMessage(buff, num, source, tag); | |
3444 | 3440 | } |
3445 | 3441 | if(mpiRank != mpiHeadRank && mpiRank == calcRank){ |
3446 | - asyncCommunicator.SetSentVector(buff, num, dest, tag); | |
3442 | + asyncCommunicator.SetSentMessage(buff, num, dest, tag); | |
3447 | 3443 | } |
3448 | 3444 | } // end of loop-i parallelized with MPI |
3445 | + asyncCommunicator.Finalize(); | |
3449 | 3446 | communicationThread.join(); |
3450 | 3447 | // broadcast data to all rank |
3451 | 3448 | for(int i=0; i<nonRedundantQIndecesSize; i++){ |