• R/O
  • HTTP
  • SSH
  • HTTPS

sctk: Commit

Superconducting Toolkit用メインリポジトリ


Commit MetaInfo

Revisiónb5e39c53aea532682c7f6d2b467c1892d4b12cfc (tree)
Tiempo2022-04-21 10:45:49
AutorMitsuaki Kawamura <kawamitsuaki@gmai...>
CommiterMitsuaki Kawamura

Log Message

Parallelism change

Cambiar Resumen

Diferencia incremental

--- a/SCTK/src/nesting_proj.f90
+++ b/SCTK/src/nesting_proj.f90
@@ -1110,6 +1110,8 @@ PROGRAM nesting_proj
11101110 ALLOCATE(equiv(nk1, nk2, nk3))
11111111 CALL rotate_k_fs(equiv)
11121112 !
1113+ ef1 = 0.0_dp
1114+ ef2 = 0.0_dp
11131115 IF (nspin == 2) THEN
11141116 ns = 2
11151117 IF(two_fermi_energies) THEN
--- a/SCTK/src/sctk_cnt_dsp.f90
+++ b/SCTK/src/sctk_cnt_dsp.f90
@@ -58,3 +58,82 @@ SUBROUTINE cnt_and_dsp_full(n,cnt,dsp)
5858 END SUBROUTINE cnt_and_dsp_full
5959 !
6060 END MODULE sctk_cnt_dsp
61+
62+MODULE mp_kel
63+ !----------------------------------------------------------------------------
64+ !! Pool groups (processors within a pool of k-points).
65+ !! Subdivision of image group, used for k-point parallelization.
66+ !
67+ USE mp, ONLY : mp_barrier, mp_size, mp_rank, mp_comm_split
68+ USE parallel_include
69+ !
70+ IMPLICIT NONE
71+ SAVE
72+ !
73+ INTEGER :: nproc_band1 = 1
74+ INTEGER :: nproc_band2 = 1
75+ INTEGER :: me_band2 = 0
76+ INTEGER :: me_band1 = 0
77+ INTEGER :: band1_comm = 0
78+ INTEGER :: band2_comm = 0
79+ !
80+CONTAINS
81+ !
82+ !----------------------------------------------------------------------------
83+ SUBROUTINE mp_start_kel()
84+ !---------------------------------------------------------------------------
85+ !! Divide processors (of the "world_comm" group) into "pools".
86+ !! Requires: \(\text{nproc_band1}\) read from command line,
87+ !! \(\text{world_comm}\), typically \(\text{world_comm} =
88+ !! \text{group}\) of all processors.
89+ !
90+ USE mp_world, ONLY : nproc, mpime, world_comm
91+ IMPLICIT NONE
92+ !
93+#if defined (__MPI)
94+ !
95+ INTEGER :: nproc_band0, iproc
96+ !
97+ nproc_band0 = CEILING(SQRT(DBLE(nproc)))
98+ !
99+ DO iproc = nproc_band0, 1, -1
100+ IF(MOD(nproc, iproc) == 0) THEN
101+ nproc_band1 = iproc
102+ nproc_band2 = nproc / iproc
103+ EXIT
104+ END IF
105+ END DO
106+ !
107+ ! ... nproc_band1 must have been previously read from command line argument
108+ ! ... by a call to routine get_command_line
109+ !
110+ IF ( nproc_band1 < 1 .OR. nproc_band1 > nproc ) CALL errore( 'mp_start_kel',&
111+ 'invalid number of pools, out of range', 1 )
112+
113+ IF ( MOD( nproc, nproc_band1 ) /= 0 ) CALL errore( 'mp_start_kel', &
114+ 'invalid number of pools, nproc /= nproc_band2 * nproc_band1', 1 )
115+ !
116+ ! ... me_band1 = pool index for this processor ( 0 : nproc_band1 - 1 )
117+ ! ... me_band2 = processor index within the pool ( 0 : nproc_band2 - 1 )
118+ !
119+ me_band1 = mpime / nproc_band2
120+ me_band2 = MOD( mpime, nproc_band2 )
121+ !
122+ CALL mp_barrier( world_comm )
123+ !
124+ ! ... the band2_comm communicator is created
125+ !
126+ CALL mp_comm_split ( world_comm, me_band1, mpime, band2_comm )
127+ !
128+ CALL mp_barrier( world_comm )
129+ !
130+ ! ... the band1_comm communicator is created
131+ !
132+ CALL mp_comm_split ( world_comm, me_band2, mpime, band1_comm )
133+ !
134+#endif
135+ !
136+ RETURN
137+ END SUBROUTINE mp_start_kel
138+ !
139+END MODULE mp_kel
--- a/SCTK/src/sctk_coulomb.f90
+++ b/SCTK/src/sctk_coulomb.f90
@@ -81,7 +81,6 @@ SUBROUTINE prepare_q()
8181 !
8282 USE kinds, ONLY : DP
8383 USE cell_base, ONLY : bg, tpiba
84- USE mp_pools, ONLY : npool, inter_pool_comm
8584 USE mp_world, ONLY : world_comm, nproc
8685 USE constants, ONLY : pi
8786 USE io_global, ONLY : stdout
@@ -94,19 +93,15 @@ SUBROUTINE prepare_q()
9493 USE exx, ONLY : dfftt
9594 USE mp, ONLY : mp_circular_shift_left, mp_barrier
9695 !
97- USE sctk_val, ONLY : gindx, gq2, ngv, nqbz, nk_p, k0_p, nk_p_max, &
98- & wfc, wfcq, becwfc, becwfcq, wscr, &
99- & ngv0, ngv1, lsf, nmf, nbnd_p, nbnd_p_max
96+ USE sctk_val, ONLY : gindx, gq2, ngv, nqbz, wfc, wfcq, becwfc, becwfcq, wscr, &
97+ & ngv0, ngv1, lsf, nmf, nb, nb_max
10098 !
10199 IMPLICIT NONE
102100 !
103- INTEGER :: ik_p, ik_g, jk_p, jk_g, ib, i1, i2, i3, ikv(3), jkv(3), g0(3), ir(3), ifft, ig, igv(3), &
104- & my_k0_p, my_nk_p, jkindx(nqbz), ipe, iqv(3), ipol
101+ INTEGER :: ik, jk, ib, i1, i2, i3, ikv(3), jkv(3), g0(3), ir(3), ifft, ig, igv(3), &
102+ & iqv(3), ipol
105103 REAL(dp) :: gv(3), theta, gq20, RAM_V, RAM_rho
106104 COMPLEX(dp) :: phase(dfftt%nnr)
107- COMPLEX(DP),ALLOCATABLE :: wfctmp(:,:,:,:), becwfctmp(:,:,:,:)
108- !
109- ALLOCATE(wfctmp(dfftt%nnr,nbnd_p_max,npol,nk_p_max), becwfctmp(nkb,nbnd_p_max,npol,nk_p_max))
110105 !
111106 ! |G+q|^2
112107 !
@@ -145,7 +140,7 @@ SUBROUTINE prepare_q()
145140 ! RAM size estimation
146141 !
147142 RAM_V = REAL(ngv,dp)**2*REAL(nmf+1)
148- RAM_rho = REAL(ngv + (ngv1-ngv0),dp)*REAL(nbnd_p_max,dp)*REAL(nk_p_max,dp)*REAL(nproc,dp)
143+ RAM_rho = REAL(ngv + (ngv1-ngv0),dp)*REAL(nb_max,dp)*REAL(nqbz,dp)*REAL(nproc,dp)
149144 !
150145 IF(lsf>0) THEN
151146 IF(npol == 2) THEN
@@ -169,11 +164,11 @@ SUBROUTINE prepare_q()
169164 !
170165 iqv(1:3) = NINT(MATMUL(x_q(1:3,current_iq), at(1:3,1:3)) * REAL((/nq1, nq2, nq3/), dp) - 0.5_dp)
171166 !
172- DO ik_g = 1, nqbz
167+ DO ik = 1, nqbz
173168 !
174- ikv(1) = (ik_g - 1) / (nq3*nq2)
175- ikv(2) = (ik_g - 1 - ikv(1)*nq2*nq3) / nq3
176- ikv(3) = ik_g - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
169+ ikv(1) = (ik - 1) / (nq3*nq2)
170+ ikv(2) = (ik - 1 - ikv(1)*nq2*nq3) / nq3
171+ ikv(3) = ik - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
177172 !
178173 WHERE(ikv(1:3)*2 >= (/nq1,nq2,nq3/)) ikv(1:3) = ikv(1:3) - (/nq1,nq2,nq3/)
179174 !
@@ -186,12 +181,7 @@ SUBROUTINE prepare_q()
186181 g0(1:3) = (jkv(1:3) - ikv(1:3) - iqv(1:3)) / (/nq1,nq2,nq3/)
187182 !
188183 jkv(1:3) = MODULO(jkv(1:3), (/nq1,nq2,nq3/))
189- jk_g = 1 + jkv(3) + jkv(2)*nq3 + jkv(1)*nq2*nq3
190- jkindx(ik_g) = jk_g
191- !
192- IF(ik_g <= k0_p .OR. k0_p+nk_p < ik_g) CYCLE
193- !
194- ik_p = ik_g - k0_p
184+ jk = 1 + jkv(3) + jkv(2)*nq3 + jkv(1)*nq2*nq3
195185 !
196186 ! phi_{k}(r) = exp(i k r) u_k(r)
197187 ! phi_{k+G0}(r) = exp(i (k+G0) r) u_{k+G0}(r) = phi_{k}(r)
@@ -219,63 +209,32 @@ SUBROUTINE prepare_q()
219209 END DO ! i3
220210 !
221211 DO ipol = 1, npol
222- DO ib = 1, nbnd_p
223- !
224- ! This phase should be applied as phi(2) * phase.
225- ! But phi(2) will be rearranged below.
226- ! Therefore, it is applied as
227- ! phi(1)^* phi(2) phase = (phi(1) phase^*)^* phi(2)
212+ !
213+ ! This phase should be applied as phi(2) * phase.
214+ ! But phi(2) will be rearranged below.
215+ ! Therefore, it is applied as
216+ ! phi(1)^* phi(2) phase = (phi(1) phase^*)^* phi(2)
217+ !
218+ DO ib = 1, nb(1)
228219 !
229- wfcq( 1:dfftt%nnr,ib,ipol,ik_p, 1) = &
230- & wfc(1:dfftt%nnr,ib,ipol,ik_p, 1) * CONJG(phase(1:dfftt%nnr))
231- wfcq( 1:dfftt%nnr,ib,ipol,ik_p, 2) = &
232- & wfc(1:dfftt%nnr,ib,ipol,ik_p, 2)
220+ wfcq( 1:dfftt%nnr, ib, ipol, ik, 1) = &
221+ & wfc(1:dfftt%nnr, ib, ipol, ik, 1) * CONJG(phase(1:dfftt%nnr))
233222 !
234- END DO ! ipol
235- END DO ! ib
236- !
237- END DO ! ik
238- !
239- IF(okvan) becwfcq(1:nkb,1:nbnd_p,1:npol,1:nk_p, 1:2) &
240- & = becwfc(1:nkb,1:nbnd_p,1:npol,1:nk_p, 1:2)
241- !
242- wfctmp(1:dfftt%nnr,1:nbnd_p,1:npol,1:nk_p) = wfcq(1:dfftt%nnr,1:nbnd_p,1:npol,1:nk_p,2)
243- wfcq( 1:dfftt%nnr,1:nbnd_p,1:npol,1:nk_p,2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
244- IF (okvan) THEN
245- becwfctmp(1:nkb,1:nbnd_p,1:npol,1:nk_p) = becwfcq(1:nkb, 1:nbnd_p,1:npol,1:nk_p,2)
246- becwfcq( 1:nkb,1:nbnd_p,1:npol,1:nk_p,2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
247- END IF
248- !
249- my_k0_p = k0_p
250- my_nk_p = nk_p
251- !
252- DO ipe = 1, npool
223+ END DO ! ib
224+ !
225+ END DO ! ipol
253226 !
254- CALL mp_circular_shift_left(nk_p, 1, inter_pool_comm )
255- CALL mp_circular_shift_left(k0_p, 1, inter_pool_comm )
256- CALL circular_shift_wrapper_c( dfftt%nnr*npol*nbnd_p_max,nk_p_max,inter_pool_comm,wfctmp)
257- IF(okvan) CALL circular_shift_wrapper_c(nkb*npol*nbnd_p_max,nk_p_max,inter_pool_comm,becwfctmp)
227+ wfcq( 1:dfftt%nnr, 1:nb(2), 1:npol, ik, 2) = &
228+ & wfc(1:dfftt%nnr, 1:nb(2), 1:npol, jk, 2)
258229 !
259- DO ik_p = 1, my_nk_p
260- !
261- ik_g = ik_p + my_k0_p
262- jk_g = jkindx(ik_g)
263- IF(jk_g <= k0_p .OR. k0_p+nk_p < jk_g) CYCLE
264- !
265- jk_p = jk_g - k0_p
266- !
267- wfcq( 1:dfftt%nnr,1:nbnd_p,1:npol,ik_p,2) = &
268- & wfctmp(1:dfftt%nnr,1:nbnd_p,1:npol,jk_p)
269- IF (okvan) THEN
270- becwfcq( 1:nkb,1:nbnd_p,1:npol,ik_p,2) = &
271- & becwfctmp(1:nkb,1:nbnd_p,1:npol,jk_p)
272- END IF
273- !
274- END DO
230+ IF(okvan) THEN
231+ becwfcq( 1:nkb, 1:nb(1), 1:npol, ik, 1) &
232+ & = becwfc(1:nkb, 1:nb(1), 1:npol, ik, 1)
233+ becwfcq( 1:nkb, 1:nb(2), 1:npol, ik, 2) &
234+ & = becwfc(1:nkb, 1:nb(2), 1:npol, jk, 2)
235+ END IF
275236 !
276- END DO
277- !
278- DEALLOCATE(wfctmp, becwfctmp)
237+ END DO ! ik
279238 !
280239 END SUBROUTINE prepare_q
281240 !>
@@ -284,31 +243,27 @@ END SUBROUTINE prepare_q
284243 !>
285244 SUBROUTINE fermi_factor(wght)
286245 !
287- USE wvfct, ONLY : nbnd
288246 USE kinds, ONLY : DP
289- USE ktetra, ONLY : ntetra, tetra
290247 USE cell_base, ONLY : at
291248 USE disp, ONLY : nq1, nq2, nq3, x_q
292249 USE start_k, ONLY : nk1, nk2, nk3
293250 USE mp_world, ONLY : world_comm
294- USE mp_pools, ONLY : inter_pool_comm, npool
295- USE mp, ONLY : mp_sum, mp_barrier, mp_circular_shift_left
251+ USE mp, ONLY : mp_sum, mp_barrier
296252 USE wvfct, ONLY : et, nbnd
297253 USE klist, ONLY : nks
298254 USE control_ph, ONLY : current_iq
299255 USE io_global, ONLY : stdout
300256 !
301257 USE sctk_tetra, ONLY : tetraweight, interpol_indx
302- USE sctk_val, ONLY : nbnd_p, nbnd_p_max, nk_p, nk_p_max, k0_p, nmf
258+ USE sctk_val, ONLY : nb, nb_max, nqbz, nmf
303259 !
304260 IMPLICIT NONE
305261 !
306- COMPLEX(dp),INTENT(OUT) :: wght((nmf+1)*nbnd*nbnd_p_max,nk_p_max) !< integration weight
262+ COMPLEX(dp),INTENT(OUT) :: wght(nmf+1, nqbz, nb_max, nb_max) !< integration weight
307263 !
308- INTEGER :: ntetra0, ntetra1, nks0, it, ik, ii, dik(3), ikv(3), ik_p, &
309- & indx2(20, ntetra), indx3(20 * ntetra), kintp(8), ikq, iproc
264+ INTEGER :: ik, ii, dik(3), ikv(3), kintp(8), ikq
310265 REAL(dp) :: kv(3), wintp(8), RAM_wghtd
311- COMPLEX(dp),ALLOCATABLE :: wghtd(:,:)
266+ COMPLEX(dp),ALLOCATABLE :: wghtd(:,:,:,:)
312267 !
313268 CALL start_clock("fermi_factor")
314269 !
@@ -328,78 +283,36 @@ SUBROUTINE fermi_factor(wght)
328283 !
329284 END DO
330285 !
331- indx2(1:20,1:ntetra) = 0
332- indx3(1:20 * ntetra) = 0
333- !
334- CALL divide(inter_pool_comm, ntetra,ntetra0,ntetra1)
335- !
336- nks0 = 0
337- DO it = ntetra0, ntetra1
338- !
339- DO ii = 1, 20
340- !
341- DO ik = 1, nks0
342- !
343- IF(tetra(ii,it) == indx3(ik)) THEN
344- !
345- indx2(ii,it) = ik
346- GOTO 10
347- !
348- END IF
349- !
350- END DO
351- !
352- nks0 = nks0 + 1
353- indx2(ii,it) = nks0
354- indx3(nks0) = tetra(ii,it)
355- !
356-10 CONTINUE
357- !
358- END DO
359- !
360- END DO
361- !
362- RAM_wghtd = REAL(nmf+1, dp)*REAL(nbnd, dp)*REAL(nbnd_p, dp)*REAL(nks0, dp)
286+ RAM_wghtd = REAL(nmf+1, dp)*REAL(nb(1)*nb(2), dp)*REAL(nks, dp)
363287 CALL mp_sum(RAM_wghtd, world_comm)
364288 WRITE(stdout,'(9x,"Total RAM for Weight on dense grid : ",e10.2," GB")') RAM_wghtd*16.0e-9_dp
365289 CALL mp_barrier(world_comm)
366- ALLOCATE(wghtd((nmf+1)*nbnd*nbnd_p,nks0))
290+ ALLOCATE(wghtd(nmf+1, nb(1), nb(2), nks))
367291 !
368- CALL tetraweight(nks0,indx2,wghtd)
292+ CALL tetraweight(wghtd)
369293 !
370294 ! Interpolation of weight
371295 !
372- wght(1:(nmf+1)*nbnd*nbnd_p_max,1:nk_p_max) = 0.0_dp
296+ wght(1:nmf+1, 1:nqbz, nb_max, nb_max) = 0.0_dp
373297 !
374- DO iproc = 1, npool
298+ DO ik = 1, nks
299+ !
300+ ikv(1) = (ik - 1) / (nk3*nk2)
301+ ikv(2) = (ik - 1 - ikv(1)*nk2*nk3) / nk3
302+ ikv(3) = ik - 1 - ikv(1)*nk2*nk3 - ikv(2)*nk3
375303 !
376- CALL mp_circular_shift_left(wght,1,inter_pool_comm)
377- CALL mp_circular_shift_left(nk_p,1,inter_pool_comm)
378- CALL mp_circular_shift_left(k0_p,1,inter_pool_comm)
304+ kv(1:3) = REAL(ikv(1:3), dp) / REAL((/nk1,nk2,nk3/), dp)
305+ CALL interpol_indx((/nq1,nq2,nq3/), kv, kintp, wintp)
379306 !
380- DO ik = 1, nks0
307+ DO ii = 1, 8
381308 !
382- ikv(1) = (indx3(ik) - 1) / (nk3*nk2)
383- ikv(2) = (indx3(ik) - 1 - ikv(1)*nk2*nk3) / nk3
384- ikv(3) = indx3(ik) - 1 - ikv(1)*nk2*nk3 - ikv(2)*nk3
309+ wght( 1:nmf+1, kintp(ii), 1:nb(2), 1:nb(1)) &
310+ & = wght( 1:nmf+1, kintp(ii), 1:nb(2), 1:nb(1)) &
311+ & + wghtd(1:nmf+1, 1:nb(2), 1:nb(1), ik) * wintp(ii)
385312 !
386- kv(1:3) = REAL(ikv(1:3), dp) / REAL((/nk1,nk2,nk3/), dp)
387- CALL interpol_indx((/nq1,nq2,nq3/),kv,kintp,wintp)
388- !
389- DO ii = 1, 8
390- IF(k0_p < kintp(ii) .AND. kintp(ii) <= k0_p + nk_p) THEN
391- !
392- ik_p = kintp(ii) - k0_p
393- !
394- wght( 1:(nmf+1)*nbnd*nbnd_p, ik_p) &
395- & = wght( 1:(nmf+1)*nbnd*nbnd_p, ik_p) &
396- & + wghtd(1:(nmf+1)*nbnd*nbnd_p, ik) * wintp(ii)
397- !
398- END IF
399- END DO ! ii
400- !
401- END DO ! ik
402- END DO ! iproc
313+ END DO ! ii
314+ !
315+ END DO ! ik
403316 !
404317 DEALLOCATE(wghtd)
405318 !
@@ -411,25 +324,25 @@ END SUBROUTINE fermi_factor
411324 !>
412325 SUBROUTINE make_scrn()
413326 !
414- USE wvfct, ONLY : nbnd
415327 USE kinds, ONLY : DP
416- USE mp_world, ONLY : nproc
417328 USE fft_scalar, ONLY : cfft3d
418329 USE noncollin_module, ONLY : npol
330+ USE exx, ONLY : dfftt
419331 USE us_exx, ONLY : addusxx_r
420332 USE mp, ONLY : mp_circular_shift_left, mp_sum, mp_barrier
421333 USE mp_world, ONLY : world_comm
422334 USE io_global, ONLY : stdout
423335 !
424- USE sctk_val, ONLY : ngv, nmf, nbnd_p_max, nbnd_p, nk_p, &
425- & wscr, lsf, ngv0, ngv1, nk_p_max
336+ USE sctk_val, ONLY : ngv, nmf, nqbz, nb_max, wfcq, &
337+ & wscr, lsf, ngv0, ngv1, nb
338+ USE mp_kel, ONLY : nproc_band1, nproc_band2, band1_comm, band2_comm
426339 !
427340 IMPLICIT NONE
428341 !
429- INTEGER :: ik_p, ibnd_p, jbnd_g, ikib, imf, ipe, ipol, npol2, nwscr
342+ INTEGER :: ik, ibnd, jbnd, imf, iproc1, iproc2, ipol, npol2, nwscr
430343 REAL(DP) :: RAM_wght
431344 !
432- COMPLEX(dp),ALLOCATABLE :: rho1(:,:,:), rho2(:,:,:), wght(:,:,:,:)
345+ COMPLEX(dp),ALLOCATABLE :: rho1(:,:,:,:), rho2(:,:,:,:), wght(:,:,:,:)
433346 !
434347 CALL start_clock("make_scrn")
435348 !
@@ -450,59 +363,66 @@ SUBROUTINE make_scrn()
450363 !
451364 ! Calc f * (1 - f') / (e - e' + iw)
452365 !
453- RAM_wght = REAL(nmf+1,dp)*REAL(nbnd,DP)*REAL(nbnd_p_max)*REAL(nk_p_max)
366+ RAM_wght = REAL(nmf+1,dp)*REAL(nb_max**2,DP)*REAL(nqbz)
454367 CALL mp_sum(RAM_wght, world_comm)
455368 WRITE(stdout,'(9x,"Total RAM for weight on coarse grid : ",e10.2," GB")') RAM_wght*16.0e-9_dp
456369 CALL mp_barrier(world_comm)
457- ALLOCATE(wght(0:nmf,nbnd,nbnd_p_max,nk_p_max))
370+ ALLOCATE(wght(0:nmf,nqbz,nb_max,nb_max))
458371 CALL fermi_factor(wght)
459372 !
460373 ! Average the weights for degenerated states
461374 !
462- CALL average_wght_kel(nmf+1, wght(0:nmf,1:nbnd,1:nbnd_p_max,1:nk_p_max))
375+ CALL average_wght_kel(nmf+1, wght(0:nmf,1:nqbz,1:nb_max,1:nb_max))
463376 !
464377 ! Calc. Chi
465378 !
466- ALLOCATE(rho1(ngv,nbnd_p_max*nk_p_max,npol2), rho2(ngv0:ngv1,nbnd_p_max*nk_p_max,npol2))
379+ ALLOCATE(rho1(ngv,nqbz,nb_max,npol2), rho2(ngv0:ngv1,nqbz,nb_max,npol2))
467380 !
468- DO jbnd_g = 1, nbnd
381+ DO iproc1 = 1, nproc_band1
469382 !
470- CALL calc_rhog(npol2, jbnd_g, rho1)
383+ CALL mp_circular_shift_left(nb(1), 1, band1_comm )
384+ CALL circular_shift_wrapper_c(dfftt%nnr, nb_max*npol*nqbz, band1_comm, &
385+ & wfcq(1:dfftt%nnr, 1:nb_max, 1:npol, 1:nqbz, 1))
386+ CALL circular_shift_wrapper_c(nmf+1, nqbz*nb_max*nb_max, band1_comm, &
387+ & wght(0:nmf, 1:nqbz, 1:nb_max, 1:nb_max))
471388 !
472- DO ipe = 1, nproc
389+ DO iproc2 = 1, nproc_band2
473390 !
474- CALL mp_circular_shift_left(nbnd_p, 1, world_comm )
475- CALL mp_circular_shift_left(nk_p, 1, world_comm )
476- CALL circular_shift_wrapper_c(ngv,nbnd_p_max*nk_p_max*npol2,world_comm,rho1)
477- CALL circular_shift_wrapper_c(nmf+1,nbnd_p_max*nbnd*nk_p_max,world_comm,wght)
391+ CALL mp_circular_shift_left(nb(2), 1, band2_comm )
392+ CALL circular_shift_wrapper_c(dfftt%nnr, nb_max*npol*nqbz, band2_comm, &
393+ & wfcq(1:dfftt%nnr, 1:nb_max, 1:npol, 1:nqbz, 2))
394+ CALL circular_shift_wrapper_c(nmf+1, nqbz*nb_max*nb_max, band2_comm, &
395+ & wght(0:nmf, 1:nqbz, 1:nb_max, 1:nb_max))
478396 !
479- DO imf = 0, nmf
480- !
481- ikib = 0
482- DO ik_p = 1, nk_p
483- DO ibnd_p = 1, nbnd_p
484- ikib = ikib + 1
485- rho2(ngv0:ngv1,ikib,1:npol2) = REAL(wght(imf,jbnd_g,ibnd_p,ik_p),dp) &
486- & * CONJG(rho1(ngv0:ngv1,ikib,1:npol2))
487- END DO
488- END DO
489- !
490- CALL start_clock("zgemm(make_scrn)")
397+ DO ibnd = 1, nb(1)
491398 !
492- DO ipol = 1, npol2
493- CALL zgemm("N", "T", ngv, ngv1-ngv0+1, ikib, &
494- & (1.0_dp, 0.0_dp), rho1(1:ngv, 1:ikib, ipol), ngv, &
495- & rho2( ngv0:ngv1, 1:ikib, ipol), ngv1-ngv0+1, &
496- & (1.0_dp, 0.0_dp), wscr(1:ngv, ngv0:ngv1, imf, ipol), ngv )
497- END DO
399+ CALL calc_rhog(npol2, ibnd, rho1)
498400 !
499- CALL stop_clock("zgemm(make_scrn)")
401+ DO imf = 0, nmf
402+ !
403+ DO jbnd = 1, nb(2)
404+ DO ik = 1, nqbz
405+ rho2(ngv0:ngv1,ik,jbnd,1:npol2) = REAL(wght(imf,ik,jbnd,ibnd),dp) &
406+ & * CONJG(rho1(ngv0:ngv1,ik,jbnd,1:npol2))
407+ END DO
408+ END DO
409+ !
410+ CALL start_clock("zgemm(make_scrn)")
411+ !
412+ DO ipol = 1, npol2
413+ CALL zgemm("N", "T", ngv, ngv1-ngv0+1, nqbz*nb(2), &
414+ & (1.0_dp, 0.0_dp), rho1(1:ngv, 1:nqbz, 1:nb(2), ipol), ngv, &
415+ & rho2( ngv0:ngv1, 1:nqbz, 1:nb(2), ipol), ngv1-ngv0+1, &
416+ & (1.0_dp, 0.0_dp), wscr(1:ngv, ngv0:ngv1, imf, ipol), ngv )
417+ END DO
418+ !
419+ CALL stop_clock("zgemm(make_scrn)")
420+ !
421+ END DO ! imf = 0, nmf
500422 !
501- END DO ! imf = 0, nmf
502- !
503- END DO ! ipe = 1, nproc
504- !
505- END DO
423+ END DO ! ibnd = 1, nb(1)
424+ END DO ! iproc2 = 1, nproc_band2
425+ END DO ! iproc1 = 1, nproc_band1
506426 !
507427 IF(nwscr == 2) &
508428 & wscr(1:ngv, ngv0:ngv1, 0:nmf, 2) = wscr(1:ngv, ngv0:ngv1, 0:nmf, 1)
@@ -517,26 +437,26 @@ END SUBROUTINE make_scrn
517437 !>
518438 SUBROUTINE make_Kel()
519439 !
520- USE wvfct, ONLY : nbnd
521440 USE kinds, ONLY : DP
522441 USE cell_base, ONLY : omega
523- USE mp_world, ONLY : world_comm, nproc
442+ USE mp_world, ONLY : world_comm
524443 USE mp, ONLY : mp_sum, mp_circular_shift_left, mp_barrier
525444 USE fft_scalar, ONLY : cfft3d
526445 USE noncollin_module, ONLY : npol
527446 USE us_exx, ONLY : addusxx_r
447+ USE exx, ONLY : dfftt
528448 USE io_global, ONLY : stdout
529449 !
530- USE sctk_val, ONLY : gq2, ngv, nmf, nbnd_p, nbnd_p_max, &
531- & ngv0, ngv1, Kel, Wscr, lsf, nk_p, nk_p_max
450+ USE sctk_val, ONLY : gq2, ngv, nmf, nb, nb_max, wfcq, &
451+ & ngv0, ngv1, Kel, Wscr, lsf, nqbz
452+ USE mp_kel, ONLY : nproc_band1, nproc_band2, band1_comm, band2_comm
532453 !
533454 IMPLICIT NONE
534455 !
535- INTEGER :: ik_p, ibnd_p, jbnd_g, imf, ipe, ipol, jpol, npol2, nwscr, &
536- & ikib, nKel
456+ INTEGER :: ik, ibnd, jbnd, imf, iproc1, iproc2, ipol, jpol, npol2, nwscr, nKel
537457 REAL(dp) :: RAM_Kel
538458 !
539- COMPLEX(dp),ALLOCATABLE :: rho1(:,:,:), rho2(:,:), Kel0(:,:), Kel_temp(:,:,:,:)
459+ COMPLEX(dp),ALLOCATABLE :: rho1(:,:,:,:), rho2(:,:,:), Kel0(:,:,:), Kel_temp(:,:,:,:)
540460 COMPLEX(DP),EXTERNAL :: zdotc
541461 !
542462 CALL start_clock("make_kel")
@@ -556,106 +476,110 @@ SUBROUTINE make_Kel()
556476 nKel = 1
557477 END IF
558478 !
559- RAM_Kel = REAL(nmf+2,dp)*REAL(nbnd,DP)*REAL(nbnd_p_max)*REAL(nk_p_max)*REAL(nKel,DP)
479+ RAM_Kel = REAL(nmf+2,dp)*REAL(nb_max**2,DP)*REAL(nqbz)*REAL(nKel,DP)
560480 CALL mp_sum(RAM_Kel, world_comm)
561481 WRITE(stdout,'(9x,"Total RAM for Kel : ",e10.2," GB")') RAM_Kel*8.0e-9_dp
562482 CALL mp_barrier(world_comm)
563- ALLOCATE(Kel(0:nmf+1,nbnd,nbnd_p_max,nk_p_max,nKel))
564- Kel(0:nmf + 1,1:nbnd,1:nbnd_p_max,1:nk_p_max,1:nKel) = 0.0_dp
483+ ALLOCATE(Kel(0:nmf+1,nqbz,nb_max,nb_max,nKel))
484+ Kel(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 1:nKel) = 0.0_dp
565485 !
566- ALLOCATE(rho1(ngv,nbnd_p_max*nk_p_max,npol2), rho2(nbnd_p_max*nk_p_max,ngv0:ngv1), &
567- & Kel0(nbnd_p_max*nk_p_max,nwscr))
486+ ALLOCATE(rho1(ngv,nqbz,nb_max,npol2), rho2(nqbz,nb_max,ngv0:ngv1), &
487+ & Kel0(nqbz,nb_max,nwscr))
568488 !
569- DO jbnd_g = 1, nbnd
570- !
571- CALL calc_rhog(npol2, jbnd_g, rho1)
572- !
573- ! Infinite frequency -> Bare Coulomb
489+ DO iproc1 = 1, nproc_band1
574490 !
575- ikib = 0
576- DO ik_p = 1, nk_p
577- DO ibnd_p = 1, nbnd_p
578- ikib = ikib + 1
579- Kel(nmf + 1,jbnd_g,ibnd_p,ik_p,1) = SUM(REAL(rho1(1:ngv,ikib,1) &
580- & * CONJG(rho1(1:ngv,ikib,1)), dp) &
581- & / gq2(1:ngv)) / omega
582- END DO ! ibnd_p
583- END DO ! ik_p
491+ CALL mp_circular_shift_left(nb(1), 1, band1_comm )
492+ CALL circular_shift_wrapper_c(dfftt%nnr, nb_max*npol*nqbz, band1_comm, &
493+ & wfcq(1:dfftt%nnr, 1:nb_max, 1:npol, 1:nqbz, 1))
494+ CALL circular_shift_wrapper_r(nmf+2, nqbz*nb_max*nb_max*nKel, band1_comm, &
495+ & Kel(0:nmf+1, 1:nqbz, 1:nb_max, 1:nb_max, 1:nKel))
584496 !
585- DO ipe = 1, nproc
497+ DO iproc2 = 1, nproc_band2
586498 !
587- CALL mp_circular_shift_left(nk_p, 1, world_comm )
588- CALL mp_circular_shift_left(nbnd_p, 1, world_comm )
589- CALL circular_shift_wrapper_c(ngv,nk_p_max*nbnd_p_max*npol2,world_comm,rho1)
590- IF(lsf>0) THEN
591- CALL circular_shift_wrapper_r(nmf + 2,nbnd*nbnd_p_max*nk_p_max*2,world_comm,Kel)
592- ELSE
593- CALL circular_shift_wrapper_r(nmf + 2,nbnd*nbnd_p_max*nk_p_max,world_comm,Kel)
594- END IF
499+ CALL mp_circular_shift_left(nb(2), 1, band2_comm )
500+ CALL circular_shift_wrapper_c(dfftt%nnr, nb_max*npol*nqbz, band2_comm, &
501+ & wfcq(1:dfftt%nnr, 1:nb_max, 1:npol, 1:nqbz, 2))
502+ CALL circular_shift_wrapper_r(nmf+2, nqbz*nb_max*nb_max*nKel, band2_comm, &
503+ & Kel(0:nmf+1, 1:nqbz, 1:nb_max, 1:nb_max, 1:nKel))
595504 !
596- DO imf = 0, nmf
505+ DO ibnd = 1, nb(1)
597506 !
598- DO ipol = 1, nwscr
599- !
600- IF(nwscr == 2) THEN
601- jpol = 1
602- ELSE
603- jpol = ipol
604- END IF
605- !
606- CALL start_clock("zgemm(make_kel)")
607- !
608- CALL zgemm("T", "N", nbnd_p*nk_p, ngv1-ngv0+1, ngv, &
609- & (1.0_dp, 0.0_dp), rho1(1:ngv,1:nbnd_p*nk_p,jpol), ngv, &
610- & Wscr(1:ngv, ngv0:ngv1, imf,ipol),ngv, &
611- & (0.0_dp, 0.0_dp), rho2( 1:nbnd_p_max*nk_p_max,ngv0:ngv1), nbnd_p_max*nk_p_max)
612- !
613- CALL stop_clock("zgemm(make_kel)")
614- !
615- DO ikib = 1, nbnd_p*nk_p
616- Kel0(ikib,ipol) = zdotc(ngv1-ngv0+1, &
617- & rho2(ikib,ngv0), nbnd_p_max*nk_p_max, &
618- & rho1(ngv0:ngv1,ikib,jpol), 1)
619- END DO !ikib
620- !
621- END DO ! ipol = 1, nwscr
507+ CALL calc_rhog(npol2, ibnd, rho1)
508+ !
509+ ! Infinite frequency -> Bare Coulomb
622510 !
623- ikib = 0
624- DO ik_p = 1, nk_p
625- DO ibnd_p = 1, nbnd_p
626- ikib = ikib + 1
511+ DO jbnd = 1, nb(2)
512+ DO ik = 1, nqbz
513+ Kel(nmf + 1,ik,jbnd,ibnd,1) = SUM(REAL(rho1(1:ngv,ik,jbnd,1) &
514+ & * CONJG(rho1(1:ngv,ik,jbnd,1)), dp) &
515+ & / gq2(1:ngv)) / omega
627516 !
628- Kel(imf, jbnd_g,ibnd_p,ik_p,1) = Kel(imf,jbnd_g,ibnd_p,ik_p,1) + REAL(Kel0(ikib,1), dp) / omega
517+ END DO ! ik = 1, nqbz
518+ END DO ! jbnd = 1, nb(2)
519+ !
520+ DO imf = 0, nmf
521+ !
522+ DO ipol = 1, nwscr
629523 !
630524 IF(nwscr == 2) THEN
631- Kel(imf,jbnd_g,ibnd_p,ik_p,2) = Kel(imf,jbnd_g,ibnd_p,ik_p,2) + REAL(Kel0(ikib,2), dp) / omega
632- ELSE IF(nwscr == 4) THEN
633- Kel(imf,jbnd_g,ibnd_p,ik_p,2) = Kel(imf,jbnd_g,ibnd_p,ik_p,2) + REAL(SUM(Kel0(ikib,2:4)), dp) / omega
525+ jpol = 1
526+ ELSE
527+ jpol = ipol
634528 END IF
635529 !
636- END DO ! ibnd_p
637- END DO ! ik_p
638- !
639- END DO ! imf = 0, nmf
640- !
641- END DO ! ipe = 1, nproc
642- !
643- END DO ! jbnd
530+ CALL start_clock("zgemm(make_kel)")
531+ !
532+ CALL zgemm("T", "N", nqbz*nb(2), ngv1-ngv0+1, ngv, &
533+ & (1.0_dp, 0.0_dp), rho1(1:ngv, 1:nqbz, 1:nb(2), jpol), ngv, &
534+ & Wscr(1:ngv, ngv0:ngv1, imf,ipol),ngv, &
535+ & (0.0_dp, 0.0_dp), rho2( 1:nqbz, 1:nb(2), ngv0:ngv1), nqbz*nb_max)
536+ !
537+ CALL stop_clock("zgemm(make_kel)")
538+ !
539+ DO jbnd = 1, nb(2)
540+ DO ik = 1, nqbz
541+ Kel0(ik, jbnd, ipol) = zdotc(ngv1-ngv0+1, &
542+ & rho2(ik, jbnd, ngv0), nqbz*nb_max, &
543+ & rho1( ngv0:ngv1, ik, jbnd, jpol), 1)
544+ !
545+ END DO
546+ END DO
547+ !
548+ END DO ! ipol = 1, nwscr
549+ !
550+ DO jbnd = 1, nb(2)
551+ DO ik = 1, nqbz
552+ !
553+ Kel(imf,ik, jbnd,ibnd,1) = Kel(imf,ik,jbnd,ibnd,1) + REAL(Kel0(ik, jbnd,1), dp) / omega
554+ !
555+ IF(nwscr == 2) THEN
556+ Kel(imf,ik,jbnd,ibnd,2) = Kel(imf,ik,jbnd,ibnd,2) + REAL(Kel0(ik, jbnd,2), dp) / omega
557+ ELSE IF(nwscr == 4) THEN
558+ Kel(imf,ik,jbnd,ibnd,2) = Kel(imf,ik,jbnd,ibnd,2) + REAL(SUM(Kel0(ik, jbnd,2:4)), dp) / omega
559+ END IF
560+ !
561+ END DO ! ik = 1, nqbz
562+ END DO ! jbnd = 1, nb(2)
563+ !
564+ END DO ! imf = 0, nmf
565+ END DO
566+ END DO
567+ END DO
644568 !
645569 DEALLOCATE(rho1, rho2, Kel0)
646570 !
647571 ! Average the weights for degenerated states
648572 !
649- ALLOCATE(Kel_temp(0:nmf + 1,nbnd,nbnd_p_max,nk_p_max))
573+ ALLOCATE(Kel_temp(0:nmf + 1,nqbz,nb_max,nb_max))
650574 IF(lsf>0) THEN
651- Kel_temp(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p) = CMPLX(Kel(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p,1), &
652- & Kel(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p,2), DP)
575+ Kel_temp(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max) = CMPLX(Kel(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 1), &
576+ & Kel(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 2), DP)
653577 ELSE
654- Kel_temp(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p) = CMPLX(Kel(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p,1), 0.0_dp, DP)
578+ Kel_temp(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max) = CMPLX(Kel(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 1), 0.0_dp, DP)
655579 END IF
656- CALL average_wght_kel(nmf+2, Kel_temp(0:nmf+1,1:nbnd,1:nbnd_p_max,1:nk_p_max))
657- Kel( 0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p,1) = REAL( Kel_temp(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p), DP)
658- IF(lsf>0) Kel(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p,2) = AIMAG(Kel_temp(0:nmf + 1,1:nbnd,1:nbnd_p,1:nk_p))
580+ CALL average_wght_kel(nmf+2, Kel_temp(0:nmf+1,1:nqbz,1:nb_max,1:nb_max))
581+ Kel( 0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 1) = REAL( Kel_temp(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max), DP)
582+ IF(lsf>0) Kel(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max, 2) = AIMAG(Kel_temp(0:nmf + 1, 1:nqbz, 1:nb_max, 1:nb_max))
659583 !
660584 DEALLOCATE(Kel_temp)
661585 !
@@ -665,92 +589,63 @@ END SUBROUTINE make_Kel
665589 !>
666590 !>
667591 !>
668-SUBROUTINE average_wght_kel(nmf0,wght)
592+SUBROUTINE average_wght_kel(nmf0, wght)
669593 !
670594 USE kinds, ONLY : dp
671595 USE wvfct, ONLY : nbnd
672- USE mp_pools, ONLY : intra_pool_comm, nproc_pool
673- USE mp, ONLY : mp_circular_shift_left
674- USE mp, ONLY : mp_barrier !debug
675- USE mp_world, ONLY : world_comm ! debug
596+ USE mp, ONLY : mp_sum
597+ USE mp_world, ONLY : world_comm
676598 !
677- USE sctk_val, ONLY : nbnd_p, nbnd_p_max, bnd0_p, nk_p, nk_p_max, k0_p, degen, ndegen
599+ USE sctk_val, ONLY : degen, ndegen, nqbz, nb, bdsp, nb_max
678600 !
679601 IMPLICIT NONE
680602 !
681603 INTEGER,INTENT(IN) :: nmf0
682- COMPLEX(DP),INTENT(INOUT) :: wght(nmf0,nbnd,nbnd_p_max,nk_p_max)
604+ COMPLEX(DP),INTENT(INOUT) :: wght(nmf0,nqbz, nb_max, nb_max)
683605 !
684- INTEGER :: ik_p, ik_g, ibnd, my_nbnd_p, my_bnd0_p, iproc, idegen
685- COMPLEX(DP) :: wght_ave(nmf0,nbnd_p_max)
686- COMPLEX(DP),ALLOCATABLE :: wght_tmp(:,:,:,:)
606+ INTEGER :: ik, ibnd, idegen
607+ COMPLEX(DP) :: wght_ave(nmf0, nbnd)
608+ COMPLEX(DP),ALLOCATABLE :: wght_tmp(:,:,:)
687609 !
688- ALLOCATE(wght_tmp(nmf0,nbnd_p_max,nbnd,nk_p_max))
610+ ALLOCATE(wght_tmp(nmf0,nbnd,nbnd))
689611 !
690- DO ik_p = 1, nk_p
612+ DO ik = 1, nqbz
691613 !
692- ik_g = ik_p + k0_p
614+ wght_tmp(1:nmf0, 1:nbnd, 1:nbnd) = 0.0_dp
615+ wght_tmp(1:nmf0, bdsp(2)+1:bdsp(2)+nb(2), bdsp(1)+1:bdsp(2)+nb(1)) &
616+ & = wght(1:nmf0, ik, 1: nb(2), 1: nb(1))
617+ CALL mp_sum(wght_tmp, world_comm)
693618 !
694- DO idegen = 1, ndegen(ik_g,2)
695- wght_ave(1:nmf0,1:nbnd_p) = SUM(wght(1:nmf0,degen(1,idegen,ik_g,2): &
696- & degen(2,idegen,ik_g,2),1:nbnd_p,ik_p), 2) &
697- & / REAL(degen(2,idegen,ik_g,2) - degen(1,idegen,ik_g,2) + 1, DP)
698- DO ibnd = degen(1,idegen,ik_g,2), degen(2,idegen,ik_g,2)
699- wght(1:nmf0,ibnd,1:nbnd_p,ik_p) = wght_ave(1:nmf0,1:nbnd_p)
619+ DO idegen = 1, ndegen(ik, 1)
620+ wght_ave(1:nmf0, 1:nbnd) = SUM(wght_tmp(1:nmf0, 1:nbnd, degen(1, idegen, ik, 1): &
621+ & degen(2, idegen, ik, 1)), 3) &
622+ & / REAL(degen(2, idegen, ik,1) - degen(1,idegen,ik,1) + 1, DP)
623+ DO ibnd = degen(1,idegen, ik, 1), degen(2, idegen, ik, 1)
624+ wght_tmp(1:nmf0, 1:nbnd, ibnd) = wght_ave(1:nmf0, 1:nbnd)
700625 END DO
701626 END DO
702- !
703- END DO
704- !
705- wght_tmp(1:nmf0,1:nbnd_p_max,1:nbnd,1:nk_p_max) = 0.0_dp
706- my_nbnd_p = nbnd_p
707- my_bnd0_p = bnd0_p
708- DO iproc = 1, nproc_pool
709- !
710- CALL mp_circular_shift_left(bnd0_p, 1, intra_pool_comm )
711- CALL mp_circular_shift_left(nbnd_p, 1, intra_pool_comm )
712- CALL circular_shift_wrapper_c(nmf0,nbnd*nbnd_p_max*nk_p_max,intra_pool_comm,wght)
713- !
714- wght_tmp(1:nmf0, 1: my_nbnd_p,bnd0_p+1:bnd0_p+nbnd_p,1:nk_p_max) &
715- & = wght(1:nmf0,my_bnd0_p+1:my_bnd0_p+my_nbnd_p, 1: nbnd_p,1:nk_p_max)
716- !
717- END DO
718- !
719- DO ik_p = 1, nk_p
720- !
721- ik_g = ik_p + k0_p
722- !
723- DO idegen = 1, ndegen(ik_g,1)
724- wght_ave(1:nmf0,1:my_nbnd_p) = SUM(wght_tmp(1:nmf0,1:my_nbnd_p,degen(1,idegen,ik_g,1): &
725- & degen(2,idegen,ik_g,1),ik_p), 3) &
726- & / REAL(degen(2,idegen,ik_g,1) - degen(1,idegen,ik_g,1) + 1, DP)
727- DO ibnd = degen(1,idegen,ik_g,1), degen(2,idegen,ik_g,1)
728- wght_tmp(1:nmf0,1:my_nbnd_p,ibnd,ik_p) = wght_ave(1:nmf0,1:my_nbnd_p)
627+ ! TO BE FIXED
628+ DO idegen = 1, ndegen(ik, 2)
629+ wght_ave(1:nmf0,1 :nbnd) = SUM(wght_tmp(1:nmf0, degen(1, idegen, ik, 2): &
630+ & degen(2, idegen, ik, 2), 1:nbnd), 2) &
631+ & / REAL(degen(2, idegen, ik, 2) - degen(1, idegen, ik, 2) + 1, DP)
632+ DO ibnd = degen(1, idegen, ik, 2), degen(2, idegen, ik, 2)
633+ wght_tmp(1:nmf0, ibnd, 1:nbnd) = wght_ave(1:nmf0, 1:nbnd)
729634 END DO
730635 END DO
731636 !
732- END DO
733- !
734- DO iproc = 1, nproc_pool
735- !
736- CALL mp_circular_shift_left(bnd0_p, 1, intra_pool_comm )
737- CALL mp_circular_shift_left(nbnd_p, 1, intra_pool_comm )
738- CALL circular_shift_wrapper_c(nmf0,nbnd*nbnd_p_max*nk_p_max,intra_pool_comm,wght)
739- !
740- wght( 1:nmf0,my_bnd0_p+1:my_bnd0_p+my_nbnd_p, 1: nbnd_p,1:nk_p_max) &
741- & = wght_tmp(1:nmf0, 1: my_nbnd_p,bnd0_p+1:bnd0_p+nbnd_p,1:nk_p_max)
637+ wght( 1:nmf0, ik, 1: nb(2), 1: nb(1)) &
638+ & = wght_tmp(1:nmf0, bdsp(2)+1:bdsp(2)+nb(2), bdsp(1)+1:bdsp(2)+nb(1))
742639 !
743640 END DO
744641 !
745642 DEALLOCATE(wght_tmp)
746643 !
747- CALL mp_barrier(world_comm)
748- !
749644 END SUBROUTINE average_wght_kel
750645 !>
751646 !>
752647 !>
753-SUBROUTINE calc_rhog(npol2, jbnd_g, rho1)
648+SUBROUTINE calc_rhog(npol2, ibnd, rho1)
754649 !
755650 USE kinds, ONLY : DP
756651 USE cell_base, ONLY : omega
@@ -759,57 +654,36 @@ SUBROUTINE calc_rhog(npol2, jbnd_g, rho1)
759654 USE exx, ONLY : dfftt
760655 USE us_exx, ONLY : addusxx_r
761656 USE uspp, ONLY : nkb, okvan
762- USE mp_pools, ONLY : intra_pool_comm
763- USE mp, ONLY : mp_max, mp_circular_shift_left, mp_sum
764657 !
765- USE sctk_val, ONLY : gindx, ngv, wfcq, becwfcq, nk_p, nk_p_max, nbnd_p, nbnd_p_max, bnd0_p
658+ USE sctk_val, ONLY : gindx, ngv, wfcq, becwfcq, nqbz, nb_max, nb
766659 !
767660 IMPLICIT none
768661 !
769- INTEGER,INTENT(IN) :: npol2, jbnd_g
770- COMPLEX(DP),INTENT(OUT) :: rho1(ngv,nbnd_p_max*nk_p_max,npol2)
662+ INTEGER,INTENT(IN) :: npol2, ibnd
663+ COMPLEX(DP),INTENT(OUT) :: rho1(ngv, nqbz, nb_max, npol2)
771664 !
772- INTEGER:: ibnd_p, jbnd_p, ik_p, ikib, ipol
665+ INTEGER:: jbnd, ik, ipol
773666 COMPLEX(dp) :: rho0(dfftt%nnr,4), rho4(dfftt%nnr)
774- COMPLEX(dp),ALLOCATABLE :: wfc2(:,:,:), becwfc2(:,:,:)
775- !
776- ALLOCATE(wfc2(dfftt%nnr,npol,nk_p_max))
777- IF(okvan) ALLOCATE(becwfc2(nkb,npol,nk_p_max))
778- !
779- IF(bnd0_p < jbnd_g .AND. jbnd_g <= bnd0_p+nbnd_p) THEN
780- jbnd_p = jbnd_g - bnd0_p
781- wfc2(1:dfftt%nnr,1:npol,1:nk_p_max) = wfcq(1:dfftt%nnr,jbnd_p,1:npol,1:nk_p_max,2)
782- IF(okvan) becwfc2(1:nkb,1:npol,1:nk_p_max) = becwfcq(1:nkb,jbnd_p,1:npol,1:nk_p_max,2)
783- ELSE
784- wfc2(1:dfftt%nnr,1:npol,1:nk_p_max) = 0.0_dp
785- IF(okvan) becwfc2(1:nkb,1:npol,1:nk_p_max) = 0.0_dp
786- END IF
787- CALL mp_sum(wfc2, intra_pool_comm)
788- IF(okvan) CALL mp_sum(becwfc2, intra_pool_comm)
789667 !
790- ikib = 0
791- DO ik_p = 1, nk_p
792- !
793- DO ibnd_p = 1, nbnd_p
794- !
795- ikib = ikib + 1
668+ DO jbnd = 1, nb(2)
669+ DO ik = 1, nqbz
796670 !
797671 ! debug
798- !rho4(1:dfftt%nnr) = conjg(wfcq(1:dfftt%nnr,ibnd_p,1,ik_p, 1)) * wfcq(1:dfftt%nnr,ibnd_p,1,ik_p, 1)
672+ !rho4(1:dfftt%nnr) = conjg(wfcq(1:dfftt%nnr,ibnd,1,ik, 1)) * wfcq(1:dfftt%nnr,ibnd,1,ik, 1)
799673 !write(*,'(a,2f15.8)',advance="no") "debug1", sum(rho4(1:dfftt%nnr)) / dble(dfftt%nnr)
800674 !IF(okvan) then
801675 ! rho4(1:dfftt%nnr) = rho4(1:dfftt%nnr) / omega
802- ! CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p,1,ik_p,1), &
803- ! & becwfcq(1:nkb,ibnd_p,1,ik_p,1))
676+ ! CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd,1,ik,1), &
677+ ! & becwfcq(1:nkb,ibnd,1,ik,1))
804678 ! rho4(1:dfftt%nnr) = rho4(1:dfftt%nnr) * omega
805679 ! write(*,'(2f15.8)',advance="no") sum(rho4(1:dfftt%nnr)) / dble(dfftt%nnr)
806680 !END IF
807- !rho4(1:dfftt%nnr) = conjg(wfcq(1:dfftt%nnr,ibnd_p,1,ik_p, 2)) * wfcq(1:dfftt%nnr,ibnd_p,1,ik_p, 2)
681+ !rho4(1:dfftt%nnr) = conjg(wfcq(1:dfftt%nnr,ibnd,1,ik, 2)) * wfcq(1:dfftt%nnr,ibnd,1,ik, 2)
808682 !write(*,'(a,2f15.8)',advance="no") "debug1", sum(rho4(1:dfftt%nnr)) / dble(dfftt%nnr)
809683 !IF(okvan) then
810684 ! rho4(1:dfftt%nnr) = rho4(1:dfftt%nnr) / omega
811- ! CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p,1,ik_p,2), &
812- ! & becwfcq(1:nkb,ibnd_p,1,ik_p,2))
685+ ! CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd,1,ik,2), &
686+ ! & becwfcq(1:nkb,ibnd,1,ik,2))
813687 ! rho4(1:dfftt%nnr) = rho4(1:dfftt%nnr) * omega
814688 ! write(*,'(2f15.8)') sum(rho4(1:dfftt%nnr)) / dble(dfftt%nnr)
815689 !END IF
@@ -820,31 +694,31 @@ SUBROUTINE calc_rhog(npol2, jbnd_g, rho1)
820694 rho0(1:dfftt%nnr, 1:npol2) = CMPLX(0.0_dp, 0.0_dp, kind=dp)
821695 DO ipol = 1, npol
822696 !
823- rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr,ibnd_p,ipol,ik_p,1)) &
824- & * wfc2(1:dfftt%nnr, ipol,ik_p) / omega
825- IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p,ipol,ik_p,1), &
826- & becwfc2(1:nkb, ipol,ik_p))
697+ rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr, ibnd, ipol, ik, 1)) &
698+ & * wfcq(1:dfftt%nnr, jbnd, ipol, ik, 2) / omega
699+ IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb, ibnd, ipol, ik, 1), &
700+ & becwfcq(1:nkb, jbnd, ipol, ik, 2))
827701 rho0(1:dfftt%nnr,1) = rho0(1:dfftt%nnr,1) + rho4(1:dfftt%nnr) * omega
828702 !
829703 IF(npol2 == 4) THEN
830704 !
831- rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr,ibnd_p, ipol,ik_p,1)) &
832- & * wfc2(1:dfftt%nnr, 3-ipol,ik_p) / omega
833- IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p, ipol,ik_p,1), &
834- & becwfc2(1:nkb, 3-ipol,ik_p))
705+ rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr, ibnd, ipol, ik, 1)) &
706+ & * wfcq(1:dfftt%nnr, jbnd, 3-ipol, ik, 2) / omega
707+ IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb, ibnd, ipol, ik, 1), &
708+ & becwfcq(1:nkb, jbnd, 3-ipol, ik, 2))
835709 rho0(1:dfftt%nnr,2) = rho0(1:dfftt%nnr,2) + rho4(1:dfftt%nnr) * omega
836710 !
837- rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr,ibnd_p, ipol,ik_p,1)) &
838- & * wfc2(1:dfftt%nnr, 3-ipol,ik_p) / omega
839- IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p, ipol,ik_p,1), &
840- & becwfc2(1:nkb, 3-ipol,ik_p))
711+ rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr, ibnd, ipol, ik, 1)) &
712+ & * wfcq(1:dfftt%nnr, jbnd, 3-ipol, ik, 2) / omega
713+ IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb, ibnd, ipol, ik, 1), &
714+ & becwfcq(1:nkb, jbnd, 3-ipol, ik, 2))
841715 rho0(1:dfftt%nnr,3) = rho0(1:dfftt%nnr,3) &
842716 & + rho4(1:dfftt%nnr) * REAL(3-2*ipol, dp) * omega
843717 !
844- rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr,ibnd_p,ipol,ik_p,1)) &
845- & * wfc2(1:dfftt%nnr, ipol,ik_p) / omega
846- IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb,ibnd_p,ipol,ik_p,1), &
847- & becwfc2(1:nkb, ipol,ik_p))
718+ rho4(1:dfftt%nnr) = CONJG(wfcq(1:dfftt%nnr, ibnd, ipol, ik, 1)) &
719+ & * wfcq(1:dfftt%nnr, jbnd, ipol, ik, 2) / omega
720+ IF(okvan) CALL addusxx_r(rho4(1:dfftt%nnr), becwfcq(1:nkb, ibnd, ipol, ik, 1), &
721+ & becwfcq(1:nkb, jbnd, ipol, ik, 2))
848722 rho0(1:dfftt%nnr,4) = rho0(1:dfftt%nnr,4) &
849723 & + rho4(1:dfftt%nnr) * REAL(3-2*ipol, dp) * omega
850724 !
@@ -857,16 +731,16 @@ SUBROUTINE calc_rhog(npol2, jbnd_g, rho1)
857731 CALL start_clock("fft[rho]")
858732 !
859733 DO ipol = 1, npol2
860- CALL cfft3d (rho0(1:dfftt%nnr,ipol), &
734+ CALL cfft3d (rho0(1:dfftt%nnr, ipol), &
861735 & dfftt%nr1, dfftt%nr2, dfftt%nr3, dfftt%nr1, dfftt%nr2, dfftt%nr3, 1, -1)
862736 !
863- rho1(1:ngv, ikib,ipol) = rho0(gindx(1:ngv),ipol)
737+ rho1(1:ngv, ik, jbnd, ipol) = rho0(gindx(1:ngv), ipol)
864738 END DO ! ipol
865739 !
866740 CALL stop_clock("fft[rho]")
867741 !
868- END DO ! ibnd_p
869- END DO ! ik_p
742+ END DO ! ik = 1, nqbz
743+ END DO ! jbnd = 1, nbnd_p
870744 !
871745 END SUBROUTINE calc_rhog
872746 !>
@@ -892,13 +766,12 @@ SUBROUTINE Coulomb_parameter()
892766 USE noncollin_module, ONLY : npol
893767 USE mp, ONLY : mp_sum, mp_circular_shift_left
894768 !
895- USE sctk_val, ONLY : nmf, mf, Kel, lsf, bnd0_p, nbnd_p, &
896- & k0_p, nk_p
769+ USE sctk_val, ONLY : nmf, mf, Kel, lsf, nb, bdsp
897770 !
898771 IMPLICIT NONE
899772 !
900- INTEGER :: ntetra0, ntetra1, ii, ikv(3), ik, ik_p, imf, kintp(8), nkel, ikel, &
901- & b_low_p, b_high_p, b_low_g, b_high_g
773+ INTEGER :: ntetra0, ntetra1, ii, ikv(3), ik, imf, kintp(8), nkel, ikel, &
774+ & b_low_p(2), b_high_p(2), b_low_g(2), b_high_g(2)
902775 REAL(dp) :: dost(2), kv(3), mu(0:nmf+1,2), wintp(8), scale_mu
903776 REAL(dp),allocatable :: wght(:,:,:)
904777 !
@@ -928,10 +801,12 @@ SUBROUTINE Coulomb_parameter()
928801 !
929802 mu(0:nmf+1, 1:nkel) = 0.0_dp
930803 !
931- b_low_g = MAX(b_low, bnd0_p+1)
932- b_high_g = MIN(b_high, bnd0_p+nbnd_p)
933- b_low_p = b_low_g - bnd0_p
934- b_high_p = b_high_g - bnd0_p
804+ DO ii = 1, 2
805+ b_low_g( ii) = MAX(b_low, bdsp(ii)+1)
806+ b_high_g(ii) = MIN(b_high, bdsp(ii)+nb(ii))
807+ b_low_p( ii) = b_low_g( ii) - bdsp(ii)
808+ b_high_p(ii) = b_high_g(ii) - bdsp(ii)
809+ END DO
935810 !
936811 DO ik = 1, nks
937812 !
@@ -946,15 +821,9 @@ SUBROUTINE Coulomb_parameter()
946821 DO imf = 0, nmf + 1
947822 DO ii = 1, 8
948823 !
949- IF(k0_p < kintp(ii) .AND. kintp(ii) <= k0_p + nk_p) THEN
950- !
951- ik_p = kintp(ii) - k0_p
952- !
953- mu(imf,ikel) = mu(imf,ikel) + &
954- & SUM(Kel(imf,b_low:b_high,b_low_p:b_high_p, ik_p, ikel) &
955- & * wght( b_low:b_high,b_low_g:b_high_g,ik)) * wintp(ii)
956- !
957- END IF
824+ mu(imf,ikel) = mu(imf,ikel) + &
825+ & SUM(Kel(imf, kintp(ii), b_low_p(2):b_high_p(2), b_low_p(1):b_high_p(1), ikel) &
826+ & * wght( b_low_g(2):b_high_g(2), b_low_g(1):b_high_g(1), ik)) * wintp(ii)
958827 !
959828 END DO ! ii = 1, 8
960829 END DO ! imf = 0, nmf + 1
@@ -999,7 +868,7 @@ SUBROUTINE chebyshev_interpol()
999868 USE mp, ONLY : mp_sum, mp_max
1000869 USE io_global, ONLY : stdout
1001870 !
1002- USE sctk_val, ONLY : Kel, nqbz, nmf, lsf, mf, nci, nk_p, nbnd_p
871+ USE sctk_val, ONLY : Kel, nqbz, nmf, lsf, mf, nci, nqbz, nb
1003872 !
1004873 IMPLICIT NONE
1005874 !
@@ -1044,26 +913,26 @@ SUBROUTINE chebyshev_interpol()
1044913 aveer(0:nmf+1) = 0.0_dp
1045914 maxer(0:nmf+1) = 0.0_dp
1046915 !
1047- DO ik = 1, nk_p
1048- DO ib = 1, nbnd_p
1049- DO jb = 1, nbnd
916+ DO ik = 1, nqbz
917+ DO ib = 1, nb(1)
918+ DO jb = 1, nb(2)
1050919 !
1051920 Kel0(1:nci) = 0.0_dp
1052921 DO imf = 1, nci
1053922 DO jmf = 1, nci
1054- Kel0(imf) = Kel0(imf) + coef(imf,jmf)*Kel((jmf-1)*2,jb,ib,ik,ikel)
923+ Kel0(imf) = Kel0(imf) + coef(imf,jmf)*Kel((jmf-1)*2,ik,jb,ib,ikel)
1055924 END DO
1056925 END DO
1057926 !
1058927 Kel1(0:nmf+1) = MATMUL(Kel0(1:nci), cheb(1:nci, 0:nmf+1))
1059928 !
1060- err(0:nmf+1) = ABS((Kel1(0:nmf+1) - Kel(0:nmf+1,jb,ib,ik,ikel))) &
1061- & * 2.0_dp / (ABS( Kel1(0:nmf+1)) + ABS(Kel(0:nmf+1,jb,ib,ik,ikel)))
929+ err(0:nmf+1) = ABS((Kel1(0:nmf+1) - Kel(0:nmf+1,ik,jb,ib,ikel))) &
930+ & * 2.0_dp / (ABS( Kel1(0:nmf+1)) + ABS(Kel(0:nmf+1,ik,jb,ib,ikel)))
1062931 !
1063932 aveer(0:nmf+1) = aveer(0:nmf+1) + err(0:nmf+1)
1064933 maxer(0:nmf+1) = MAX(maxer(0:nmf+1), err(0:nmf+1))
1065934 !
1066- Kel(0:nci-1,jb,ib,ik,ikel) = Kel0(1:nci)
935+ Kel(0:nci-1,ik,jb,ib,ikel) = Kel0(1:nci)
1067936 !
1068937 END DO ! jb
1069938 END DO ! ib
@@ -1109,12 +978,11 @@ SUBROUTINE write_Kel()
1109978 USE mp, ONLY : mp_circular_shift_left
1110979 USE io_files, ONLY : prefix, tmp_dir
1111980 !
1112- USE sctk_val, ONLY : Kel, nqbz, lsf, nci, bnd0_p, nbnd_p, nbnd_p_max, &
1113- & k0_p, nk_p, nk_p_max, nmf
981+ USE sctk_val, ONLY : Kel, nqbz, lsf, nci, nb, bdsp, nmf, nb_max
1114982 !
1115983 IMPLICIT NONE
1116984 !
1117- INTEGER :: fo = 20, nkel, iproc
985+ INTEGER :: fo = 20, nkel, iproc, ik
1118986 CHARACTER(LEN=6), EXTERNAL :: int_to_char
1119987 REAL(DP),ALLOCATABLE :: Kel_all(:,:,:,:,:)
1120988 !
@@ -1123,16 +991,21 @@ SUBROUTINE write_Kel()
1123991 ELSE
1124992 nkel = 1
1125993 END IF
1126- IF(mpime==0) ALLOCATE(Kel_all(0:nci-1,nbnd,nbnd,nqbz,nkel))
994+ IF(mpime==0) THEN
995+ ALLOCATE(Kel_all(0:nci-1,nbnd,nbnd,nqbz,nkel))
996+ ELSE
997+ ALLOCATE(Kel_all(1,1,1,1,1))
998+ END IF
1127999 !
11281000 DO iproc = 1, nproc
1129- CALL mp_circular_shift_left(nbnd_p, 1, world_comm )
1130- CALL mp_circular_shift_left(bnd0_p, 1, world_comm )
1131- CALL mp_circular_shift_left(k0_p, 1, world_comm )
1132- CALL mp_circular_shift_left(nk_p, 1, world_comm )
1133- CALL circular_shift_wrapper_r(nmf + 2,nbnd*nbnd_p_max*nk_p_max*nkel,world_comm,Kel)
1134- IF(mpime==0) Kel_all(0:nci-1,1:nbnd,bnd0_p+1:bnd0_p+nbnd_p,k0_p+1:k0_p+nk_p,1:nkel) &
1135- & = Kel(0:nci-1,1:nbnd, 1: nbnd_p, 1: nk_p,1:nkel)
1001+ CALL mp_circular_shift_left(nb, 1, world_comm )
1002+ CALL mp_circular_shift_left(bdsp, 1, world_comm )
1003+ CALL circular_shift_wrapper_r(nmf + 2,nqbz*nb_max*nb_max*nkel, world_comm, Kel)
1004+ DO ik = 1, nqbz
1005+ IF(mpime==0) Kel_all(0:nci-1, bdsp(2)+1:bdsp(2)+nb(2), bdsp(1)+1:bdsp(1)+nb(1), ik,1:nkel) &
1006+ & = Kel(0:nci-1, ik, 1: nb(2), 1: nb(1), 1:nkel)
1007+ !
1008+ END DO
11361009 END DO
11371010 !
11381011 IF(mpime == 0) THEN
@@ -1154,8 +1027,7 @@ SUBROUTINE write_Kel()
11541027 !
11551028 END IF
11561029 !
1157- IF(mpime==0) DEALLOCATE(Kel_all)
1158- DEALLOCATE(Kel)
1030+ DEALLOCATE(Kel_all, Kel)
11591031 !
11601032 END SUBROUTINE write_Kel
11611033 !
--- a/SCTK/src/sctk_kernel_weight.f90
+++ b/SCTK/src/sctk_kernel_weight.f90
@@ -25,6 +25,8 @@ FUNCTION Zweight(beta,x0,y0,z0) RESULT(Wz)
2525 REAL(dp) :: thr = 1e-3_dp, tzpx, tzmx, mm, mp, pm, pp, &
2626 & tx, ty, tz, x, y, z, bx, by, bz, txmy
2727 !
28+ Wz = 0.0_dp
29+ !
2830 IF(zero_kelvin) THEN
2931 IF(scdft_kernel == 1 .OR. scdft_kernel == 2) THEN
3032 Wz = -1d0/(x0+y0+z0)**2
@@ -219,6 +221,8 @@ FUNCTION Kweight(beta,x0,y0,z0) RESULT(Wk)
219221 REAL(dp) :: thr = 1e-3_dp, tx, ty, tz, tg2z, x, y, z, bxymin, bx, by, bz
220222 REAL(dp) :: g1 = 1.33_dp, g2 = 3.88_dp
221223 !
224+ Wk = 0.0_dp
225+ !
222226 IF(zero_kelvin) THEN
223227 IF(scdft_kernel == 1) THEN
224228 Wk = - 2.0_dp / (x0+y0+z0)
--- a/SCTK/src/sctk_stdin.f90
+++ b/SCTK/src/sctk_stdin.f90
@@ -166,7 +166,6 @@ SUBROUTINE stdin_scdft()
166166 !
167167 REAL(dp) :: temp
168168 LOGICAL :: spin_fluc
169- CHARACTER(256) :: elph, coulomb
170169 !
171170 NAMELIST /scdft/ temp, fbee, lbee, xic, nmf, nx, ne, emin, emax, lz_coulomb, electron_maxstep, &
172171 & conv_thr, fildyn, spin_fluc, scdft_kernel, freq_min, freq_min_ratio
--- a/SCTK/src/sctk_tetra.f90
+++ b/SCTK/src/sctk_tetra.f90
@@ -152,54 +152,50 @@ END SUBROUTINE calc_dosk
152152 !
153153 ! Integration weight with tetrahedron method
154154 !
155-SUBROUTINE tetraweight(nkd0,indx2,wghtd)
155+SUBROUTINE tetraweight(wghtd)
156156 !
157- USE wvfct, ONLY : nbnd
158- USE mp_pools, ONLY : inter_pool_comm
159157 USE kinds, ONLY : DP
160158 USE cell_base, ONLY : omega
161- USE wvfct, ONLY : et, nbnd
159+ USE wvfct, ONLY : et
162160 USE ktetra, ONLY : wlsm, tetra, ntetra
163161 USE klist, ONLY : nks
164162 USE ener, ONLY : ef
165163 USE lsda_mod, ONLY : nspin
166164 !
167- USE sctk_val, ONLY : nmf, nbnd_p, bnd0_p
165+ USE sctk_val, ONLY : nmf, nb, bdsp, nqbz
168166 !
169167 !
170168 IMPLICIT NONE
171169 !
172- INTEGER,INTENT(IN) :: nkd0, indx2(20,6 * nks)
173- COMPLEX(dp),INTENT(OUT) :: wghtd(nbnd*(nmf+1),nbnd_p,nkd0)
170+ COMPLEX(dp),INTENT(OUT) :: wghtd((nmf+1)*nb(2),nb(1),nks)
174171 !
175- INTEGER :: it, ibnd_p, ibnd_g, ii, ntetra0, ntetra1, itetra(4)
172+ INTEGER :: it, ibnd, ii, itetra(4)
176173 REAL(dp) :: thr = 1e-8_dp, V
177- REAL(dp) :: e(4), a(4,4), ei0(4,nbnd), ej0(4,nbnd), ei1(4), ej1(4,nbnd), tsmall(4,4)
178- COMPLEX(dp) :: w1(nbnd*(nmf+1),4), w2(nbnd*(nmf+1),4)
179- !
180- CALL divide(inter_pool_comm, ntetra, ntetra0, ntetra1)
174+ REAL(dp) :: e(4), a(4,4), ei0(4,nb(1)), ej0(4,nb(2)), ei1(4), ej1(4,nb(2)), tsmall(4,4)
175+ COMPLEX(dp) :: w1(nb(2)*(nmf+1),4), w2(nb(2)*(nmf+1),4)
181176 !
182- wghtd(1:nbnd*(nmf+1),1:nbnd_p,1:nkd0) = 0.0_dp
177+ wghtd(1:nb(2)*(nmf+1),1:nb(1),1:nks) = 0.0_dp
183178 !
184179 !$OMP PARALLEL DEFAULT(NONE) &
185- !$OMP & SHARED(ntetra0,ntetra1,nbnd,wlsm,nmf,et,wghtd,thr,ef,tetra,indx2,nks,nbnd_p,bnd0_p) &
186- !$OMP & PRIVATE(it,ii,ibnd_p,ibnd_g,itetra,ei0,ej0,ei1,ej1,w1,w2,a,e,V,tsmall)
180+ !$OMP & SHARED(wlsm,nmf,et,wghtd,thr,ef,tetra,nks,nb,bdsp,ntetra) &
181+ !$OMP & PRIVATE(it,ii,ibnd,itetra,ei0,ej0,ei1,ej1,w1,w2,a,e,V,tsmall)
187182 !
188- DO it = ntetra0, ntetra1
183+ DO it = 1, ntetra
189184 !
190- DO ibnd_g = 1, nbnd
191- ei0(1:4,ibnd_g) = MATMUL(wlsm(1:4,1:20), et(ibnd_g, tetra(1:20,it) )) - ef
192- ej0(1:4,ibnd_g) = MATMUL(wlsm(1:4,1:20), et(ibnd_g, tetra(1:20,it) + nks)) - ef
185+ DO ibnd = 1, nb(1)
186+ ei0(1:4,ibnd) = MATMUL(wlsm(1:4,1:20), et(ibnd+bdsp(1), tetra(1:20,it) )) - ef
187+ END DO
188+ DO ibnd = 1, nb(2)
189+ ej0(1:4,ibnd) = MATMUL(wlsm(1:4,1:20), et(ibnd+bdsp(2), tetra(1:20,it) + nks)) - ef
193190 END DO
194191 !
195192 !$OMP DO
196- DO ibnd_p = 1, nbnd_p
193+ DO ibnd = 1, nb(1)
197194 !
198- ibnd_g = ibnd_p + bnd0_p
199- w1(1:(nmf+1)*nbnd,1:4) = 0.0_dp
195+ w1(1:(nmf+1)*nb(2),1:4) = 0.0_dp
200196 !
201197 itetra(1) = 0
202- e(1:4) = ei0(1:4, ibnd_g)
198+ e(1:4) = ei0(1:4, ibnd)
203199 CALL hpsort (4, e, itetra)
204200 !
205201 DO ii = 1, 4
@@ -219,13 +215,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
219215 tsmall(3, 1:4) = (/a(1,3), 0.0_dp, a(3,1), 0.0_dp/)
220216 tsmall(4, 1:4) = (/a(1,4), 0.0_dp, 0.0_dp, a(4,1)/)
221217 !
222- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
223- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
218+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
219+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
224220 !
225221 CALL tetra2(ei1,ej1,w2)
226222 !
227- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
228- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
223+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
224+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
229225 !
230226 END IF
231227 !
@@ -242,13 +238,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
242238 tsmall(3, 1:4) = (/a(1,4), 0.0_dp, 0.0_dp, a(4,1)/)
243239 tsmall(4, 1:4) = (/0.0_dp, a(2,4), 0.0_dp, a(4,2)/)
244240 !
245- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
246- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
241+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
242+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
247243 !
248244 CALL tetra2(ei1,ej1,w2)
249245 !
250- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
251- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
246+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
247+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
252248 !
253249 END IF
254250 !
@@ -263,13 +259,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
263259 tsmall(3, 1:4) = (/0.0_dp, a(2,3), a(3,2), 0.0_dp/)
264260 tsmall(4, 1:4) = (/0.0_dp, a(2,4), 0.0_dp, a(4,2)/)
265261 !
266- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
267- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
262+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
263+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
268264 !
269265 CALL tetra2(ei1,ej1,w2)
270266 !
271- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
272- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
267+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
268+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
273269 !
274270 END IF
275271 !
@@ -284,13 +280,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
284280 tsmall(3, 1:4) = (/0.0_dp, a(2,3), a(3,2), 0.0_dp/)
285281 tsmall(4, 1:4) = (/0.0_dp, a(2,4), 0.0_dp, a(4,2)/)
286282 !
287- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
288- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
283+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
284+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
289285 !
290286 CALL tetra2(ei1,ej1,w2)
291287 !
292- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
293- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
288+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
289+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
294290 !
295291 END IF
296292 !
@@ -307,13 +303,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
307303 tsmall(3, 1:4) = (/0.0_dp, 0.0_dp, 1.0_dp, 0.0_dp/)
308304 tsmall(4, 1:4) = (/0.0_dp, 0.0_dp, a(3,4), a(4,3)/)
309305 !
310- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
311- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
306+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
307+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
312308 !
313309 CALL tetra2(ei1,ej1,w2)
314310 !
315- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
316- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
311+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
312+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
317313 !
318314 END IF
319315 !
@@ -328,13 +324,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
328324 tsmall(3, 1:4) = (/0.0_dp, a(2,4), 0.0_dp, a(4,2)/)
329325 tsmall(4, 1:4) = (/0.0_dp, 0.0_dp, a(3,4), a(4,3)/)
330326 !
331- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
332- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
327+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
328+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
333329 !
334330 CALL tetra2(ei1,ej1,w2)
335331 !
336- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
337- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
332+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
333+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
338334 !
339335 END IF
340336 !
@@ -349,13 +345,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
349345 tsmall(3, 1:4) = (/0.0_dp, a(2,4), 0.0_dp, a(4,2)/)
350346 tsmall(4, 1:4) = (/0.0_dp, 0.0_dp, a(3,4), a(4,3)/)
351347 !
352- ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd_g))
353- ej1(1:4,1:nbnd) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nbnd))
348+ ei1(1:4 ) = MATMUL(tsmall(1:4,1:4), ei0(itetra(1:4), ibnd))
349+ ej1(1:4,1:nb(2)) = MATMUL(tsmall(1:4,1:4), ej0(itetra(1:4), 1:nb(2)))
354350 !
355351 CALL tetra2(ei1,ej1,w2)
356352 !
357- w1(1:(nmf+1)*nbnd,itetra(1:4)) = w1(1:(nmf+1)*nbnd, itetra(1:4)) &
358- & + V * MATMUL(w2(1:(nmf+1)*nbnd,1:4), tsmall(1:4,1:4))
353+ w1(1:(nmf+1)*nb(2),itetra(1:4)) = w1(1:(nmf+1)*nb(2), itetra(1:4)) &
354+ & + V * MATMUL(w2(1:(nmf+1)*nb(2),1:4), tsmall(1:4,1:4))
359355 !
360356 END IF
361357 !
@@ -363,13 +359,13 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
363359 !
364360 ! D - 1
365361 !
366- ei1(1:4 ) = ei0(itetra(1:4), ibnd_g)
367- ej1(1:4,1:nbnd) = ej0(itetra(1:4), 1:nbnd)
362+ ei1(1:4 ) = ei0(itetra(1:4), ibnd) ! TO BE FIXED
363+ ej1(1:4,1:nb(2)) = ej0(itetra(1:4), 1:nb(2)) ! TO BE FIXED
368364 !
369365 CALL tetra2(ei1,ej1,w2)
370366 !
371- w1(1:(nmf+1)*nbnd,1:4) = w1(1:(nmf+1)*nbnd,1:4) &
372- & + w2(1:(nmf+1)*nbnd,1:4)
367+ w1(1:(nmf+1)*nb(2),1:4) = w1(1:(nmf+1)*nb(2),1:4) &
368+ & + w2(1:(nmf+1)*nb(2),1:4)
373369 !
374370 ELSE
375371 !
@@ -377,10 +373,10 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
377373 !
378374 END IF
379375 !
380- wghtd(1:(nmf+1)*nbnd,ibnd_p,indx2(1:20,it)) = wghtd(1:(nmf+1)*nbnd,ibnd_p, indx2(1:20,it)) &
381- & + MATMUL(w1(1:(nmf+1)*nbnd,1:4), wlsm(1:4,1:20))
376+ wghtd(1:(nmf+1)*nb(2),ibnd,tetra(1:20,it)) = wghtd(1:(nmf+1)*nb(2),ibnd, tetra(1:20,it)) &
377+ & + MATMUL(w1(1:(nmf+1)*nb(2),1:4), wlsm(1:4,1:20))
382378 !
383- END DO ! ibnd_p
379+ END DO ! ibnd
384380 !$OMP END DO NOWAIT
385381 !
386382 END DO ! it
@@ -388,10 +384,10 @@ SUBROUTINE tetraweight(nkd0,indx2,wghtd)
388384 !$OMP END PARALLEL
389385 !
390386 IF(nspin == 1) THEN
391- wghtd(1:(nmf+1)*nbnd,1:nbnd_p,1:nkd0) = wghtd(1:(nmf+1)*nbnd,1:nbnd_p,1:nkd0) &
387+ wghtd(1:(nmf+1)*nb(2),1:nb(1),1:nqbz) = wghtd(1:(nmf+1)*nb(2),1:nb(1),1:nqbz) &
392388 & * 4.0_dp / (REAL(ntetra, dp) * omega)
393389 ELSE
394- wghtd(1:(nmf+1)*nbnd,1:nbnd_p,1:nkd0) = wghtd(1:(nmf+1)*nbnd,1:nbnd_p,1:nkd0) &
390+ wghtd(1:(nmf+1)*nb(2),1:nb(1),1:nqbz) = wghtd(1:(nmf+1)*nb(2),1:nb(1),1:nqbz) &
395391 & * 2.0_dp / (REAL(ntetra, dp) * omega)
396392 END IF
397393 !
@@ -404,23 +400,22 @@ SUBROUTINE tetra2(ei0,ej0,w0)
404400 ! This routine take the unoccupied region.
405401 !
406402 USE kinds, ONLY : dp
407- USE wvfct, ONLY : nbnd
408403 !
409- USE sctk_val, ONLY : nmf
404+ USE sctk_val, ONLY : nmf, nb
410405 !
411406 IMPLICIT NONE
412407 !
413- REAL(dp),INTENT(IN) :: ei0(4), ej0(4,nbnd)
414- COMPLEX(dp),INTENT(OUT) :: w0(0:nmf,nbnd,4)
408+ REAL(dp),INTENT(IN) :: ei0(4), ej0(4,nb(2))
409+ COMPLEX(dp),INTENT(OUT) :: w0(0:nmf,nb(2),4)
415410 !
416411 INTEGER :: ii, ibnd, itetra(4)
417412 REAL(dp) :: V, de(4), thr = 1.0e-8_dp
418413 REAL(dp) :: e(4), a(4,4), tsmall(4,4)
419414 COMPLEX(dp) :: w1(0:nmf,4)
420415 !
421- w0(0:nmf,1:nbnd,1:4) = 0.0_dp
416+ w0(0:nmf,1:nb(2),1:4) = 0.0_dp
422417 !
423- DO ibnd = 1, nbnd
418+ DO ibnd = 1, nb(2)
424419 !
425420 e(1:4) = ej0(1:4,ibnd)
426421 itetra(1) = 0
--- a/SCTK/src/sctk_val.f90
+++ b/SCTK/src/sctk_val.f90
@@ -12,6 +12,9 @@ MODULE sctk_val
1212 IMPLICIT NONE
1313 !
1414 INTEGER,SAVE :: &
15+ & nb_max, & !<
16+ & nb(2), & !<
17+ & bdsp(2), & !<
1518 & scdft_kernel, & !< 1: Luders2005, 2: Sanna2020
1619 & lsf, & !< =2 for spin-fluctuation, =0 for non-sf
1720 & fbee, & !< First band for Vc
@@ -27,12 +30,6 @@ MODULE sctk_val
2730 & ngv, & !< # of G-vector in ecutfock
2831 & ngv0, & !< # of G-vector in ecutfock
2932 & ngv1, & !< # of G-vector in ecutfock
30- & nk_p, & !< # of k par PE
31- & nk_p_max, & !< # of k par PE (Max across inter-pool)
32- & k0_p, & !< First k minus 1 for each PE
33- & nbnd_p, & !< # of bands par PE
34- & nbnd_p_max, & !< # of bands par PE (Max across intra-pool)
35- & bnd0_p, & !< First band minus 1 for each PE
3633 & nx !< = 2 * ne - 1
3734 !
3835 REAL(8),SAVE :: &
--- a/SCTK/src/sctk_wfc.f90
+++ b/SCTK/src/sctk_wfc.f90
@@ -18,117 +18,106 @@ SUBROUTINE get_wfcg()
1818 USE wvfct, ONLY : nbnd, npwx
1919 USE kinds, ONLY : DP
2020 USE io_files, ONLY : prefix, tmp_dir
21- USE mp_world, ONLY : world_comm
22- USE mp_pools, ONLY : intra_pool_comm, inter_pool_comm, me_pool, nproc_pool
21+ USE mp_world, ONLY : world_comm, mpime
22+ USE mp_pools, ONLY : inter_pool_comm, intra_pool_comm
2323 USE mp, ONLY : mp_max, mp_sum, mp_bcast, mp_circular_shift_left, mp_barrier
2424 USE disp, ONLY : nq1, nq2, nq3
2525 USE io_global, ONLY : stdout
2626 USE noncollin_module, ONLY : npol
2727 !
2828 USE sctk_coulomb, ONLY : circular_shift_wrapper_c
29- USE sctk_val, ONLY : nqbz, wfc0, igv, npw, nk_p, nk_p_max, k0_p, nbnd_p, nbnd_p_max, bnd0_p
29+ USE sctk_val, ONLY : nqbz, wfc0, igv, npw, nb, bdsp, nb_max
30+ USE mp_kel, ONLY : mp_start_kel, band1_comm, band2_comm
3031 !
3132 IMPLICIT NONE
3233 !
3334 INTEGER :: fi, ii, ngw0
3435 CHARACTER(LEN=320) :: filename
3536 !
36- INTEGER :: ik0, ispin, npol0, ierr, ikv(3), ibnd, iproc, ik_p, ik_g
37+ INTEGER :: ik0, ispin, npol0, ierr, ikv(3), ibnd, ik
3738 REAL(dp) :: scalef, xk0(3), bvec0(3,3), xk1(3), RAM_wfc
3839 LOGICAL :: gamma_only
39- COMPLEX(DP),ALLOCATABLE :: wfc0_all(:,:,:,:,:)
40+ COMPLEX(DP),ALLOCATABLE :: wfc0k(:,:,:)
4041 !
4142 CHARACTER(LEN=6), EXTERNAL :: int_to_char
4243 INTEGER, EXTERNAL :: find_free_unit
4344 !
4445 fi = find_free_unit()
4546 !
46- CALL divide(inter_pool_comm, nqbz, k0_p, nk_p)
47- k0_p = k0_p - 1
48- nk_p = nk_p - k0_p
49- nk_p_max = nk_p
50- CALL mp_max(nk_p_max, world_comm)
51- WRITE(stdout,'(7x,"k-point per PE : ",i0)') nk_p_max
47+ CALL mp_start_kel()
48+ CALL divide(band1_comm, nbnd, bdsp(1), nb(1))
49+ CALL divide(band2_comm, nbnd, bdsp(2), nb(2))
50+ !
51+ bdsp(1) = bdsp(1) - 1
52+ nb(1) = nb(1) - bdsp(1)
5253 !
53- CALL divide(intra_pool_comm, nbnd, bnd0_p, nbnd_p)
54- bnd0_p = bnd0_p - 1
55- nbnd_p = nbnd_p - bnd0_p
56- nbnd_p_max = nbnd_p
57- CALL mp_max(nbnd_p_max, world_comm)
58- WRITE(stdout,'(7x,"Bands per PE : ",i0)') nbnd_p_max
54+ bdsp(2) = bdsp(2) - 1
55+ nb(2) = nb(2) - bdsp(2)
56+ !
57+ nb_max = MAXVAL(nb(1:2))
58+ CALL mp_max(nb_max, world_comm)
5959 !
6060 CALL mp_sum(npwx, intra_pool_comm)
6161 CALL mp_max(npwx, inter_pool_comm)
6262 !
63- RAM_wfc = REAL(npwx,dp)*REAL(nbnd_p_max,dp)*REAL(npol,dp)*REAL(nk_p_max,dp)*2.0_dp
63+ RAM_wfc = REAL(npwx,dp)*REAL(nb_max,dp)*REAL(npol,dp)*REAL(nqbz,dp)*2.0_dp
6464 CALL mp_sum(RAM_wfc, world_comm)
6565 WRITE(stdout,'(7x,"Total RAM for input WFC : ",e10.2," GB")') RAM_wfc*16.0e-9_dp
6666 CALL mp_barrier(world_comm)
67- ALLOCATE(wfc0(npwx,nbnd_p_max,npol,nk_p_max,2), igv(3,npwx,nk_p_max,2), &
68- & npw(nk_p_max,2))
69- igv(1:3,1:npwx, 1:nk_p_max,1:2) = 0
70- wfc0( 1:npwx,1:nbnd_p_max,1:npol,1:nk_p_max,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
67+ ALLOCATE(wfc0(npwx,nb_max,npol,nqbz,2), igv(3,npwx,nqbz,2), &
68+ & npw(nqbz,2), wfc0k(npwx,nbnd,npol))
69+ igv(1:3,1:npwx, 1:nqbz,1:2) = 0
70+ wfc0( 1:npwx,1:nb_max,1:npol,1:nqbz,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
7171 !
7272 ! Read Wfc(k)
7373 !
74- IF(me_pool == 0) THEN
75- !
76- ALLOCATE(wfc0_all(npwx,nbnd,npol,nk_p_max,2))
74+ DO ii = 1, 2
7775 !
78- DO ii = 1, 2
76+ DO ik = 1, nqbz
7977 !
80- DO ik_p = 1, nk_p
81- !
82- ik_g = ik_p + k0_p
78+ IF(mpime == 0) THEN
8379 !
8480 filename = TRIM(tmp_dir) // TRIM(prefix) // '.save/' // 'wfc' // &
85- & TRIM(int_to_char(nqbz * (ii-1) + ik_g))
81+ & TRIM(int_to_char(nqbz * (ii-1) + ik))
8682 !
8783 OPEN(unit=fi, file=trim(filename)//'.dat', form='unformatted', status='old', iostat=ierr)
8884 READ(fi) ik0, xk0(1:3), ispin, gamma_only, scalef
89- READ(fi) ngw0, npw(ik_p,ii), npol0, ibnd
85+ READ(fi) ngw0, npw(ik,ii), npol0, ibnd
9086 READ(fi) bvec0(1:3,1:3)
91- READ(fi) igv(1:3,1:npw(ik_p,ii),ik_p,ii)
87+ READ(fi) igv(1:3,1:npw(ik,ii),ik,ii)
9288 DO ibnd = 1, nbnd
93- READ(fi) wfc0_all(1:npw(ik_p,ii),ibnd,1:npol,ik_p,ii)
89+ READ(fi) wfc0k(1:npw(ik,ii),ibnd,1:npol)
9490 END DO
9591 CLOSE(unit=fi, status='keep')
9692 !
9793 ! Verify k-point
9894 !
99- ikv(1) = (ik_g - 1) / (nq3*nq2)
100- ikv(2) = (ik_g - 1 - ikv(1)*nq2*nq3) / nq3
101- ikv(3) = ik_g - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
95+ ikv(1) = (ik - 1) / (nq3*nq2)
96+ ikv(2) = (ik - 1 - ikv(1)*nq2*nq3) / nq3
97+ ikv(3) = ik - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
10298 WHERE(ikv(1:3)*2 + ii - 1 >= (/nq1,nq2,nq3/)) ikv(1:3) = ikv(1:3) - (/nq1,nq2,nq3/)
10399 !
104100 xk1(1:3) = (REAL(ikv(1:3), dp) + 0.5_dp*REAL(ii-1, dp)) / REAL((/nq1, nq2, nq3/), dp)
105101 xk1(1:3) = MATMUL(bvec0(1:3, 1:3), xk1(1:3))
106102 IF(ANY(ABS(xk1(1:3) - xk0(1:3)) > 1.0e-5_dp)) THEN
107- WRITE(stdout,'("ik = ",i5,", ii = ",i5)') ik_g, ii
103+ WRITE(stdout,'("ik = ",i5,", ii = ",i5)') ik, ii
108104 WRITE(stdout,'("In ",3f15.10,", Required ",3f15.10)') xk0(1:3), xk1(1:3)
109- CALL errore ('get_wfcg', 'Invalid k-point (order)', ik_g)
105+ CALL errore ('get_wfcg', 'Invalid k-point (order)', ik)
110106 END IF
111107 !
112- END DO ! ik_p
108+ END IF
109+ !
110+ CALL mp_bcast(npw( ik, ii), 0, world_comm)
111+ CALL mp_bcast(igv(1:3,1:npwx, ik, ii), 0, world_comm)
112+ CALL mp_bcast(wfc0k (1:npwx, 1:nbnd, 1:npol), 0, world_comm)
113113 !
114- END DO ! ii
114+ wfc0(1:npwx, 1:nb(ii),1:npol,ik,ii) = wfc0k(1:npwx,bdsp(ii)+1:bdsp(ii)+nb(ii),1:npol)
115+ !
116+ END DO ! ik
115117 !
116- END IF
118+ END DO ! ii
117119 !
118- ! Scatter wfc0_all(bnd0_p+1:bnd0_p+nbnd_p) -> wfc0(1:nbnd_p) intra pool
119- !
120- CALL mp_bcast(npw,0,intra_pool_comm)
121- CALL mp_bcast(igv,0,intra_pool_comm)
122- !
123- DO iproc = 1, nproc_pool
124- IF(me_pool == 0) wfc0(1:npwx, 1: nbnd_p,1:npol,1:nk_p_max,1:2) &
125- & = wfc0_all(1:npwx,bnd0_p+1:bnd0_p+nbnd_p,1:npol,1:nk_p_max,1:2)
126- CALL circular_shift_wrapper_c(npwx,nbnd_p_max*npol*nk_p_max*2,intra_pool_comm,wfc0)
127- CALL mp_circular_shift_left(nbnd_p, 1, intra_pool_comm)
128- CALL mp_circular_shift_left(bnd0_p, 1, intra_pool_comm)
129- END DO
130- !
131- IF(me_pool == 0) DEALLOCATE(wfc0_all)
120+ DEALLOCATE(wfc0k)
132121 !
133122 END SUBROUTINE get_wfcg
134123 !
@@ -150,44 +139,44 @@ SUBROUTINE fft_wfc()
150139 USE uspp, ONLY : nkb, okvan
151140 USE disp, ONLY : nq1, nq2, nq3
152141 !
153- USE sctk_val, ONLY : igv, npw, wfc0, k0_p, nk_p, nk_p_max, nbnd_p, nbnd_p_max, &
142+ USE sctk_val, ONLY : igv, npw, wfc0, nb, nb_max, &
154143 & wfc, wfcq, becwfc, becwfcq, nqbz
155144 !
156145 IMPLICIT NONE
157146 !
158- INTEGER :: ig, ik_p, ik_g, ib, igv2(3), ig2, ipol, ikv(3), ii, igm
147+ INTEGER :: ig, ik, ib, igv2(3), ig2, ipol, ikv(3), ii, igm
159148 REAL(DP) :: xk(3), RAM_wfc
160149 INTEGER,ALLOCATABLE :: igk(:)
161150 COMPLEX(DP),ALLOCATABLE :: vkb(:,:)
162151 !
163152 ALLOCATE(vkb(npwx,nkb), igk(npwx))
164153 !
165- RAM_wfc = REAL(dfftt%nnr, dp)*REAL(nbnd_p_max, dp)*REAL(npol, dp)*REAL(nk_p_max, dp)*4.0_dp
154+ RAM_wfc = REAL(dfftt%nnr, dp)*REAL(nb_max, dp)*REAL(npol, dp)*REAL(nqbz, dp)*4.0_dp
166155 CALL mp_sum(RAM_wfc, world_comm)
167156 WRITE(stdout,'(7x,"Total RAM for WFC(r) : ",e10.2," GB")') &
168157 & REAL(dfftt%nnr,dp)*REAL(npol*4,dp)*REAL(nbnd,dp)*REAL(nqbz,dp)*16.0e-9_dp
169158 !
170- ALLOCATE(wfc( dfftt%nnr, nbnd_p_max, npol, nk_p_max, 2), &
171- & wfcq(dfftt%nnr, nbnd_p_max, npol, nk_p_max, 2))
172- ALLOCATE(becwfc( nkb, nbnd_p_max, npol, nk_p_max, 2), &
173- & becwfcq( nkb, nbnd_p_max, npol, nk_p_max, 2))
174- wfc( 1:dfftt%nnr,1:nbnd_p_max,1:npol,1:nk_p_max,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
175- IF(okvan) becwfc(1:nkb,1:nbnd_p_max,1:npol,1:nk_p_max,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
159+ ALLOCATE(wfc( dfftt%nnr, nb_max, npol, nqbz, 2), &
160+ & wfcq(dfftt%nnr, nb_max, npol, nqbz, 2))
161+ ALLOCATE(becwfc( nkb, nb_max, npol, nqbz, 2), &
162+ & becwfcq( nkb, nb_max, npol, nqbz, 2))
163+ wfc( 1:dfftt%nnr,1:nb_max,1:npol,1:nqbz,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
164+ IF(okvan) becwfc(1:nkb,1:nb_max,1:npol,1:nqbz,1:2) = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
176165 !
177166 ! Fourier trans. wfc(G) -> wfc(r)
178167 !
179168 DO ii = 1, 2
180- DO ik_p = 1, nk_p
169+ DO ik = 1, nqbz
181170 !
182- DO ig = 1, npw(ik_p,ii)
183- igv2(1:3) = MODULO(igv(1:3,ig,ik_p,ii), (/dfftt%nr1, dfftt%nr2, dfftt%nr3/))
171+ DO ig = 1, npw(ik, ii)
172+ igv2(1:3) = MODULO(igv(1:3,ig,ik,ii), (/dfftt%nr1, dfftt%nr2, dfftt%nr3/))
184173 ig2 = 1 + igv2(1) + dfftt%nr1 * igv2(2) + dfftt%nr1 * dfftt%nr2 * igv2(3)
185- wfc(ig2, 1:nbnd_p, 1:npol, ik_p, ii) = wfc0(ig, 1:nbnd_p, 1:npol, ik_p, ii)
174+ wfc(ig2, 1:nb(ii), 1:npol, ik, ii) = wfc0(ig, 1:nb(ii), 1:npol, ik, ii)
186175 END DO
187176 !
188177 DO ipol = 1, npol
189- DO ib = 1, nbnd_p
190- CALL cfft3d (wfc(1:dfftt%nnr, ib, ipol, ik_p, ii), &
178+ DO ib = 1, nb(ii)
179+ CALL cfft3d (wfc(1:dfftt%nnr, ib, ipol, ik, ii), &
191180 & dfftt%nr1, dfftt%nr2, dfftt%nr3, dfftt%nr1, dfftt%nr2, dfftt%nr3, 1, 1)
192181 END DO
193182 END DO
@@ -199,30 +188,28 @@ SUBROUTINE fft_wfc()
199188 !
200189 IF ( okvan ) THEN
201190 DO ii = 1, 2
202- DO ik_p = 1, nk_p
203- !
204- ik_g = ik_p + k0_p
191+ DO ik = 1, nqbz
205192 !
206- ikv(1) = (ik_g - 1) / (nq3*nq2)
207- ikv(2) = (ik_g - 1 - ikv(1)*nq2*nq3) / nq3
208- ikv(3) = ik_g - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
193+ ikv(1) = (ik - 1) / (nq3*nq2)
194+ ikv(2) = (ik - 1 - ikv(1)*nq2*nq3) / nq3
195+ ikv(3) = ik - 1 - ikv(1)*nq2*nq3 - ikv(2)*nq3
209196 WHERE(ikv(1:3)*2 + ii - 1 >= (/nq1,nq2,nq3/)) ikv(1:3) = ikv(1:3) - (/nq1,nq2,nq3/)
210197 !
211198 xk(1:3) = (REAL(ikv(1:3), dp) + 0.5_dp*REAL(ii-1, dp)) / REAL((/nq1, nq2, nq3/), dp)
212199 xk(1:3) = MATMUL(bg(1:3, 1:3), xk(1:3))
213200 !
214- DO ig = 1, npw(ik_p,ii)
201+ DO ig = 1, npw(ik,ii)
215202 DO igm = 1, ngm
216- IF(ALL(igv(1:3,ig,ik_p,ii) == mill(1:3,igm))) THEN
203+ IF(ALL(igv(1:3,ig,ik,ii) == mill(1:3,igm))) THEN
217204 igk(ig) = igm
218205 EXIT
219206 END IF
220207 END DO
221208 END DO
222- CALL init_us_2(npw(ik_p,ii), igk, xk, vkb)
209+ CALL init_us_2(npw(ik,ii), igk, xk, vkb)
223210 DO ipol = 1, npol
224- CALL calbec(npw(ik_p,ii), vkb, wfc0(1:npwx, 1:nbnd_p, ipol,ik_p,ii), &
225- & becwfc(1:nkb, 1:nbnd_p, ipol, ik_p, ii), nbnd_p)
211+ CALL calbec(npw(ik,ii), vkb, wfc0(1:npwx, 1:nb(ii), ipol,ik,ii), &
212+ & becwfc(1:nkb, 1:nb(ii), ipol, ik, ii), nb(ii))
226213 END DO
227214 END DO
228215 END DO
Show on old repository browser