Octopus
states_elec_parallel.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21
27 use batch_oct_m
28 use debug_oct_m
29 use global_oct_m
30 use mesh_oct_m
32 use mpi_oct_m
38
39 implicit none
40
41 private
42
43 public :: &
51
55 end interface states_elec_parallel_gather
56
57
58contains
60 !
61 subroutine states_elec_parallel_blacs_blocksize(st, namespace, mesh, blocksize, total_np)
62 type(states_elec_t), intent(in) :: st
63 type(namespace_t), intent(in) :: namespace
64 type(mesh_t), intent(in) :: mesh
65 integer, intent(out) :: blocksize(2)
66 integer, intent(out) :: total_np
67
69
70#ifdef HAVE_SCALAPACK
71 ! We need to select the block size of the decomposition. This is
72 ! tricky, since not all processors have the same number of
73 ! points.
74 !
75 ! What we do for now is to use the maximum of the number of
76 ! points and we set to zero the remaining points.
77
78 if (.not. st%scalapack_compatible) then
79 message(1) = "Attempt to use ScaLAPACK when processes have not been distributed in compatible layout."
80 message(2) = "You need to set ScaLAPACKCompatible = yes in the input file and re-run."
81 call messages_fatal(2, only_root_writes = .true., namespace=namespace)
82 end if
83
84 if (mesh%parallel_in_domains) then
85 blocksize(1) = maxval(mesh%pv%np_local_vec) + (st%d%dim - 1) * &
86 maxval(mesh%pv%np_local_vec + mesh%pv%np_bndry + mesh%pv%np_ghost)
87 else
88 blocksize(1) = mesh%np + (st%d%dim - 1)*mesh%np_part
89 end if
90
91 if (st%parallel_in_states) then
92 blocksize(2) = maxval(st%dist%num)
93 else
94 blocksize(2) = st%nst
95 end if
96
97 total_np = blocksize(1)*st%dom_st_proc_grid%nprow
98
99
100 assert(st%d%dim*mesh%np_part >= blocksize(1))
101#else
102 blocksize(1) = 0
103 blocksize(2) = 0
104 total_np = 0
105#endif
106
109
110 ! ------------------------------------------------------------
118 !
120 type(states_elec_t), intent(inout) :: this
121
122 integer :: ib, iqn
123
125
126 assert(allocated(this%group%psib))
127
128 if (this%mpi_grp%size == 1) then
130 return
131 end if
132
133 assert(.not. allocated(this%group%rma_win))
134
135 safe_allocate(this%group%rma_win(1:this%group%nblocks, 1:this%nik))
136
137 do iqn = this%d%kpt%start, this%d%kpt%end
138 do ib = 1, this%group%nblocks
139 if (this%group%block_is_local(ib, iqn)) then
140 call this%group%psib(ib, iqn)%remote_access_start(this%mpi_grp, this%group%rma_win(ib, iqn))
141 else
142#ifdef HAVE_MPI
143 ! create an empty window
144 call mpi_win_create(0, int(0, mpi_address_kind), 1, &
145 mpi_info_null, this%mpi_grp%comm, this%group%rma_win(ib, iqn), mpi_err)
146#endif
147 end if
148 end do
149 end do
150
153
154 ! ---------------------------------------------------------
159 !
161 type(states_elec_t), intent(inout) :: this
162
163 integer :: ib, iqn
164
165 if (.not. allocated(this%group%rma_win)) return
166
168
169 assert(allocated(this%group%psib))
170
171 do iqn = this%d%kpt%start, this%d%kpt%end
172 do ib = 1, this%group%nblocks
173 if (this%group%block_is_local(ib, iqn)) then
174 call this%group%psib(ib, iqn)%remote_access_stop(this%group%rma_win(ib, iqn))
175 else
176#ifdef HAVE_MPI
177 call mpi_win_free(this%group%rma_win(ib, iqn), mpi_err)
178#endif
179 end if
180 end do
181 end do
182
183 safe_deallocate_a(this%group%rma_win)
184
187
189 subroutine states_elec_parallel_allocate_batch(st, psib, np, ib, ik, packed)
190 type(states_elec_t), intent(in) :: st
191 type(wfs_elec_t), intent(inout) :: psib
192 integer, intent(in) :: np
193 integer, intent(in) :: ib
194 integer, intent(in) :: ik
195 logical, optional, intent(in) :: packed
196
198
199 if (states_are_real(st)) then
200 call dwfs_elec_init(psib, st%d%dim, st%group%block_range(ib, 1), st%group%block_range(ib, 2), &
201 np, ik, packed=packed)
202 else
203 call zwfs_elec_init(psib, st%d%dim, st%group%block_range(ib, 1), st%group%block_range(ib, 2), &
204 np, ik, packed=packed)
205 end if
206
209
210 ! --------------------------------------
217 subroutine states_elec_parallel_get_block(this, mesh, ib, iqn, psib)
218 type(states_elec_t), target, intent(in) :: this
219 type(mesh_t), intent(in) :: mesh
220 integer, intent(in) :: ib
221 integer, intent(in) :: iqn
222 class(wfs_elec_t), pointer, intent(out) :: psib
223
224 integer :: total_size
225#ifdef HAVE_MPI
226#endif
227
229
230 call profiling_in("STATES_GET_BLOCK")
231
232 if (this%group%block_is_local(ib, iqn)) then
233 psib => this%group%psib(ib, iqn)
234 call profiling_out("STATES_GET_BLOCK")
236 return
237 else
238 safe_allocate_type(wfs_elec_t, psib)
239 call states_elec_parallel_allocate_batch(this, psib, mesh%np_part, ib, iqn)
240
241 assert(allocated(this%group%rma_win))
242
243 call psib%do_pack(copy = .false.)
244 assert(product(psib%pack_size) < huge(0_int32))
245 total_size = product(int(psib%pack_size, int32))
246
247#ifdef HAVE_MPI
248
249 call profiling_in("STATES_GET_BLOCK_MPI")
250 call mpi_win_lock(mpi_lock_shared, this%group%block_node(ib), 0, this%group%rma_win(ib, iqn), mpi_err)
251
252 if (states_are_real(this)) then
253 call mpi_get(psib%dff_pack(1, 1), total_size, mpi_double_precision, &
254 this%group%block_node(ib), int(0, mpi_address_kind), total_size, mpi_double_precision, &
255 this%group%rma_win(ib, iqn), mpi_err)
256 else
257 call mpi_get(psib%zff_pack(1, 1), total_size, mpi_double_complex, &
258 this%group%block_node(ib), int(0, mpi_address_kind), total_size, mpi_double_complex, &
259 this%group%rma_win(ib, iqn), mpi_err)
260 end if
261
262 call mpi_win_unlock(this%group%block_node(ib), this%group%rma_win(ib, iqn), mpi_err)
263
264 call profiling_out("STATES_GET_BLOCK_MPI")
265#endif
266 end if
267
268 call profiling_out("STATES_GET_BLOCK")
269
271 end subroutine states_elec_parallel_get_block
272
273 ! --------------------------------------
274 !
275 subroutine states_elec_parallel_release_block(this, ib, psib)
276 type(states_elec_t), target, intent(in) :: this
277 integer, intent(in) :: ib
278 class(wfs_elec_t), pointer :: psib
279
281
282 if (this%group%block_is_local(ib, psib%ik)) then
283 nullify(psib)
284 else
285 call psib%end()
286 safe_deallocate_p(psib)
287 end if
288
291
292 ! --------------------------------------
293
294#include "undef.F90"
295#include "real.F90"
296#include "states_elec_parallel_inc.F90"
297
298#include "undef.F90"
299#include "complex.F90"
300#include "states_elec_parallel_inc.F90"
301#include "undef.F90"
302
303
305
306!! Local Variables:
307!! mode: f90
308!! coding: utf-8
309!! End:
This module implements batches of mesh functions.
Definition: batch.F90:133
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:420
integer, public mpi_err
used to store return values of mpi calls
Definition: mpi.F90:269
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:623
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:552
pure logical function, public states_are_real(st)
This module provides routines for communicating states when using states parallelization.
subroutine, public states_elec_parallel_remote_access_stop(this)
stop remote memory access for states on other processors
subroutine dstates_elec_parallel_gather_3(st, dims, psi)
gather distributed states into a local array
subroutine dstates_elec_parallel_gather_1(st, aa)
gather a one-dimensional array, distributed over states
subroutine zstates_elec_parallel_gather_1(st, aa)
gather a one-dimensional array, distributed over states
subroutine, public states_elec_parallel_get_block(this, mesh, ib, iqn, psib)
retrieve wave functions from a different node
subroutine, public states_elec_parallel_allocate_batch(st, psib, np, ib, ik, packed)
Initializes a wfs_elec_t for np points, given block index ib, ad the k-point index ik.
subroutine, public states_elec_parallel_remote_access_start(this)
start remote memory access for states on other processors
subroutine zstates_elec_parallel_gather_3(st, dims, psi)
gather distributed states into a local array
subroutine, public states_elec_parallel_blacs_blocksize(st, namespace, mesh, blocksize, total_np)
determine the block size for a BLACS distribution
subroutine, public states_elec_parallel_release_block(this, ib, psib)
subroutine, public zwfs_elec_init(this, dim, st_start, st_end, np, ik, special, packed)
initialize an empty wfs_elec_t object
Definition: wfs_elec.F90:537
subroutine, public dwfs_elec_init(this, dim, st_start, st_end, np, ik, special, packed)
initialize an empty wfs_elec_t object
Definition: wfs_elec.F90:394
Describes mesh distribution to nodes.
Definition: mesh.F90:186
The states_elec_t class contains all electronic wave functions.
batches of electronic states
Definition: wfs_elec.F90:138
int true(void)