conv_tools.hpp (92dbd95)
/*******************************<SCHWARZ LIB LICENSE>***********************
Copyright (c) 2019, the SCHWARZ LIB authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<SCHWARZ LIB LICENSE>*************************/


#ifndef conv_tools_hpp
#define conv_tools_hpp

#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>


#include <communicate.hpp>
#include <settings.hpp>


namespace schwz {
namespace conv_tools {


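/**
 * Updates this subdomain's entry of the residual-norm vector and writes the
 * new value into the residual-norm window of every other subdomain with
 * MPI_Put, skipping the put when the norm has not changed since the
 * previous iteration.
 *
 * @param settings  The solver settings.
 * @param metadata  The solver metadata.
 * @param local_resnorm  The current local residual norm.
 * @param local_residual_vector  The vector of residual norms, one entry per
 *                               subdomain.
 * @param window_residual_vector  The MPI window exposing the residual
 *                                vector for one-sided access.
 */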
template <typename ValueType, typename IndexType>
void put_all_local_residual_norms(
    const Settings &settings, Metadata<ValueType, IndexType> &metadata,
    ValueType &local_resnorm,
    std::shared_ptr<gko::matrix::Dense<ValueType>> &local_residual_vector,
    MPI_Win &window_residual_vector)
{
    auto num_subdomains = metadata.num_subdomains;
    auto my_rank = metadata.my_rank;
    auto l_res_vec = local_residual_vector->get_values();
    auto iter = metadata.iter_count;
    auto mpi_vtype = schwz::mpi::get_mpi_datatype(l_res_vec[my_rank]);

    // Keep the smallest residual norm seen so far for this subdomain.
    l_res_vec[my_rank] = std::min(l_res_vec[my_rank], local_resnorm);
    for (auto j = 0; j < num_subdomains; j++) {
        auto gres =
            metadata.post_process_data.global_residual_vector_out[my_rank];
        // Only put if the norm changed since the previous iteration.
        if (j != my_rank && iter > 0 && l_res_vec[my_rank] != gres[iter - 1]) {
            MPI_Put(&l_res_vec[my_rank], 1, mpi_vtype, j, my_rank, 1,
                    mpi_vtype, window_residual_vector);
            if (settings.comm_settings.enable_flush_all) {
                MPI_Win_flush(j, window_residual_vector);
            } else if (settings.comm_settings.enable_flush_local) {
                MPI_Win_flush_local(j, window_residual_vector);
            }
        }
    }
}


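/**
 * Propagates the locally known residual norms to this subdomain's outgoing
 * neighbors with MPI_Accumulate and the MPI_MIN operation, so that each
 * target keeps the smallest norm seen for every subdomain. Entries that are
 * unchanged since the previous iteration, or still at their initial
 * max-value sentinel, are not sent.
 *
 * @param settings  The solver settings.
 * @param metadata  The solver metadata.
 * @param comm_s  The communication struct holding the neighbor lists.
 * @param local_resnorm  The current local residual norm.
 * @param local_residual_vector  The vector of residual norms, one entry per
 *                               subdomain.
 * @param window_residual_vector  The MPI window exposing the residual
 *                                vector for one-sided access.
 */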
template <typename ValueType, typename IndexType, typename MixedValueType>
void propagate_all_local_residual_norms(
    const Settings &settings, Metadata<ValueType, IndexType> &metadata,
    struct Communicate<ValueType, IndexType, MixedValueType>::comm_struct
        &comm_s,
    ValueType &local_resnorm,
    std::shared_ptr<gko::matrix::Dense<ValueType>> &local_residual_vector,
    MPI_Win &window_residual_vector)
{
    auto num_subdomains = metadata.num_subdomains;
    auto my_rank = metadata.my_rank;
    auto l_res_vec = local_residual_vector->get_values();
    auto iter = metadata.iter_count;
    auto global_put = comm_s.global_put->get_data();
    auto neighbors_out = comm_s.neighbors_out->get_data();
    auto max_valtype = std::numeric_limits<ValueType>::max();
    auto mpi_vtype = schwz::mpi::get_mpi_datatype(l_res_vec[my_rank]);

    // Keep the smallest residual norm seen so far for this subdomain.
    l_res_vec[my_rank] = std::min(l_res_vec[my_rank], local_resnorm);
    auto gres = metadata.post_process_data.global_residual_vector_out[my_rank];
    for (auto i = 0; i < comm_s.num_neighbors_out; i++) {
        if ((global_put[i])[0] > 0) {
            auto p = neighbors_out[i];
            // flag becomes non-zero if any entry changed since the last
            // iteration and is therefore worth sending to neighbor p.
            int flag = 0;
            if (iter == 0 || l_res_vec[my_rank] != gres[iter - 1]) flag = 1;
            if (flag == 0) {
                for (auto j = 0; j < num_subdomains; j++) {
                    if (j != p && iter > 0 && l_res_vec[j] != max_valtype &&
                        l_res_vec[j] !=
                            (metadata.post_process_data
                                 .global_residual_vector_out[j])[iter - 1]) {
                        flag++;
                    }
                }
            }
            if (flag > 0) {
                for (auto j = 0; j < num_subdomains; j++) {
                    if ((j == my_rank &&
                         (iter == 0 || l_res_vec[my_rank] != gres[iter - 1])) ||
                        (j != p && iter > 0 && l_res_vec[j] != max_valtype &&
                         l_res_vec[j] !=
                             (metadata.post_process_data
                                  .global_residual_vector_out[j])[iter - 1])) {
                        // MPI_MIN ensures the target keeps the smallest
                        // norm reported for subdomain j.
                        MPI_Accumulate(&l_res_vec[j], 1, mpi_vtype, p, j, 1,
                                       mpi_vtype, MPI_MIN,
                                       window_residual_vector);
                    }
                }
                if (settings.comm_settings.enable_flush_all) {
                    MPI_Win_flush(p, window_residual_vector);
                } else if (settings.comm_settings.enable_flush_local) {
                    MPI_Win_flush_local(p, window_residual_vector);
                }
            }
        }
    }
}

// This implementation is from Yamazaki et al. 2019
// (https://doi.org/10.1016/j.parco.2019.05.004).
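/**
 * Tree-based global convergence check: the subdomains form an implicit
 * binary tree in which each rank pushes a flag into its parent's
 * convergence window (via MPI_Put) once its children, if any, and its own
 * local check report convergence; rank 0 then propagates the global
 * convergence flag back down to its children.
 *
 * @param settings  The solver settings.
 * @param metadata  The solver metadata.
 * @param convergence_vector  Per-rank flags: entries 0 and 1 are written by
 *                            the children, entry 2 by the parent.
 * @param converged_all_local  Whether this subdomain has locally detected
 *                             global convergence.
 * @param num_converged_procs  Set to the number of subdomains once global
 *                             convergence is seen, 0 otherwise.
 * @param window_convergence  The MPI window exposing convergence_vector.
 */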
template <typename ValueType, typename IndexType>
void global_convergence_check_onesided_tree(
    const Settings &settings, const Metadata<ValueType, IndexType> &metadata,
    std::shared_ptr<gko::Array<IndexType>> &convergence_vector,
    int &converged_all_local, int &num_converged_procs,
    MPI_Win &window_convergence)
{
    int ione = 1;
    auto num_subdomains = metadata.num_subdomains;
    auto my_rank = metadata.my_rank;
    auto conv_vector = convergence_vector->get_data();

    // if the children locally converged and if local convergence is
    // detected for the first time, push up
    if (((conv_vector[0] == 1 &&
          conv_vector[1] == 1) ||  // both children converged
         (conv_vector[0] == 1 &&
          my_rank == num_subdomains / 2 - 1) ||  // only one child
         (my_rank >= num_subdomains / 2 && conv_vector[0] != 2)) &&  // leaf
        converged_all_local > 0)  // locally detected global convergence
    {
        if (my_rank == 0) {
            // on the top, start going down
            conv_vector[2] = 1;
        } else {
            // push to parent
            int p = (my_rank - 1) / 2;
            int id = (my_rank % 2 == 0 ? 1 : 0);
            MPI_Put(&ione, 1, MPI_INT, p, id, 1, MPI_INT, window_convergence);
            if (settings.comm_settings.enable_flush_all) {
                MPI_Win_flush(p, window_convergence);
            } else if (settings.comm_settings.enable_flush_local) {
                MPI_Win_flush_local(p, window_convergence);
            }
        }
        conv_vector[0] = 2;  // to push up only once
    }

    // if global convergence is detected for the first time, push down to
    // both children
    if (conv_vector[2] == 1) {
        int p = 2 * my_rank + 1;
        if (p < num_subdomains) {
            MPI_Put(&ione, 1, MPI_INT, p, 2, 1, MPI_INT, window_convergence);
            if (settings.comm_settings.enable_flush_all) {
                MPI_Win_flush(p, window_convergence);
            } else if (settings.comm_settings.enable_flush_local) {
                MPI_Win_flush_local(p, window_convergence);
            }
        }
        p++;
        if (p < num_subdomains) {
            MPI_Put(&ione, 1, MPI_INT, p, 2, 1, MPI_INT, window_convergence);
            if (settings.comm_settings.enable_flush_all) {
                MPI_Win_flush(p, window_convergence);
            } else if (settings.comm_settings.enable_flush_local) {
                MPI_Win_flush_local(p, window_convergence);
            }
        }
        conv_vector[1]++;
        num_converged_procs = num_subdomains;
    } else {
        num_converged_procs = 0;
    }
}


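/**
 * Decentralized global convergence check. With
 * convergence_settings.enable_accumulate, a locally converged subdomain
 * adds 1 to the first slot of every other subdomain's convergence window
 * (MPI_Accumulate with MPI_SUM) and the converged count is read back from
 * that slot. Otherwise, each subdomain keeps one flag per subdomain, counts
 * the flags it has seen so far, and forwards newly seen flags to its
 * outgoing neighbors with MPI_Put.
 *
 * @param settings  The solver settings.
 * @param metadata  The solver metadata.
 * @param comm_s  The communication struct holding the neighbor lists.
 * @param convergence_vector  The window-backed convergence flags.
 * @param convergence_sent  The flags already forwarded to the neighbors.
 * @param convergence_local  A local snapshot of convergence_vector.
 * @param converged_all_local  Whether this subdomain has locally detected
 *                             global convergence.
 * @param num_converged_procs  The number of subdomains known to have
 *                             converged.
 * @param window_convergence  The MPI window exposing convergence_vector.
 */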
template <typename ValueType, typename IndexType, typename MixedValueType>
void global_convergence_decentralized(
    const Settings &settings, const Metadata<ValueType, IndexType> &metadata,
    struct Communicate<ValueType, IndexType, MixedValueType>::comm_struct
        &comm_s,
    std::shared_ptr<gko::Array<IndexType>> &convergence_vector,
    std::shared_ptr<gko::Array<IndexType>> &convergence_sent,
    std::shared_ptr<gko::Array<IndexType>> &convergence_local,
    int &converged_all_local, int &num_converged_procs,
    MPI_Win &window_convergence)
{
    auto num_subdomains = metadata.num_subdomains;
    auto my_rank = metadata.my_rank;
    auto conv_vector = convergence_vector->get_data();
    auto conv_sent = convergence_sent->get_data();
    auto conv_local = convergence_local->get_data();
    auto global_put = comm_s.global_put->get_data();
    auto neighbors_out = comm_s.neighbors_out->get_data();
    if (settings.convergence_settings.enable_accumulate) {
        // Accumulate mode: a converged subdomain increments slot 0 of all
        // other subdomains' windows.
        if (converged_all_local == 1) {
            for (auto j = 0; j < num_subdomains; j++) {
                if (j != my_rank) {
                    int ione = 1;
                    MPI_Accumulate(&ione, 1, MPI_INT, j, 0, 1, MPI_INT,
                                   MPI_SUM, window_convergence);
                    if (settings.comm_settings.enable_flush_all) {
                        MPI_Win_flush(j, window_convergence);
                    } else if (settings.comm_settings.enable_flush_local) {
                        MPI_Win_flush_local(j, window_convergence);
                    }
                } else {
                    conv_vector[0]++;
                }
            }
        }
        num_converged_procs = conv_vector[0];
    } else {
        // Flag-vector mode: mark this subdomain as converged, count the
        // flags seen so far, and forward any flags not yet sent to the
        // outgoing neighbors.
        if (converged_all_local == 1) {
            conv_vector[my_rank] = 1;
        }
        num_converged_procs = 0;
        std::copy(conv_vector, conv_vector + num_subdomains, conv_local);
        num_converged_procs =
            std::accumulate(conv_vector, conv_vector + num_subdomains, 0);
        for (auto i = 0; i < comm_s.num_neighbors_out; i++) {
            if ((global_put[i])[0] > 0) {
                auto p = neighbors_out[i];
                int ione = 1;
                for (auto j = 0; j < num_subdomains; j++) {
                    if (conv_sent[j] == 0 && conv_local[j] == 1) {
                        MPI_Put(&ione, 1, MPI_INT, p, j, 1, MPI_INT,
                                window_convergence);
                    }
                }
                if (settings.comm_settings.enable_flush_all) {
                    MPI_Win_flush(p, window_convergence);
                } else if (settings.comm_settings.enable_flush_local) {
                    MPI_Win_flush_local(p, window_convergence);
                }
            }
        }
        std::copy(conv_local, conv_local + num_subdomains, conv_sent);
    }
}

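/*
// A minimal usage sketch (not part of the library) for the tree-based check
// above, with IndexType = int. It assumes `settings` and `metadata` are
// already initialized by the solver; the window setup shown here is one
// plausible way to back convergence_vector with an MPI window, not
// necessarily how schwz itself creates its windows.
auto exec = gko::ReferenceExecutor::create();
auto convergence_vector = std::make_shared<gko::Array<int>>(exec, 3);
std::fill_n(convergence_vector->get_data(), 3, 0);
MPI_Win window_convergence;
MPI_Win_create(convergence_vector->get_data(), 3 * sizeof(int), sizeof(int),
               MPI_INFO_NULL, MPI_COMM_WORLD, &window_convergence);
MPI_Win_lock_all(0, window_convergence);
int converged_all_local = 0;
int num_converged_procs = 0;
while (num_converged_procs < metadata.num_subdomains) {
    // ... one local solver iteration; set converged_all_local to 1 once
    // the local residual check deems the global solution converged ...
    conv_tools::global_convergence_check_onesided_tree(
        settings, metadata, convergence_vector, converged_all_local,
        num_converged_procs, window_convergence);
}
MPI_Win_unlock_all(window_convergence);
MPI_Win_free(&window_convergence);
*/
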
/*
// Explicit Instantiations
#define DECLARE_FUNCTION(ValueType, IndexType) \
    void put_all_local_residual_norms( \
        const Settings &, Metadata<ValueType, IndexType> &, ValueType &, \
        std::shared_ptr<gko::matrix::Dense<ValueType>> &, MPI_Win &);
INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
#undef DECLARE_FUNCTION

#define DECLARE_FUNCTION2(ValueType, IndexType, MixedValueType) \
    void propagate_all_local_residual_norms( \
        const Settings &, Metadata<ValueType, IndexType> &, \
        struct Communicate<ValueType, IndexType, MixedValueType>::comm_struct \
            &, \
        ValueType &, std::shared_ptr<gko::matrix::Dense<ValueType>> &, \
        MPI_Win &);
INSTANTIATE_FOR_EACH_VALUE_MIXEDVALUE_AND_INDEX_TYPE(DECLARE_FUNCTION2);
#undef DECLARE_FUNCTION2

#define DECLARE_FUNCTION3(ValueType, IndexType) \
    void global_convergence_check_onesided_tree( \
        const Settings &, const Metadata<ValueType, IndexType> &, \
        std::shared_ptr<gko::Array<IndexType>> &, int &, int &, MPI_Win &);
INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION3);
#undef DECLARE_FUNCTION3

#define DECLARE_FUNCTION4(ValueType, IndexType, MixedValueType) \
    void global_convergence_decentralized( \
        const Settings &, const Metadata<ValueType, IndexType> &, \
        struct Communicate<ValueType, IndexType, MixedValueType>::comm_struct \
            &, \
        std::shared_ptr<gko::Array<IndexType>> &, \
        std::shared_ptr<gko::Array<IndexType>> &, \
        std::shared_ptr<gko::Array<IndexType>> &, int &, int &, MPI_Win &);
INSTANTIATE_FOR_EACH_VALUE_MIXEDVALUE_AND_INDEX_TYPE(DECLARE_FUNCTION4);
#undef DECLARE_FUNCTION4
*/

}  // namespace conv_tools
}  // namespace schwz


#endif  // conv_tools_hpp