schwz  Generated automatically from develop
settings.hpp (cd61077)
1 
2 /*******************************<SCHWARZ LIB LICENSE>***********************
3 Copyright (c) 2019, the SCHWARZ LIB authors
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16 
17 3. Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20 
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ******************************<SCHWARZ LIB LICENSE>*************************/
33 
34 
35 #ifndef settings_hpp
36 #define settings_hpp
37 
38 
39 #include <map>
40 #include <memory>
41 #include <string>
42 #include <tuple>
43 #include <vector>
44 
45 
46 #include <mpi.h>
47 #include <ginkgo/ginkgo.hpp>
48 
49 #include <device_guard.hpp>
50 #include <exception_helpers.hpp>
51 #include <gather.hpp>
52 #include <mpi_datatype.hpp>
53 #include <scatter.hpp>
54 
55 
// Selects the type behind `metis_indextype`: METIS' own `idx_t` (from
// <metis.h>) when SCHW_HAVE_METIS evaluates to true, `gko::int32` otherwise.
56 #if SCHW_HAVE_METIS
57 #include <metis.h>
58 #define metis_indextype idx_t
59 #else
60 #define metis_indextype gko::int32
61 #endif
62 
63 
// Default value assigned to Settings::overlap.
64 #define MINIMAL_OVERLAP 2
65 
66 
67 namespace schwz {
68 
69 
/**
 * The struct that contains the solver settings and the parameters to be set
 * by the user.
 *
 * NOTE(review): this listing is lossy. Wherever the listing's own line
 * numbers jump, the original documentation comments (and in a few places a
 * declaration line) are not shown. The notes below flag the declarations
 * that the page's tooltip index names but this listing omits.
 */
77 struct Settings {
    // The string that contains the ginkgo executor paradigm.
81  std::string executor_string;
82 
    // The ginkgo executor the code is to be executed on; defaults to a
    // reference executor.
86  std::shared_ptr<gko::Executor> executor = gko::ReferenceExecutor::create();
87 
    // Shared handle to a device_guard.
    // NOTE(review): the generated description for this member repeats the
    // executor's text verbatim - confirm the intended wording.
91  std::shared_ptr<device_guard> cuda_device_guard;
92 
    // Enumerators of `partition_settings`: the partition algorithm to be
    // used for partitioning the matrix.
    // NOTE(review): the enum's opening line (indexed at settings.hpp:96) is
    // absent from this listing. The values are not in ascending order
    // (regular2d is 0x4).
97  partition_regular = 0x0,
98  partition_regular2d = 0x4,
99  partition_metis = 0x1,
100  partition_zoltan = 0x2,
101  partition_custom = 0x3
102  };
    // Selected partitioning algorithm; defaults to partition_regular.
103  partition_settings partition = partition_settings::partition_regular;
104 
    // The overlap between the subdomains; defaults to MINIMAL_OVERLAP.
108  gko::int32 overlap = MINIMAL_OVERLAP;
109 
    // The string that contains the matrix file name to read from. The
    // default is the literal string "null" (not an empty string).
113  std::string matrix_filename = "null";
114 
    // Flag if the laplacian matrix should be generated within the library.
119  bool explicit_laplacian = true;
120 
    // Flag if mixed precision should be used.
124  bool use_mixed_precision = false;
125 
    // Flag to enable a random rhs.
129  bool enable_random_rhs = false;
130 
    // Flag to enable printing of matrices.
134  bool print_matrices = false;
135 
    // Flag to enable some debug printing.
139  bool debug_print = false;
140 
    // Enumerators of `local_solver_settings`: the local solver algorithm for
    // the local subdomain solves.
    // NOTE(review): the enum's opening line (indexed at settings.hpp:144) is
    // absent from this listing. The values are not in ascending order
    // (umfpack is 0x5).
145  direct_solver_cholmod = 0x0,
146  direct_solver_umfpack = 0x5,
147  direct_solver_ginkgo = 0x1,
148  iterative_solver_ginkgo = 0x2,
149  iterative_solver_dealii = 0x3,
150  solver_custom = 0x4
151  };
    // Selected local solver; defaults to iterative_solver_ginkgo.
152  local_solver_settings local_solver =
153  local_solver_settings::iterative_solver_ginkgo;
154 
    // Is the matrix non-symmetric? If so, GMRES is used for local solves.
158  bool non_symmetric_matrix = false;
159 
    // The restart iter for the GMRES solver.
163  unsigned int restart_iter = 1u;
164 
    // NOTE(review): `int reset_local_crit_iter` (settings.hpp:168) and
    // `bool naturally_ordered_factor` (settings.hpp:176) are named by the
    // tooltip index but their declarations are absent from this listing.
169 
177 
    // This setting defines the objective type for the metis partitioning.
181  std::string metis_objtype;
182 
    // Enable the block jacobi local preconditioner for the local solver.
186  bool use_precond = false;
187 
    // Enable the writing of debug out to file.
191  bool write_debug_out = false;
192 
    // NOTE(review): `bool write_iters_and_residuals` (settings.hpp:196) is
    // named by the tooltip index but absent from this listing.
197 
    // Flag to enable logging for local iterative solvers.
202  bool enable_logging = false;
203 
    // Enable writing the local permutations from CHOLMOD to a file.
207  bool write_perm_data = false;
208 
    // Iteration shift for node local communication.
212  int shifted_iter = 1;
213 
    // The settings for the various available communication paradigms.
217  struct comm_settings {
        // Enable one-sided communication.
221  bool enable_onesided = false;
222 
        // Enable explicit overlap between communication and computation.
226  bool enable_overlap = false;
227 
        // Put the data to the window using MPI_Put rather than get.
231  bool enable_put = false;
232 
        // Transfer the data using MPI_Get rather than put (on by default).
236  bool enable_get = true;
237 
        // Stage the MPI transfers through the host.
241  bool stage_through_host = false;
242 
        // Push each element separately directly into the buffer.
246  bool enable_one_by_one = false;
247 
        // Use local flush.
251  bool enable_flush_local = false;
252 
        // Use flush all (on by default).
256  bool enable_flush_all = true;
257 
        // Use local locks.
261  bool enable_lock_local = false;
262 
        // Use lock all (on by default).
266  bool enable_lock_all = true;
267  };
269 
    // Members of the convergence settings ("The various convergence settings
    // available").
    // NOTE(review): the opening line of this nested struct (indexed at
    // settings.hpp:273) is absent from this listing, and no per-member
    // descriptions are available here, so only the defaults are documented.
274  bool put_all_local_residual_norms = true;
275  bool enable_global_simple_tree = false;
276  bool enable_decentralized_leader_election = false;
277  bool enable_global_check = true;
278  bool enable_accumulate = false;
279 
280  bool enable_global_check_iter_offset = false;
281 
        // Local convergence criterion: residual based or solution based.
282  enum local_convergence_crit {
283  residual_based = 0x0,
284  solution_based = 0x1
285  };
286 
        // Selected local criterion; defaults to solution_based.
287  local_convergence_crit convergence_crit =
288  local_convergence_crit::solution_based;
289  };
291 
    // The factorization for the local direct solver; defaults to "cholmod".
295  std::string factorization = "cholmod";
296 
    // The reordering for the local solve; empty by default.
300  std::string reorder;
301 
    // Stores the given executor string (default "reference"); every other
    // member keeps its in-class default. The parameter shadows the member of
    // the same name; the member-initializer list resolves the left-hand name
    // to the member and the argument to the parameter.
302  Settings(std::string executor_string = "reference")
303  : executor_string(executor_string)
304  {}
305 };
306 
307 
/**
 * The solver metadata struct.
 *
 * Templated on the value type (ValueType) and the index type (IndexType)
 * used by its members.
 *
 * NOTE(review): this listing is lossy. Several declarations that the page's
 * tooltip index names are absent here; they are flagged below. Members
 * without an in-class initializer (my_rank, iter_count, tolerance and the
 * *_max_iters fields) are left uninitialized by default-initialization.
 */
318 template <typename ValueType, typename IndexType>
319 struct Metadata {
    // NOTE(review): `MPI_Comm mpi_communicator` (settings.hpp:323, "The MPI
    // communicator") is absent from this listing.
324 
    // Size fields: all default to 0 except num_subdomains, which defaults
    // to 1. No descriptions are available for them in this listing.
328  gko::size_type global_size = 0;
329 
333  gko::size_type oned_laplacian_size = 0;
334 
338  gko::size_type local_size = 0;
339 
343  gko::size_type local_size_x = 0;
344 
348  gko::size_type local_size_o = 0;
349 
353  gko::size_type overlap_size = 0;
354 
358  gko::size_type num_subdomains = 1;
359 
    // The rank of the subdomain.
363  int my_rank;
364 
    // NOTE(review): `int my_local_rank` (368), `int local_num_procs` (373),
    // `int comm_size` (379) and `int num_threads` (384) are named by the
    // tooltip index but absent from this listing.
369 
374 
380 
385 
    // The iteration count of the solver.
389  IndexType iter_count;
390 
    // The tolerance of the complete solver.
395  ValueType tolerance;
396 
    // NOTE(review): `ValueType local_solver_tolerance` (settings.hpp:401) is
    // absent from this listing.
402 
    // The maximum iteration count of the Schwarz solver.
406  IndexType max_iters;
407 
    // The maximum iteration count of the local iterative solver.
411  IndexType local_max_iters;
412 
    // The updated maximum iteration count of the local iterative solver.
416  IndexType updated_max_iters;
417 
    // Local preconditioner.
421  std::string local_precond;
422 
    // NOTE(review): `unsigned int precond_max_block_size` (settings.hpp:426)
    // is absent from this listing.
427 
    // Residual norm fields; both start at -1.0.
431  ValueType current_residual_norm = -1.0;
432 
436  ValueType min_residual_norm = -1.0;
437 
    // Type of `time_struct`, the struct used to measure the timings of each
    // function within the solver loop. MEASURE_ELAPSED_FUNC_TIME fills each
    // tuple as (id, rank, iter, name, elapsed times).
    // NOTE(review): the line carrying the member name (settings.hpp:443) is
    // absent from this listing.
442  std::vector<std::tuple<int, int, int, std::string, std::vector<ValueType>>>
444 
    // Type of `comm_data_struct`.
    // NOTE(review): the line carrying the member name (settings.hpp:451) is
    // absent, and the generated description repeats the time_struct text -
    // confirm the intended meaning.
449  std::vector<std::tuple<int, std::vector<std::tuple<int, int>>,
450  std::vector<std::tuple<int, int>>, int, int>>
452 
    // Members of the struct used for storing data for post-processing.
    // NOTE(review): its opening line (indexed at settings.hpp:457) is absent
    // from this listing.
458  std::vector<std::vector<ValueType>> global_residual_vector_out;
459  std::vector<ValueType> local_residual_vector_out;
460  std::vector<ValueType> local_converged_iter_count;
461  std::vector<ValueType> local_converged_resnorm;
462  std::vector<ValueType> local_timestamp;
463  };
465 
    // Defaults to 0.0; presumably an MPI_Wtime() reading taken at
    // initialization - TODO confirm against the code that sets it.
466  double init_mpi_wtime = 0.0;
    // The mapping containing the global to local indices.
470  std::shared_ptr<gko::Array<IndexType>> global_to_local;
471 
    // The mapping containing the local to global indices.
475  std::shared_ptr<gko::Array<IndexType>> local_to_global;
476 
    // The overlap row indices (held by value, unlike the neighbouring
    // shared_ptr-held arrays).
480  gko::Array<IndexType> overlap_row;
481 
    // The starting row of each subdomain in the matrix.
485  std::shared_ptr<gko::Array<IndexType>> first_row;
486 
    // The permutation used for the re-ordering.
490  std::shared_ptr<gko::Array<IndexType>> permutation;
491 
    // The inverse permutation used for the re-ordering.
495  std::shared_ptr<gko::Array<IndexType>> i_permutation;
496 };
497 
498 
/**
 * Executes `_func`, measures its wall-clock duration with
 * std::chrono::steady_clock and records it in `metadata.time_struct`.
 *
 * The expansion relies on two names being visible at the call site:
 *  - `ValueType`: arithmetic type used as the duration representation
 *    (std::chrono::duration<ValueType>, i.e. seconds);
 *  - `metadata`: an object with a `time_struct` member holding tuples of
 *    (id, rank, iter, name, elapsed times).
 *
 * Every macro argument is parenthesized where it is used as a value, so an
 * expression argument such as `flags & 1` is compared and stored as a whole
 * instead of being re-associated by operator precedence.
 *
 * NOTE(review): <chrono> is not in this header's visible include list; it is
 * presumably pulled in transitively - confirm.
 *
 * @param _func  the statement/expression to execute and time
 * @param _id    identifier stored in the tuple; for `_iter != 0` it is also
 *               used as the index into `metadata.time_struct`, so the entry
 *               must already exist at that position
 * @param _rank  rank stored in the tuple
 * @param _name  label; stringified with `#` and stored as the tuple's string
 * @param _iter  iteration number; `0` appends a new entry with one elapsed
 *               time, any other value updates the stored iteration of entry
 *               `_id` and appends the elapsed time to it
 */
#define MEASURE_ELAPSED_FUNC_TIME(_func, _id, _rank, _name, _iter)           \
    {                                                                        \
        auto start_time = std::chrono::steady_clock::now();                  \
        _func;                                                               \
        auto elapsed_time = std::chrono::duration<ValueType>(                \
            std::chrono::steady_clock::now() - start_time);                  \
        if ((_iter) == 0) {                                                  \
            std::vector<ValueType> temp_vec(1, elapsed_time.count());        \
            metadata.time_struct.push_back(                                  \
                std::make_tuple((_id), (_rank), (_iter), #_name, temp_vec)); \
        } else {                                                             \
            std::get<2>(metadata.time_struct[(_id)]) = (_iter);              \
            (std::get<4>(metadata.time_struct[(_id)]))                       \
                .push_back(elapsed_time.count());                            \
        }                                                                    \
    }
524 
525 
// Emits `template _macro(V, I);` for every combination of value type
// (float, double) and index type (gko::int32, gko::int64) - four expansions
// in total. `_macro` must therefore accept two arguments (value type, index
// type) and expand to something that is valid after the `template` keyword.
526 #define INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(_macro) \
527  template _macro(float, gko::int32); \
528  template _macro(double, gko::int32); \
529  template _macro(float, gko::int64); \
530  template _macro(double, gko::int64);
531 
532 
// Three-argument variant: emits `template _macro(double, I, M);` for index
// types I in (gko::int32, gko::int64) and third types M in (float, double).
// The first argument is always double in this active definition.
533 #define INSTANTIATE_FOR_EACH_VALUE_MIXEDVALUE_AND_INDEX_TYPE(_macro) \
534  template _macro(double, gko::int32, float); \
535  template _macro(double, gko::int32, double); \
536  template _macro(double, gko::int64, float); \
537  template _macro(double, gko::int64, double);
538 
539 // #define INSTANTIATE_FOR_EACH_VALUE_MIXEDVALUE_AND_INDEX_TYPE(_macro) \
540 // template _macro(float, gko::int32, float); \
541 // template _macro(double, gko::int32, float); \
542 // template _macro(double, gko::int32, double); \
543 // template _macro(float, gko::int64, float); \
544 // template _macro(double, gko::int64, float); \
545 // template _macro(double, gko::int64, double);
546 
547 
548 // explicit instantiations for schwz
// DECLARE_METADATA(V, I) expands to `struct Metadata<V, I>`; combined with
// the `template` prefix supplied by the instantiation macro this yields
// `template struct Metadata<V, I>;` for each of the four (value, index) type
// pairs. The helper macro is undefined again immediately after use.
549 #define DECLARE_METADATA(ValueType, IndexType) \
550  struct Metadata<ValueType, IndexType>
551 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_METADATA);
552 #undef DECLARE_METADATA
553 
554 
555 } // namespace schwz
556 
557 
558 #endif // settings_hpp
bool enable_lock_local
Use local locks.
Definition: settings.hpp:261
ValueType local_solver_tolerance
The tolerance of the local solver in case of an iterative solve.
Definition: settings.hpp:401
bool enable_flush_local
Use local flush.
Definition: settings.hpp:251
std::shared_ptr< gko::Array< IndexType > > global_to_local
The mapping containing the global to local indices.
Definition: settings.hpp:470
bool non_symmetric_matrix
Is the matrix non-symmetric? If so, use GMRES for local solves.
Definition: settings.hpp:158
IndexType updated_max_iters
The updated maximum iteration count of the local iterative solver.
Definition: settings.hpp:416
The solver metadata struct.
Definition: settings.hpp:319
IndexType max_iters
The maximum iteration count of the Schwarz solver.
Definition: settings.hpp:406
IndexType iter_count
The iteration count of the solver.
Definition: settings.hpp:389
int shifted_iter
Iteration shift for node local communication.
Definition: settings.hpp:212
bool enable_one_by_one
Push each element separately directly into the buffer.
Definition: settings.hpp:246
unsigned int restart_iter
The restart iter for the GMRES solver.
Definition: settings.hpp:163
int reset_local_crit_iter
The global iter at which to reset the local solver criterion.
Definition: settings.hpp:168
bool explicit_laplacian
Flag if the laplacian matrix should be generated within the library.
Definition: settings.hpp:119
bool use_precond
Enable the block jacobi local preconditioner for the local solver.
Definition: settings.hpp:186
partition_settings
The partition algorithm to be used for partitioning the matrix.
Definition: settings.hpp:96
std::vector< std::tuple< int, std::vector< std::tuple< int, int > >, std::vector< std::tuple< int, int > >, int, int > > comm_data_struct
The struct used to measure the timings of each function within the solver loop.
Definition: settings.hpp:451
bool enable_put
Put the data to the window using MPI_Put rather than get.
Definition: settings.hpp:231
std::vector< std::tuple< int, int, int, std::string, std::vector< ValueType > > > time_struct
The struct used to measure the timings of each function within the solver loop.
Definition: settings.hpp:443
std::string local_precond
Local preconditioner.
Definition: settings.hpp:421
bool enable_lock_all
Use lock all.
Definition: settings.hpp:266
bool write_debug_out
Enable the writing of debug out to file.
Definition: settings.hpp:191
bool stage_through_host
Stage the MPI transfers through the host.
Definition: settings.hpp:241
gko::int32 overlap
The overlap between the subdomains.
Definition: settings.hpp:108
The settings for the various available communication paradigms.
Definition: settings.hpp:217
unsigned int precond_max_block_size
The maximum block size for the preconditioner.
Definition: settings.hpp:426
std::shared_ptr< gko::Array< IndexType > > local_to_global
The mapping containing the local to global indices.
Definition: settings.hpp:475
std::string metis_objtype
This setting defines the objective type for the metis partitioning.
Definition: settings.hpp:181
std::shared_ptr< gko::Array< IndexType > > permutation
The permutation used for the re-ordering.
Definition: settings.hpp:490
local_solver_settings
The local solver algorithm for the local subdomain solves.
Definition: settings.hpp:144
int local_num_procs
The local number of procs in the subdomain.
Definition: settings.hpp:373
The struct that contains the solver settings and the parameters to be set by the user.
Definition: settings.hpp:77
int my_local_rank
The local rank of the subdomain.
Definition: settings.hpp:368
bool naturally_ordered_factor
Disables the re-ordering of the matrix before computing the triangular factors during the CHOLMOD factorization.
Definition: settings.hpp:176
std::shared_ptr< device_guard > cuda_device_guard
The device guard for the CUDA device (shared pointer to a device_guard).
Definition: settings.hpp:91
bool write_perm_data
Enable writing the local permutations from CHOLMOD to a file.
Definition: settings.hpp:207
bool debug_print
Flag to enable some debug printing.
Definition: settings.hpp:139
int num_threads
The number of threads used within the solver for each subdomain.
Definition: settings.hpp:384
bool enable_logging
Flag to enable logging for local iterative solvers.
Definition: settings.hpp:202
int my_rank
The rank of the subdomain.
Definition: settings.hpp:363
std::string executor_string
The string that contains the ginkgo executor paradigm.
Definition: settings.hpp:81
gko::Array< IndexType > overlap_row
The overlap row indices.
Definition: settings.hpp:480
std::string factorization
The factorization for the local direct solver.
Definition: settings.hpp:295
The Schwarz wrappers namespace.
Definition: comm_helpers.hpp:49
bool enable_overlap
Enable explicit overlap between communication and computation.
Definition: settings.hpp:226
std::shared_ptr< gko::Array< IndexType > > i_permutation
The inverse permutation used for the re-ordering.
Definition: settings.hpp:495
std::shared_ptr< gko::Executor > executor
The ginkgo executor the code is to be executed on.
Definition: settings.hpp:86
The various convergence settings available.
Definition: settings.hpp:273
std::shared_ptr< gko::Array< IndexType > > first_row
The starting row of each subdomain in the matrix.
Definition: settings.hpp:485
MPI_Comm mpi_communicator
The MPI communicator.
Definition: settings.hpp:323
bool use_mixed_precision
Flag if mixed precision should be used.
Definition: settings.hpp:124
bool write_iters_and_residuals
Enable writing the iters and residuals to a file.
Definition: settings.hpp:196
bool enable_get
Get the data from the window using MPI_Get rather than put.
Definition: settings.hpp:236
bool enable_flush_all
Use flush all.
Definition: settings.hpp:256
bool enable_random_rhs
Flag to enable a random rhs.
Definition: settings.hpp:129
IndexType local_max_iters
The maximum iteration count of the local iterative solver.
Definition: settings.hpp:411
bool print_matrices
Flag to enable printing of matrices.
Definition: settings.hpp:134
ValueType tolerance
The tolerance of the complete solver.
Definition: settings.hpp:395
bool enable_onesided
Enable one-sided communication.
Definition: settings.hpp:221
std::string reorder
The reordering for the local solve.
Definition: settings.hpp:300
int comm_size
The number of subdomains used within the solver, size of the communicator.
Definition: settings.hpp:379
std::string matrix_filename
The string that contains the matrix file name to read from.
Definition: settings.hpp:113
The struct used for storing data for post-processing.
Definition: settings.hpp:457