schwz  Generated automatically from develop
cusparse_helpers.hpp (5a15602)
1 
2 /*******************************<SCHWARZ LIB LICENSE>***********************
3 Copyright (c) 2019, the SCHWARZ LIB authors
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16 
17 3. Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20 
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ******************************<SCHWARZ LIB LICENSE>*************************/
33 
34 
35 #ifndef cusparse_helpers_hpp
36 #define cusparse_helpers_hpp
37 
38 
39 #include <memory>
40 #include <vector>
41 
42 
43 #include <cuda_runtime.h>
44 #include <cusparse.h>
45 #include <exception_helpers.hpp>
46 #include <settings.hpp>
47 #include <solve.hpp>
48 
49 
50 namespace schwz {
51 namespace CusparseWrappers {
52 
53 
54 template <typename ValueType, typename IndexType>
55 void initialize(const Settings &settings,
56  const Metadata<ValueType, IndexType> &metadata,
57  struct Solve<ValueType, IndexType>::cusparse &cusparse,
58  const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>>
59  &triangular_factor,
60  std::shared_ptr<gko::matrix::Dense<ValueType>> &local_solution)
61 {
62  auto handle = (static_cast<gko::CudaExecutor *>(settings.executor.get()))
63  ->get_cusparse_handle();
64  auto num_rows = triangular_factor->get_size()[0];
65  auto sol_size = local_solution->get_size()[0];
66  auto num_rhs = local_solution->get_size()[1];
67  auto factor_nnz =
68  triangular_factor->get_num_stored_elements(); // Check if this is
69  // actually equal to nnz
70  auto row_ptrs = triangular_factor->get_const_row_ptrs();
71  auto col_idxs = triangular_factor->get_const_col_idxs();
72  auto factor_values = triangular_factor->get_const_values();
73  auto sol_values = local_solution->get_values();
74  auto one = 1.0;
75 
76  cusparse.policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
77  cusparse.algo = 0;
78  cusparse.info = NULL;
79  cusparse.L_factor_descr = NULL;
80  cusparse.L_factor_info = NULL;
81  cusparse.L_factor_work_size = 0;
82  cusparse.U_factor_descr = NULL;
83  cusparse.U_factor_info = NULL;
84  cusparse.U_factor_work_size = 0;
85  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreate(&handle));
86  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
87  cusparseCreateSolveAnalysisInfo(&cusparse.info));
88 
89  /* configuration of matrices */
90  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
91  cusparseCreateCsrsm2Info(&cusparse.L_factor_info));
92  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
93  cusparseCreateMatDescr(&cusparse.L_factor_descr));
94  cusparseSetMatIndexBase(cusparse.L_factor_descr, CUSPARSE_INDEX_BASE_ZERO);
95  cusparseSetMatType(cusparse.L_factor_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
96  cusparseSetMatFillMode(cusparse.L_factor_descr, CUSPARSE_FILL_MODE_UPPER);
97  cusparseSetMatDiagType(cusparse.L_factor_descr,
98  CUSPARSE_DIAG_TYPE_NON_UNIT);
99 
100  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
101  cusparseCreateCsrsm2Info(&cusparse.U_factor_info));
102  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
103  cusparseCreateMatDescr(&cusparse.U_factor_descr));
104  cusparseSetMatIndexBase(cusparse.U_factor_descr, CUSPARSE_INDEX_BASE_ZERO);
105  cusparseSetMatType(cusparse.U_factor_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
106  cusparseSetMatFillMode(cusparse.U_factor_descr, CUSPARSE_FILL_MODE_UPPER);
107  cusparseSetMatDiagType(cusparse.U_factor_descr,
108  CUSPARSE_DIAG_TYPE_NON_UNIT);
109 
110  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_bufferSizeExt(
111  handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
112  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
113  cusparse.U_factor_descr, // descriptor
114  factor_values, row_ptrs, col_idxs, sol_values, sol_size,
115  cusparse.U_factor_info, cusparse.policy, &cusparse.U_factor_work_size));
116  // > L solve
117  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_bufferSizeExt(
118  handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
119  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
120  cusparse.L_factor_descr, // descriptor
121  factor_values, row_ptrs, col_idxs, sol_values, sol_size,
122  cusparse.L_factor_info, cusparse.policy, &cusparse.L_factor_work_size));
123 
124  // size_t lwork = (gpu_struct->lwork_L > gpu_struct->lwork_U ?
125  // gpu_struct->lwork_L : gpu_struct->lwork_U); gpu_struct->lwork_L =
126  // lwork; gpu_struct->lwork_U = lwork;
127 
128  // allocate workspace
129  if (cusparse.L_factor_work_vec != nullptr) {
130  cudaFree(cusparse.L_factor_work_vec);
131  }
132  SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaMalloc(
133  (void **)&cusparse.L_factor_work_vec, cusparse.L_factor_work_size));
134 
135  if (cusparse.U_factor_work_vec != nullptr) {
136  cudaFree(cusparse.U_factor_work_vec);
137  }
138  SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaMalloc(
139  (void **)&cusparse.U_factor_work_vec, cusparse.U_factor_work_size));
140 
141  // Analyze U solve.
142  SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
143  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_analysis(
144  handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
145  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
146  cusparse.U_factor_descr, // descriptor
147  factor_values, row_ptrs, col_idxs, sol_values, sol_size,
148  cusparse.U_factor_info, cusparse.policy, cusparse.U_factor_work_vec));
149  SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
150 
151  // Analyze L solve.
152  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_analysis(
153  handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
154  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
155  cusparse.L_factor_descr, // descriptor
156  factor_values, row_ptrs, col_idxs, sol_values, sol_size,
157  cusparse.L_factor_info, cusparse.policy, cusparse.L_factor_work_vec));
158  SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
159 }
160 
161 template <typename ValueType, typename IndexType>
162 void solve(const Settings &settings,
163  const Metadata<ValueType, IndexType> &metadata,
164  struct Solve<ValueType, IndexType>::cusparse &cusparse,
165  const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>>
166  &triangular_factor,
167  std::shared_ptr<gko::matrix::Dense<ValueType>> &local_solution)
168 {
169  auto handle = (static_cast<gko::CudaExecutor *>(settings.executor.get()))
170  ->get_cusparse_handle();
171  // cusparseOperation_t t_flag;
172 
173  // if (transpose_flag == "upper")
174  // {
175  // t_flag = CUSPARSE_OPERATION_NON_TRANSPOSE;
176 
177  // }
178  // else if (transpose_flag == "lower")
179  // {
180  // t_flag = CUSPARSE_OPERATION_TRANSPOSE;
181  // }
182  // else
183  // {
184  // std::cout
185  // << " transpose flag needs to be non-transpose or transpose, Check
186  // the calling function"
187  // << std::endl;
188  // }
189  auto num_rows = triangular_factor->get_size()[0];
190  auto sol_size = local_solution->get_size()[0];
191  auto num_rhs = local_solution->get_size()[1];
192  auto factor_nnz =
193  triangular_factor->get_num_stored_elements(); // Check if this is
194  // actually equal to nnz
195  auto row_ptrs = triangular_factor->get_const_row_ptrs();
196  auto col_idxs = triangular_factor->get_const_col_idxs();
197  auto factor_values = triangular_factor->get_const_values();
198  auto sol_values = local_solution->get_values();
199  auto one = 1.0;
200  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_solve(
201  handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
202  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
203  cusparse.L_factor_descr, factor_values, row_ptrs, col_idxs, sol_values,
204  sol_size, cusparse.L_factor_info, cusparse.policy,
205  cusparse.L_factor_work_vec));
206 
207  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_solve(
208  handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
209  CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
210  cusparse.U_factor_descr, factor_values, row_ptrs, col_idxs, sol_values,
211  sol_size, cusparse.U_factor_info, cusparse.policy,
212  cusparse.U_factor_work_vec));
213 }
214 
215 // Weird bug with template parameter fro cusparse struct
216 // being asked for class template instead of a basic typename.
217 template <typename ValueType, typename IndexType>
218 void clear(const Settings &settings,
219  const Metadata<ValueType, IndexType> &metadata,
220  struct Solve<ValueType, IndexType>::cusparse &cusparse)
221 {
222  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyCsrsm2Info(cusparse.info));
223  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
224  cusparseDestroyMatDescr(cusparse.L_factor_descr));
225  SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
226  cusparseDestroyMatDescr(cusparse.U_factor_descr));
227  cusparse.algo = 0;
228  cusparse.L_factor_info = NULL;
229  cusparse.L_factor_work_size = 0;
230  cusparse.U_factor_info = NULL;
231  cusparse.U_factor_work_size = 0;
232 }
233 
234 } // namespace CusparseWrappers
235 
236 // Explicit Instantiations
237 #define DECLARE_FUNCTION(ValueType, IndexType) \
238  void CusparseWrappers::initialize( \
239  const Settings &, const Metadata<ValueType, IndexType> &, \
240  struct Solve<ValueType, IndexType>::cusparse &, \
241  const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>> &, \
242  std::shared_ptr<gko::matrix::Dense<ValueType>> &);
243 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
244 #undef DECLARE_FUNCTION
245 
246 #define DECLARE_FUNCTION(ValueType, IndexType) \
247  void CusparseWrappers::solve( \
248  const Settings &, const Metadata<ValueType, IndexType> &, \
249  struct Solve<ValueType, IndexType>::cusparse &, \
250  const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>> &, \
251  std::shared_ptr<gko::matrix::Dense<ValueType>> &);
252 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
253 #undef DECLARE_FUNCTION
254 
255 #define DECLARE_FUNCTION(ValueType, IndexType) \
256  void CusparseWrappers::clear( \
257  const Settings &, const Metadata<ValueType, IndexType> &, \
258  struct Solve<ValueType, IndexType>::cusparse &);
259 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
260 #undef DECLARE_FUNCTION
261 } // namespace schwz
262 
263 #endif
The Schwarz wrappers namespace.
Definition: comm_helpers.hpp:49