35 #ifndef cusparse_helpers_hpp 36 #define cusparse_helpers_hpp 43 #include <cuda_runtime.h> 45 #include <exception_helpers.hpp> 46 #include <settings.hpp> 51 namespace CusparseWrappers {
54 template <
typename ValueType,
typename IndexType>
55 void initialize(
const Settings &settings,
56 const Metadata<ValueType, IndexType> &metadata,
57 struct Solve<ValueType, IndexType>::cusparse &cusparse,
58 const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>>
60 std::shared_ptr<gko::matrix::Dense<ValueType>> &local_solution)
62 auto handle = (
static_cast<gko::CudaExecutor *
>(settings.executor.get()))
63 ->get_cusparse_handle();
64 auto num_rows = triangular_factor->get_size()[0];
65 auto sol_size = local_solution->get_size()[0];
66 auto num_rhs = local_solution->get_size()[1];
68 triangular_factor->get_num_stored_elements();
70 auto row_ptrs = triangular_factor->get_const_row_ptrs();
71 auto col_idxs = triangular_factor->get_const_col_idxs();
72 auto factor_values = triangular_factor->get_const_values();
73 auto sol_values = local_solution->get_values();
76 cusparse.policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
79 cusparse.L_factor_descr = NULL;
80 cusparse.L_factor_info = NULL;
81 cusparse.L_factor_work_size = 0;
82 cusparse.U_factor_descr = NULL;
83 cusparse.U_factor_info = NULL;
84 cusparse.U_factor_work_size = 0;
85 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreate(&handle));
86 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
87 cusparseCreateSolveAnalysisInfo(&cusparse.info));
90 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
91 cusparseCreateCsrsm2Info(&cusparse.L_factor_info));
92 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
93 cusparseCreateMatDescr(&cusparse.L_factor_descr));
94 cusparseSetMatIndexBase(cusparse.L_factor_descr, CUSPARSE_INDEX_BASE_ZERO);
95 cusparseSetMatType(cusparse.L_factor_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
96 cusparseSetMatFillMode(cusparse.L_factor_descr, CUSPARSE_FILL_MODE_UPPER);
97 cusparseSetMatDiagType(cusparse.L_factor_descr,
98 CUSPARSE_DIAG_TYPE_NON_UNIT);
100 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
101 cusparseCreateCsrsm2Info(&cusparse.U_factor_info));
102 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
103 cusparseCreateMatDescr(&cusparse.U_factor_descr));
104 cusparseSetMatIndexBase(cusparse.U_factor_descr, CUSPARSE_INDEX_BASE_ZERO);
105 cusparseSetMatType(cusparse.U_factor_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
106 cusparseSetMatFillMode(cusparse.U_factor_descr, CUSPARSE_FILL_MODE_UPPER);
107 cusparseSetMatDiagType(cusparse.U_factor_descr,
108 CUSPARSE_DIAG_TYPE_NON_UNIT);
110 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_bufferSizeExt(
111 handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
112 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
113 cusparse.U_factor_descr,
114 factor_values, row_ptrs, col_idxs, sol_values, sol_size,
115 cusparse.U_factor_info, cusparse.policy, &cusparse.U_factor_work_size));
117 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_bufferSizeExt(
118 handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
119 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
120 cusparse.L_factor_descr,
121 factor_values, row_ptrs, col_idxs, sol_values, sol_size,
122 cusparse.L_factor_info, cusparse.policy, &cusparse.L_factor_work_size));
129 if (cusparse.L_factor_work_vec !=
nullptr) {
130 cudaFree(cusparse.L_factor_work_vec);
132 SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaMalloc(
133 (
void **)&cusparse.L_factor_work_vec, cusparse.L_factor_work_size));
135 if (cusparse.U_factor_work_vec !=
nullptr) {
136 cudaFree(cusparse.U_factor_work_vec);
138 SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaMalloc(
139 (
void **)&cusparse.U_factor_work_vec, cusparse.U_factor_work_size));
142 SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
143 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_analysis(
144 handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
145 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
146 cusparse.U_factor_descr,
147 factor_values, row_ptrs, col_idxs, sol_values, sol_size,
148 cusparse.U_factor_info, cusparse.policy, cusparse.U_factor_work_vec));
149 SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
152 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_analysis(
153 handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
154 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
155 cusparse.L_factor_descr,
156 factor_values, row_ptrs, col_idxs, sol_values, sol_size,
157 cusparse.L_factor_info, cusparse.policy, cusparse.L_factor_work_vec));
158 SCHWARZ_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
161 template <
typename ValueType,
typename IndexType>
162 void solve(
const Settings &settings,
163 const Metadata<ValueType, IndexType> &metadata,
164 struct Solve<ValueType, IndexType>::cusparse &cusparse,
165 const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>>
167 std::shared_ptr<gko::matrix::Dense<ValueType>> &local_solution)
169 auto handle = (
static_cast<gko::CudaExecutor *
>(settings.executor.get()))
170 ->get_cusparse_handle();
189 auto num_rows = triangular_factor->get_size()[0];
190 auto sol_size = local_solution->get_size()[0];
191 auto num_rhs = local_solution->get_size()[1];
193 triangular_factor->get_num_stored_elements();
195 auto row_ptrs = triangular_factor->get_const_row_ptrs();
196 auto col_idxs = triangular_factor->get_const_col_idxs();
197 auto factor_values = triangular_factor->get_const_values();
198 auto sol_values = local_solution->get_values();
200 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_solve(
201 handle, cusparse.algo, CUSPARSE_OPERATION_TRANSPOSE,
202 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
203 cusparse.L_factor_descr, factor_values, row_ptrs, col_idxs, sol_values,
204 sol_size, cusparse.L_factor_info, cusparse.policy,
205 cusparse.L_factor_work_vec));
207 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrsm2_solve(
208 handle, cusparse.algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
209 CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows, num_rhs, factor_nnz, &one,
210 cusparse.U_factor_descr, factor_values, row_ptrs, col_idxs, sol_values,
211 sol_size, cusparse.U_factor_info, cusparse.policy,
212 cusparse.U_factor_work_vec));
217 template <
typename ValueType,
typename IndexType>
218 void clear(
const Settings &settings,
219 const Metadata<ValueType, IndexType> &metadata,
220 struct Solve<ValueType, IndexType>::cusparse &cusparse)
222 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyCsrsm2Info(cusparse.info));
223 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
224 cusparseDestroyMatDescr(cusparse.L_factor_descr));
225 SCHWARZ_ASSERT_NO_CUSPARSE_ERRORS(
226 cusparseDestroyMatDescr(cusparse.U_factor_descr));
228 cusparse.L_factor_info = NULL;
229 cusparse.L_factor_work_size = 0;
230 cusparse.U_factor_info = NULL;
231 cusparse.U_factor_work_size = 0;
237 #define DECLARE_FUNCTION(ValueType, IndexType) \ 238 void CusparseWrappers::initialize( \ 239 const Settings &, const Metadata<ValueType, IndexType> &, \ 240 struct Solve<ValueType, IndexType>::cusparse &, \ 241 const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>> &, \ 242 std::shared_ptr<gko::matrix::Dense<ValueType>> &); 243 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
244 #undef DECLARE_FUNCTION 246 #define DECLARE_FUNCTION(ValueType, IndexType) \ 247 void CusparseWrappers::solve( \ 248 const Settings &, const Metadata<ValueType, IndexType> &, \ 249 struct Solve<ValueType, IndexType>::cusparse &, \ 250 const std::shared_ptr<gko::matrix::Csr<ValueType, IndexType>> &, \ 251 std::shared_ptr<gko::matrix::Dense<ValueType>> &); 252 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
253 #undef DECLARE_FUNCTION 255 #define DECLARE_FUNCTION(ValueType, IndexType) \ 256 void CusparseWrappers::clear( \ 257 const Settings &, const Metadata<ValueType, IndexType> &, \ 258 struct Solve<ValueType, IndexType>::cusparse &); 259 INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_FUNCTION);
260 #undef DECLARE_FUNCTION The Schwarz wrappers namespace.
Definition: comm_helpers.hpp:49