FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
cudss.cpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
7
8#ifdef FEAT_HAVE_CUDSS
9#include <kernel/util/memory_pool.hpp>
10#include <kernel/util/cuda_util.hpp>
11#include <kernel/solver/cudss.hpp>
12
13#include <cudss.h>
14
15namespace FEAT
16{
17 namespace Solver
18 {
19 namespace Intern
20 {
21 struct CUDSSCore
22 {
23 cudssHandle_t handle;
24 cudssConfig_t config;
25 cudssData_t data;
26 cudssMatrix_t matrix;
27 cudssMatrix_t vec_sol;
28 cudssMatrix_t vec_rhs;
29 std::uint32_t* row_ptr;
30 std::uint32_t* col_idx;
31 };
32 } // namespace Intern
33
34 CUDSS::CUDSS(const MatrixType& system_matrix) :
35 BaseClass(),
36 _system_matrix(system_matrix),
37 _cudss_core(new Intern::CUDSSCore)
38 {
39 Intern::CUDSSCore& core = *reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
40 core.handle = reinterpret_cast<cudssHandle_t>(Runtime::get_cudss_handle());
41 if(CUDSS_STATUS_SUCCESS != cudssConfigCreate(&core.config))
42 throw InternalError(__func__, __FILE__, __LINE__, "cudssConfigCreate failed!");
43 if(CUDSS_STATUS_SUCCESS != cudssDataCreate(core.handle, &core.data))
44 throw InternalError(__func__, __FILE__, __LINE__, "cudssDataCreate failed!");
45 }
46
47 CUDSS::~CUDSS()
48 {
49 Intern::CUDSSCore* core = reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
50 if(core)
51 {
52 cudssDataDestroy(core->handle, core->data);
53 cudssConfigDestroy(core->config);
54 delete core;
55 }
56 }
57
58 void CUDSS::init_symbolic()
59 {
60 BaseClass::init_symbolic();
61
62 Intern::CUDSSCore& core = *reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
63
64 cudssStatus_t ret = CUDSS_STATUS_INTERNAL_ERROR;
65
66 // set the basic configuration
67 cudssAlgType_t alg_type = CUDSS_ALG_1; // COLAMD based ordering
68 ret = cudssConfigSet(core.config, CUDSS_CONFIG_REORDERING_ALG, &alg_type, sizeof(cudssAlgType_t));
69 if(ret != CUDSS_STATUS_SUCCESS)
70 throw SolverException("CUDSS: cudssConfigSet() for 'CUDSS_CONFIG_REORDERING_ALG' failed!");
71
72 // get the index arrays of the matrix
73 const Index neq = _system_matrix.rows();
74 const Index nze = _system_matrix.used_elements();
75 const Index* a_row_ptr = _system_matrix.row_ptr();
76 const Index* a_col_idx = _system_matrix.col_ind();
77 std::uint32_t* row_ptr = nullptr;
78 std::uint32_t* col_idx = nullptr;
79
80 // convert indices to 32 bit if necessary
81 if constexpr (sizeof(Index) == 4u)
82 {
83 row_ptr = reinterpret_cast<std::uint32_t*>(const_cast<Index*>(_system_matrix.row_ptr()));
84 col_idx = reinterpret_cast<std::uint32_t*>(const_cast<Index*>(_system_matrix.col_ind()));
85 }
86 else
87 {
88 row_ptr = core.row_ptr = MemoryPool::allocate_memory<std::uint32_t>(neq+1u);
89 col_idx = core.col_idx = MemoryPool::allocate_memory<std::uint32_t>(nze);
90 for(Index i(0); i <= neq; ++i)
91 core.row_ptr[i] = std::uint32_t(a_row_ptr[i]);
92 for(Index i(0); i < nze; ++i)
93 core.col_idx[i] = std::uint32_t(a_col_idx[i]);
94 }
95
96 // set matrix data
97 ret = cudssMatrixCreateCsr(
98 &core.matrix,
99 neq,
100 neq,
101 nze,
102 row_ptr,
103 nullptr,
104 col_idx,
105 const_cast<double*>(_system_matrix.val()),
106 CUDA_R_32I,
107 CUDA_R_64F,
108 CUDSS_MTYPE_GENERAL,
109 CUDSS_MVIEW_FULL, // is ignored
110 CUDSS_BASE_ZERO);
111 if(ret != CUDSS_STATUS_SUCCESS)
112 throw SolverException("CUDSS: cudssMatrixCreateCsr() for system matrix failed!");
113
114 // allocate solution vector
115 ret = cudssMatrixCreateDn(
116 &core.vec_sol,
117 neq,
118 1,
119 neq,
120 nullptr,
121 CUDA_R_64F,
122 CUDSS_LAYOUT_COL_MAJOR);
123
124 if(ret != CUDSS_STATUS_SUCCESS)
125 throw SolverException("CUDSS: cudssMatrixCreateDn() for solution vector failed!");
126
127 // allocate rhs vector
128 ret = cudssMatrixCreateDn(
129 &core.vec_rhs,
130 neq,
131 1,
132 neq,
133 nullptr,
134 CUDA_R_64F,
135 CUDSS_LAYOUT_COL_MAJOR);
136
137 if(ret != CUDSS_STATUS_SUCCESS)
138 throw SolverException("CUDSS: cudssMatrixCreateDn() for rhs vector failed!");
139
140 // perform symbolic factorization
141 ret = cudssExecute(
142 core.handle,
143 CUDSS_PHASE_ANALYSIS,
144 core.config,
145 core.data,
146 core.matrix,
147 core.vec_sol,
148 core.vec_rhs);
149 if(ret != CUDSS_STATUS_SUCCESS)
150 throw SolverException("CUDSS: cudssExecute() for phase 'CUDSS_PHASE_ANALYSIS' failed!");
151
152 // wait until CUDA is done
153 Util::cuda_synchronize();
154 }
155
156 void CUDSS::done_symbolic()
157 {
158 BaseClass::done_symbolic();
159 if(_cudss_core == nullptr)
160 return;
161
162 Intern::CUDSSCore& core = *reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
163
164 cudssMatrixDestroy(core.vec_sol);
165 cudssMatrixDestroy(core.vec_rhs);
166 cudssMatrixDestroy(core.matrix);
167
168 if(core.col_idx)
169 {
170 MemoryPool::release_memory(core.col_idx);
171 core.col_idx = nullptr;
172 }
173 if(core.row_ptr)
174 {
175 MemoryPool::release_memory(core.row_ptr);
176 core.row_ptr = nullptr;
177 }
178
179 // wait until CUDA is done
180 Util::cuda_synchronize();
181 }
182
183 void CUDSS::init_numeric()
184 {
185 BaseClass::init_numeric();
186
187 Intern::CUDSSCore& core = *reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
188
189 // perform numeric factorization
190 cudssStatus_t ret = cudssExecute(
191 core.handle,
192 CUDSS_PHASE_FACTORIZATION,
193 core.config,
194 core.data,
195 core.matrix,
196 core.vec_sol,
197 core.vec_rhs);
198 if(ret != CUDSS_STATUS_SUCCESS)
199 throw SolverException("CUDSS: cudssExecute() for phase 'CUDSS_PHASE_FACTORIZATION' failed!");
200
201 // wait until CUDA is done
202 Util::cuda_synchronize();
203 }
204
205 void CUDSS::done_numeric()
206 {
207 // nothing to do here
208 BaseClass::done_numeric();
209 }
210
211 Status CUDSS::apply(VectorType& vec_sol, const VectorType& vec_rhs)
212 {
213 Intern::CUDSSCore& core = *reinterpret_cast<Intern::CUDSSCore*>(_cudss_core);
214
215 cudssStatus_t ret = CUDSS_STATUS_INTERNAL_ERROR;
216
217 // set solution vector data array
218 ret = cudssMatrixSetValues(core.vec_sol, vec_sol.elements());
219 if(ret != CUDSS_STATUS_SUCCESS)
220 throw SolverException("CUDSS: cudssMatrixSetValues() failed for vec_sol!");
221
222 // set rhs vector data array
223 cudssMatrixSetValues(core.vec_rhs, const_cast<double*>(vec_rhs.elements()));
224 if(ret != CUDSS_STATUS_SUCCESS)
225 throw SolverException("CUDSS: cudssMatrixSetValues() failed for vec_rhs!");
226
227 // solve
228 ret = cudssExecute(
229 core.handle,
230 CUDSS_PHASE_SOLVE,
231 core.config,
232 core.data,
233 core.matrix,
234 core.vec_sol,
235 core.vec_rhs);
236
237 // wait until CUDA is done
238 Util::cuda_synchronize();
239
240 return (ret == CUDSS_STATUS_SUCCESS ? Status::success : Status::aborted);
241 }
242 } // namespace Solver
243} // namespace FEAT
244#else
245// insert dummy function to suppress linker warnings
246void dummy_function_cudss() {}
247#endif // FEAT_HAVE_CUDSS
FEAT Kernel base header.
CUDSS(const MatrixType &system_matrix)
Constructor.
Status
Solver status return codes enumeration.
Definition: base.hpp:47
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.