FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
ilu_precond.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/base_header.hpp>
8
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
11
12#include "cusparse_v2.h"
13
14// http://docs.nvidia.com/cuda/cusparse/#cusparse-lt-t-gt-csrilu02_solve
15
16using namespace FEAT;
17
18namespace FEAT
19{
20 namespace Solver
21 {
22 /// \cond internal
23 namespace Intern
24 {
25 // CSR
26 struct CudaIluSolveInfo
27 {
28 cusparseMatDescr_t descr_M;
29#if CUSPARSE_VER_MAJOR < 12
30 cusparseMatDescr_t descr_L;
31 cusparseMatDescr_t descr_U;
32#else
33 cusparseSpMatDescr_t descr_L;
34 cusparseSpMatDescr_t descr_U;
35 //in this case, we also need handler for input and output vectors
36 //cusparseDnVecDescr_t descr_X; //should not be needed, since buffersize and anaylsis accept NULL as vectors...
37 //cusparseDnVecDescr_t descr_Y;
38
39#endif
40 csrilu02Info_t info_M;
41#if CUSPARSE_VER_MAJOR < 12
42 csrsv2Info_t info_L;
43 csrsv2Info_t info_U;
44#else
45 cusparseSpSVDescr_t info_L;
46 cusparseSpSVDescr_t info_U;
47#endif
48
49 int pBufferSize_M;
50#if CUSPARSE_VER_MAJOR < 12
51 int pBufferSize_L;
52 int pBufferSize_U;
53#else
54 size_t pBufferSize_L;
55 size_t pBufferSize_U;
56#endif
57 int pBufferSize;
58 void *pBuffer;
59 int structural_zero;
60 int numerical_zero;
61 const cusparseOperation_t trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE;
62 const cusparseOperation_t trans_U = CUSPARSE_OPERATION_NON_TRANSPOSE;
63 const cusparseSolvePolicy_t policy_M = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
64 const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
65 const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL; //why?
66 double * z;
67 int m;
68 int nnz;
69 };
70
71 void * cuda_ilu_init_symbolic(int m, int nnz, double * csrVal, int * csrRowPtr, int * csrColInd)
72 {
73 CudaIluSolveInfo * info = new CudaIluSolveInfo;
74 info->m = m;
75 info->nnz = nnz;
76
77 info->z = (double*)Util::cuda_malloc(m * sizeof(double));
78
79
80 cusparseStatus_t status;
81
82 cusparseCreateMatDescr(&(info->descr_M));
83 cusparseSetMatIndexBase(info->descr_M, CUSPARSE_INDEX_BASE_ZERO);
84 cusparseSetMatType(info->descr_M, CUSPARSE_MATRIX_TYPE_GENERAL);
85
86#if CUSPARSE_VER_MAJOR < 12
87 cusparseCreateMatDescr(&(info->descr_L));
88 cusparseSetMatIndexBase(info->descr_L, CUSPARSE_INDEX_BASE_ZERO);
89 cusparseSetMatType(info->descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
90 cusparseSetMatFillMode(info->descr_L, CUSPARSE_FILL_MODE_LOWER);
91 cusparseSetMatDiagType(info->descr_L, CUSPARSE_DIAG_TYPE_UNIT);
92#else
93 //assertion if int is 32 bits
94 static_assert(sizeof(int) == 4u, "ERROR: Size of int is not 32 bits");
95 cusparseCreateCsr(&(info->descr_L), info->m, info->m, info->nnz,
96 csrRowPtr, csrColInd, csrVal,
97 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, //use typedef somewhere? Since this goes wrong, if int is something other than 32 bits...
98 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); //Also variable size in theroy...
99 //set attributes
100 {
101 cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
102 cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_UNIT;
103 cusparseSpMatSetAttribute(info->descr_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(cusparseFillMode_t)); //set relevant data, rest should be set by default due to new CSR implementation...
104 cusparseSpMatSetAttribute(info->descr_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(cusparseSpMatAttribute_t));
105 }
106#endif
107
108#if CUSPARSE_VER_MAJOR < 12
109 cusparseCreateMatDescr(&(info->descr_U));
110 cusparseSetMatIndexBase(info->descr_U, CUSPARSE_INDEX_BASE_ZERO);
111 cusparseSetMatType(info->descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
112 cusparseSetMatFillMode(info->descr_U, CUSPARSE_FILL_MODE_UPPER);
113 cusparseSetMatDiagType(info->descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
114#else
115 cusparseCreateCsr(&(info->descr_U), info->m, info->m, info->nnz,
116 csrRowPtr, csrColInd, csrVal,
117 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
118 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
119 {
120 cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER;
121 cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
122 cusparseSpMatSetAttribute(info->descr_U, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(cusparseFillMode_t)); //set relevant data, rest should be set by default due to new CSR implementation...
123 cusparseSpMatSetAttribute(info->descr_U, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(cusparseSpMatAttribute_t));
124 }
125#endif
126
127 cusparseCreateCsrilu02Info(&(info->info_M));
128#if CUSPARSE_VER_MAJOR < 12
129 cusparseCreateCsrsv2Info(&(info->info_L));
130 cusparseCreateCsrsv2Info(&(info->info_U));
131#else
132 // create information handler for cuSparseSolver
133 cusparseSpSV_createDescr(&(info->info_L));
134 cusparseSpSV_createDescr(&(info->info_U));
135#endif
136
137// #if CUSPARSE_VER_MAJOR >= 12
138// //for now, we need to create pseudo vector arrays... we will set these later to the real vector by transfering the data pointer
139// cusparseCreateDnVec(&(info->descr_X), info->m, nullptr, CUDA_R_64F);
140// cusparseCreateDnVec(&(info->descr_Y), info->m, nullptr, CUDA_R_64F);
141// #endif
142
143 status = cusparseDcsrilu02_bufferSize(Util::Intern::cusparse_handle, m, nnz,
144 info->descr_M, csrVal, csrRowPtr, csrColInd, info->info_M, &(info->pBufferSize_M));
145 if (status != CUSPARSE_STATUS_SUCCESS)
146 throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02_bufferSize failed with status code: " + stringify(status));
147#if CUSPARSE_VER_MAJOR < 12
148 status = cusparseDcsrsv2_bufferSize(Util::Intern::cusparse_handle, info->trans_L, m, nnz,
149 info->descr_L, csrVal, csrRowPtr, csrColInd, info->info_L, &(info->pBufferSize_L));
150 if (status != CUSPARSE_STATUS_SUCCESS)
151 throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_bufferSize failed with status code: " + stringify(status));
152
153 status = cusparseDcsrsv2_bufferSize(Util::Intern::cusparse_handle, info->trans_U, m, nnz,
154 info->descr_U, csrVal, csrRowPtr, csrColInd, info->info_L, &(info->pBufferSize_U)); //TODO: Error using info_L here?
155 if (status != CUSPARSE_STATUS_SUCCESS)
156 throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_bufferSize failed with status code: " + stringify(status));
157#else
158 const double alpha = 1.;
159 status = cusparseSpSV_bufferSize(Util::Intern::cusparse_handle, info->trans_L, &alpha,
160 info->descr_L, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_L, &(info->pBufferSize_L));
161 if (status != CUSPARSE_STATUS_SUCCESS)
162 throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_bufferSize failed with status code: " + stringify(status));
163
164 status = cusparseSpSV_bufferSize(Util::Intern::cusparse_handle, info->trans_U, &alpha,
165 info->descr_U, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_U, &(info->pBufferSize_U));
166 if (status != CUSPARSE_STATUS_SUCCESS)
167 throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_bufferSize failed with status code: " + stringify(status));
168#endif
169 info->pBufferSize = max(info->pBufferSize_M, int(max(info->pBufferSize_L, info->pBufferSize_U)));
170 info->pBuffer = Util::cuda_malloc(info->pBufferSize_M);
171
172 status = cusparseDcsrilu02_analysis(Util::Intern::cusparse_handle, m, nnz, info->descr_M,
173 csrVal, csrRowPtr, csrColInd, info->info_M,
174 info->policy_M, info->pBuffer);
175 if (status != CUSPARSE_STATUS_SUCCESS)
176 throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02_analysis failed with status code: " + stringify(status));
177 status = cusparseXcsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &(info->structural_zero));
178 if (CUSPARSE_STATUS_ZERO_PIVOT == status)
179 {
180 throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
181 }
182#if CUSPARSE_VER_MAJOR< 12
183 status = cusparseDcsrsv2_analysis(Util::Intern::cusparse_handle, info->trans_L, m, nnz, info->descr_L,
184 csrVal, csrRowPtr, csrColInd, info->info_L, info->policy_L, info->pBuffer);
185 if (status != CUSPARSE_STATUS_SUCCESS)
186 throw InternalError(__func__, __FILE__, __LINE__, "cusparse_csrv_analysis failed with status code: " + stringify(status));
187
188 status = cusparseDcsrsv2_analysis(Util::Intern::cusparse_handle, info->trans_U, m, nnz, info->descr_U,
189 csrVal, csrRowPtr, csrColInd, info->info_U, info->policy_U, info->pBuffer);
190 if (status != CUSPARSE_STATUS_SUCCESS)
191 throw InternalError(__func__, __FILE__, __LINE__, "cusparse_csrv_analysis failed with status code: " + stringify(status));
192#else
193 status = cusparseSpSV_analysis(Util::Intern::cusparse_handle, info->trans_L, &alpha,
194 info->descr_L, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F,
195 CUSPARSE_SPSV_ALG_DEFAULT, info->info_L, info->pBuffer);
196 if (status != CUSPARSE_STATUS_SUCCESS)
197 throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_analysis failed with status code: " + stringify(status));
198
199 status = cusparseSpSV_analysis(Util::Intern::cusparse_handle, info->trans_U, &alpha,
200 info->descr_U, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F,
201 CUSPARSE_SPSV_ALG_DEFAULT, info->info_U, info->pBuffer);
202 if (status != CUSPARSE_STATUS_SUCCESS)
203 throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_analysis failed with status code: " + stringify(status));
204#endif
205
206 cudaDeviceSynchronize();
207#ifdef FEAT_DEBUG_MODE
208 cudaError_t last_error(cudaGetLastError());
209 if (cudaSuccess != last_error)
210 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
211#endif
212
213 return (void*)info;
214 }
215
216 void cuda_ilu_init_numeric(double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
217 {
218 CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
219
220 cusparseStatus_t status = cusparseDcsrilu02(Util::Intern::cusparse_handle, info->m, info->nnz, info->descr_M,
221 csrVal, csrRowPtr, csrColInd, info->info_M, info->policy_M, info->pBuffer);
222 if (status != CUSPARSE_STATUS_SUCCESS)
223 throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02 failed with status code: " + stringify(status));
224 status = cusparseXcsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &(info->numerical_zero));
225 if (CUSPARSE_STATUS_ZERO_PIVOT == status)
226 {
227 throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
228 }
229 }
230
231 int cuda_ilu_apply(double * y, const double * x, double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
232 {
233 CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
234 const double alpha = 1.;
235#if CUSPARSE_VER_MAJOR < 12
236 cusparseStatus_t status = cusparseDcsrsv2_solve(Util::Intern::cusparse_handle, info->trans_L, info->m, info->nnz, &alpha, info->descr_L,
237 csrVal, csrRowPtr, csrColInd, info->info_L,
238 x, info->z, info->policy_L, info->pBuffer);
239 if (status != CUSPARSE_STATUS_SUCCESS)
240 throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_solve failed with status code: " + stringify(status));
241
242 status = cusparseDcsrsv2_solve(Util::Intern::cusparse_handle, info->trans_U, info->m, info->nnz, &alpha, info->descr_U,
243 csrVal, csrRowPtr, csrColInd, info->info_U,
244 info->z, y, info->policy_U, info->pBuffer);
245 if (status != CUSPARSE_STATUS_SUCCESS)
246 throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsr2_solve failed with status code: " + stringify(status));
247#else
248 (void)csrVal;
249 (void)csrRowPtr;
250 (void)csrColInd;
251 //we have to create vector handlers to use the vector data in the new api (or shift them into our vector handlers... lets see whats necessary)
252 cusparseConstDnVecDescr_t descr_X;
253 cusparseDnVecDescr_t descr_Y, descr_Z;
254 cusparseCreateConstDnVec(&descr_X, info->m, x, CUDA_R_64F);
255 cusparseCreateDnVec(&descr_Z, info->m, info->z, CUDA_R_64F); //first write into z...
256 cusparseCreateDnVec(&descr_Y, info->m, y, CUDA_R_64F);
257 //now solve first triang system
258 cusparseStatus_t status = cusparseSpSV_solve(Util::Intern::cusparse_handle, info->trans_L, &alpha,
259 info->descr_L, descr_X, descr_Z, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_L);
260 if (status != CUSPARSE_STATUS_SUCCESS)
261 {
262 //delete vecs descr if someone catches the error
263 cusparseDestroyDnVec(descr_Y);
264 cusparseDestroyDnVec(descr_Z);
265 cusparseDestroyDnVec(descr_X);
266 throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_solve failed with status code: " + stringify(status));
267 }
268
269 status = cusparseSpSV_solve(Util::Intern::cusparse_handle, info->trans_U, &alpha,
270 info->descr_U, descr_Z, descr_Y, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_U);
271 if (status != CUSPARSE_STATUS_SUCCESS)
272 {
273 cusparseDestroyDnVec(descr_Y);
274 cusparseDestroyDnVec(descr_Z);
275 cusparseDestroyDnVec(descr_X);
276 throw InternalError(__func__, __FILE__, __LINE__, "SECONDcusparseSpSV_solve failed with status code: " + stringify(status));
277 }
278 //destroy descr
279 cusparseDestroyDnVec(descr_Y);
280 cusparseDestroyDnVec(descr_Z);
281 cusparseDestroyDnVec(descr_X);
282#endif
283
284 cudaDeviceSynchronize();
285#ifdef FEAT_DEBUG_MODE
286 cudaError_t last_error(cudaGetLastError());
287 if (cudaSuccess != last_error)
288 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
289#endif
290
291 return 0;
292 }
293
294 void cuda_ilu_done_symbolic(void * vinfo)
295 {
296 CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
297
298 Util::cuda_free(info->z);
299 Util::cuda_free(info->pBuffer);
300 cusparseDestroyMatDescr(info->descr_M);
301#if CUSPARSE_VER_MAJOR < 12
302 cusparseDestroyMatDescr(info->descr_L);
303 cusparseDestroyMatDescr(info->descr_U);
304#else
305 cusparseDestroySpMat(info->descr_L);
306 cusparseDestroySpMat(info->descr_U);
307#endif
308 cusparseDestroyCsrilu02Info(info->info_M);
309#if CUSPARSE_VER_MAJOR < 12
310 cusparseDestroyCsrsv2Info(info->info_L);
311 cusparseDestroyCsrsv2Info(info->info_U);
312#else
313 cusparseSpSV_destroyDescr(info->info_L);
314 cusparseSpSV_destroyDescr(info->info_L);
315#endif
316
317 delete info;
318 }
319
320 // BCSR
321 struct CudaIluBSolveInfo
322 {
323 cusparseMatDescr_t descr_M;
324 cusparseMatDescr_t descr_L;
325 cusparseMatDescr_t descr_U;
326 bsrilu02Info_t info_M;
327 bsrsv2Info_t info_L;
328 bsrsv2Info_t info_U;
329 cusparseOperation_t trans_L;
330 cusparseOperation_t trans_U;
331 cusparseDirection_t dir;
332 cusparseSolvePolicy_t policy_M;
333 cusparseSolvePolicy_t policy_L;
334 cusparseSolvePolicy_t policy_U;
335 void * pBuffer;
336 double * z;
337 int m;
338 int nnz;
339 int blocksize;
340 };
341
342 void * cuda_ilub_init_symbolic(int m, int nnz, double * csrVal, int * csrRowPtr, int * csrColInd, const int blocksize)
343 {
344 double * z = (double*)Util::cuda_malloc(m * blocksize * sizeof(double));
345
346 cusparseMatDescr_t descr_M = 0;
347 cusparseMatDescr_t descr_L = 0;
348 cusparseMatDescr_t descr_U = 0;
349 bsrilu02Info_t info_M = 0;
350 bsrsv2Info_t info_L = 0;
351 bsrsv2Info_t info_U = 0;
352 int pBufferSize_M;
353 int pBufferSize_L;
354 int pBufferSize_U;
355 int pBufferSize;
356 void *pBuffer = 0;
357 int structural_zero;
358 const cusparseSolvePolicy_t policy_M = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
359 const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
360 const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
361 const cusparseOperation_t trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE;
362 const cusparseOperation_t trans_U = CUSPARSE_OPERATION_NON_TRANSPOSE;
363 const cusparseDirection_t dir = CUSPARSE_DIRECTION_ROW;
364
365 cusparseStatus_t status;
366
367 cusparseCreateMatDescr(&descr_M);
368 cusparseSetMatIndexBase(descr_M, CUSPARSE_INDEX_BASE_ZERO);
369 cusparseSetMatType(descr_M, CUSPARSE_MATRIX_TYPE_GENERAL);
370
371 cusparseCreateMatDescr(&descr_L);
372 cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
373 cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
374 cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
375 cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
376
377 cusparseCreateMatDescr(&descr_U);
378 cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);
379 cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
380 cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
381 cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
382
383 cusparseCreateBsrilu02Info(&info_M);
384 cusparseCreateBsrsv2Info(&info_L);
385 cusparseCreateBsrsv2Info(&info_U);
386
387 cusparseDbsrilu02_bufferSize(Util::Intern::cusparse_handle, dir, m, nnz,
388 descr_M, csrVal, csrRowPtr, csrColInd, blocksize, info_M, &pBufferSize_M);
389 cusparseDbsrsv2_bufferSize(Util::Intern::cusparse_handle, dir, trans_L, m, nnz,
390 descr_L, csrVal, csrRowPtr, csrColInd, blocksize, info_L, &pBufferSize_L);
391 cusparseDbsrsv2_bufferSize(Util::Intern::cusparse_handle, dir, trans_U, m, nnz,
392 descr_U, csrVal, csrRowPtr, csrColInd, blocksize, info_U, &pBufferSize_U);
393
394 pBufferSize = max(pBufferSize_M, max(pBufferSize_L, pBufferSize_U));
395
396 pBuffer = Util::cuda_malloc(pBufferSize);
397
398 status = cusparseDbsrilu02_analysis(Util::Intern::cusparse_handle, dir, m, nnz, descr_M,
399 csrVal, csrRowPtr, csrColInd, blocksize, info_M,
400 policy_M, pBuffer);
401 if (status != CUSPARSE_STATUS_SUCCESS)
402 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrilu02 failed with status code: " + stringify(status));
403 status = cusparseXbsrilu02_zeroPivot(Util::Intern::cusparse_handle, info_M, &structural_zero);
404 if (CUSPARSE_STATUS_ZERO_PIVOT == status)
405 {
406 throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
407 }
408
409 status = cusparseDbsrsv2_analysis(Util::Intern::cusparse_handle, dir, trans_L, m, nnz, descr_L,
410 csrVal, csrRowPtr, csrColInd, blocksize, info_L, policy_L, pBuffer);
411 if (status != CUSPARSE_STATUS_SUCCESS)
412 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrv2_analysis failed with status code: " + stringify(status));
413
414 status = cusparseDbsrsv2_analysis(Util::Intern::cusparse_handle, dir, trans_U, m, nnz, descr_U,
415 csrVal, csrRowPtr, csrColInd, blocksize, info_U, policy_U, pBuffer);
416 if (status != CUSPARSE_STATUS_SUCCESS)
417 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrv2_analysis failed with status code: " + stringify(status));
418
419 cudaDeviceSynchronize();
420#ifdef FEAT_DEBUG_MODE
421 cudaError_t last_error(cudaGetLastError());
422 if (cudaSuccess != last_error)
423 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
424#endif
425
426 CudaIluBSolveInfo * info = new CudaIluBSolveInfo;
427 info->descr_M = descr_M;
428 info->descr_L = descr_L;
429 info->descr_U = descr_U;
430 info->info_M = info_M;
431 info->info_L = info_L;
432 info->info_U = info_U;
433 info->trans_L = trans_L;
434 info->trans_U = trans_U;
435 info->dir = dir;
436 info->policy_M = policy_M;
437 info->policy_L = policy_L;
438 info->policy_U = policy_U;
439 info->pBuffer = pBuffer;
440 info->z = z;
441 info->m = m;
442 info->nnz = nnz;
443 info->blocksize = blocksize;
444
445 return (void*)info;
446 }
447
448 void cuda_ilub_init_numeric(double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
449 {
450 CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
451
452 cusparseStatus_t status = cusparseDbsrilu02(Util::Intern::cusparse_handle, info->dir, info->m, info->nnz, info->descr_M,
453 csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_M, info->policy_M, info->pBuffer);
454 if (status != CUSPARSE_STATUS_SUCCESS)
455 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrilu02 failed with status code: " + stringify(status));
456 int numerical_zero;
457 status = cusparseXbsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &numerical_zero);
458 if (CUSPARSE_STATUS_ZERO_PIVOT == status)
459 {
460 throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
461 }
462 }
463
464 int cuda_ilub_apply(double * y, const double * x, double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
465 {
466 CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
467 const double alpha = 1.;
468
469 cusparseStatus_t status = cusparseDbsrsv2_solve(Util::Intern::cusparse_handle, info->dir, info->trans_L, info->m, info->nnz, &alpha, info->descr_L,
470 csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_L,
471 x, info->z, info->policy_L, info->pBuffer);
472 if (status != CUSPARSE_STATUS_SUCCESS)
473 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrsv2_solve failed with status code: " + stringify(status));
474
475 status = cusparseDbsrsv2_solve(Util::Intern::cusparse_handle, info->dir, info->trans_U, info->m, info->nnz, &alpha, info->descr_U,
476 csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_U,
477 info->z, y, info->policy_U, info->pBuffer);
478 if (status != CUSPARSE_STATUS_SUCCESS)
479 throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrsv2_solve failed with status code: " + stringify(status));
480
481 cudaDeviceSynchronize();
482#ifdef FEAT_DEBUG_MODE
483 cudaError_t last_error(cudaGetLastError());
484 if (cudaSuccess != last_error)
485 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
486#endif
487
488 return 0;
489 }
490
491 void cuda_ilub_done_symbolic(void * vinfo)
492 {
493 CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
494
495 Util::cuda_free(info->z);
496 Util::cuda_free(info->pBuffer);
497 cusparseDestroyMatDescr(info->descr_M);
498 cusparseDestroyMatDescr(info->descr_L);
499 cusparseDestroyMatDescr(info->descr_U);
500 cusparseDestroyBsrilu02Info(info->info_M);
501 cusparseDestroyBsrsv2Info(info->info_L);
502 cusparseDestroyBsrsv2Info(info->info_U);
503
504 delete info;
505 }
506 } // namespace Intern
507 /// \endcond
508 } // namespace Solver
509} // namespace FEAT