1// FEAT3: Finite Element Analysis Toolbox, Version 3
 
    2// Copyright (C) 2010 by Stefan Turek & the FEAT group
 
    3// FEAT3 is released under the GNU General Public License version 3,
 
    4// see the file 'copyright.txt' in the top level directory for details.
 
    7#include <kernel/base_header.hpp>
 
    9#include <kernel/util/exception.hpp>
 
   10#include <kernel/util/memory_pool.hpp>
 
   12#include "cusparse_v2.h"
 
   14// http://docs.nvidia.com/cuda/cusparse/#cusparse-lt-t-gt-csrilu02_solve
 
   26      struct CudaIluSolveInfo
 
   28        cusparseMatDescr_t descr_M;
 
   29#if CUSPARSE_VER_MAJOR < 12
 
   30        cusparseMatDescr_t descr_L;
 
   31        cusparseMatDescr_t descr_U;
 
   33        cusparseSpMatDescr_t descr_L;
 
   34        cusparseSpMatDescr_t descr_U;
 
   35        //in this case, we also need handler for input and output vectors
 
   36        //cusparseDnVecDescr_t descr_X;  //should not be needed, since buffersize and anaylsis accept NULL as vectors...
 
   37        //cusparseDnVecDescr_t descr_Y;
 
   40        csrilu02Info_t info_M;
 
   41#if CUSPARSE_VER_MAJOR < 12
 
   45        cusparseSpSVDescr_t  info_L;
 
   46        cusparseSpSVDescr_t  info_U;
 
   50#if CUSPARSE_VER_MAJOR < 12
 
   61        const cusparseOperation_t trans_L  = CUSPARSE_OPERATION_NON_TRANSPOSE;
 
   62        const cusparseOperation_t trans_U  = CUSPARSE_OPERATION_NON_TRANSPOSE;
 
   63        const cusparseSolvePolicy_t policy_M = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
 
   64        const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
 
   65        const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL; //why?
 
   71      void * cuda_ilu_init_symbolic(int m, int nnz, double * csrVal, int * csrRowPtr, int * csrColInd)
 
   73        CudaIluSolveInfo * info = new CudaIluSolveInfo;
 
   77        info->z = (double*)Util::cuda_malloc(m * sizeof(double));
 
   80        cusparseStatus_t status;
 
   82        cusparseCreateMatDescr(&(info->descr_M));
 
   83        cusparseSetMatIndexBase(info->descr_M, CUSPARSE_INDEX_BASE_ZERO);
 
   84        cusparseSetMatType(info->descr_M, CUSPARSE_MATRIX_TYPE_GENERAL);
 
   86#if CUSPARSE_VER_MAJOR < 12
 
   87        cusparseCreateMatDescr(&(info->descr_L));
 
   88        cusparseSetMatIndexBase(info->descr_L, CUSPARSE_INDEX_BASE_ZERO);
 
   89        cusparseSetMatType(info->descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
 
   90        cusparseSetMatFillMode(info->descr_L, CUSPARSE_FILL_MODE_LOWER);
 
   91        cusparseSetMatDiagType(info->descr_L, CUSPARSE_DIAG_TYPE_UNIT);
 
   93        //assertion if int is 32 bits
 
   94        static_assert(sizeof(int) == 4u, "ERROR: Size of int is not 32 bits");
 
   95        cusparseCreateCsr(&(info->descr_L), info->m, info->m, info->nnz,
 
   96                                      csrRowPtr, csrColInd, csrVal,
 
   97                                      CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,   //use typedef somewhere? Since this goes wrong, if int is something other than 32 bits...
 
   98                                      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);     //Also variable size in theroy...
 
  101          cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
 
  102          cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_UNIT;
 
  103          cusparseSpMatSetAttribute(info->descr_L, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(cusparseFillMode_t)); //set relevant data, rest should be set by default due to new CSR implementation...
 
  104          cusparseSpMatSetAttribute(info->descr_L, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(cusparseSpMatAttribute_t));
 
  108#if CUSPARSE_VER_MAJOR < 12
 
  109        cusparseCreateMatDescr(&(info->descr_U));
 
  110        cusparseSetMatIndexBase(info->descr_U, CUSPARSE_INDEX_BASE_ZERO);
 
  111        cusparseSetMatType(info->descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
 
  112        cusparseSetMatFillMode(info->descr_U, CUSPARSE_FILL_MODE_UPPER);
 
  113        cusparseSetMatDiagType(info->descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
 
  115        cusparseCreateCsr(&(info->descr_U), info->m, info->m, info->nnz,
 
  116                                      csrRowPtr, csrColInd, csrVal,
 
  117                                      CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
 
  118                                      CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
 
  120          cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER;
 
  121          cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
 
  122          cusparseSpMatSetAttribute(info->descr_U, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(cusparseFillMode_t)); //set relevant data, rest should be set by default due to new CSR implementation...
 
  123          cusparseSpMatSetAttribute(info->descr_U, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(cusparseSpMatAttribute_t));
 
  127        cusparseCreateCsrilu02Info(&(info->info_M));
 
  128#if CUSPARSE_VER_MAJOR < 12
 
  129        cusparseCreateCsrsv2Info(&(info->info_L));
 
  130        cusparseCreateCsrsv2Info(&(info->info_U));
 
  132        // create information handler for cuSparseSolver
 
  133        cusparseSpSV_createDescr(&(info->info_L));
 
  134        cusparseSpSV_createDescr(&(info->info_U));
 
  137// #if CUSPARSE_VER_MAJOR >= 12
 
  138//         //for now, we need to create pseudo vector arrays... we will set these later to the real vector by transfering the data pointer
 
  139//         cusparseCreateDnVec(&(info->descr_X), info->m, nullptr, CUDA_R_64F);
 
  140//         cusparseCreateDnVec(&(info->descr_Y), info->m, nullptr, CUDA_R_64F);
 
  143        status = cusparseDcsrilu02_bufferSize(Util::Intern::cusparse_handle, m, nnz,
 
  144                info->descr_M, csrVal, csrRowPtr, csrColInd, info->info_M, &(info->pBufferSize_M));
 
  145        if (status != CUSPARSE_STATUS_SUCCESS)
 
  146          throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02_bufferSize failed with status code: " + stringify(status));
 
  147#if CUSPARSE_VER_MAJOR < 12
 
  148        status = cusparseDcsrsv2_bufferSize(Util::Intern::cusparse_handle, info->trans_L, m, nnz,
 
  149            info->descr_L, csrVal, csrRowPtr, csrColInd, info->info_L, &(info->pBufferSize_L));
 
  150        if (status != CUSPARSE_STATUS_SUCCESS)
 
  151          throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_bufferSize failed with status code: " + stringify(status));
 
  153        status = cusparseDcsrsv2_bufferSize(Util::Intern::cusparse_handle, info->trans_U, m, nnz,
 
  154            info->descr_U, csrVal, csrRowPtr, csrColInd, info->info_L, &(info->pBufferSize_U)); //TODO: Error using info_L here?
 
  155        if (status != CUSPARSE_STATUS_SUCCESS)
 
  156          throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_bufferSize failed with status code: " + stringify(status));
 
  158        const double alpha = 1.;
 
  159        status = cusparseSpSV_bufferSize(Util::Intern::cusparse_handle, info->trans_L, &alpha,
 
  160            info->descr_L, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_L, &(info->pBufferSize_L));
 
  161        if (status != CUSPARSE_STATUS_SUCCESS)
 
  162          throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_bufferSize failed with status code: " + stringify(status));
 
  164        status = cusparseSpSV_bufferSize(Util::Intern::cusparse_handle, info->trans_U, &alpha,
 
  165            info->descr_U, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_U, &(info->pBufferSize_U));
 
  166        if (status != CUSPARSE_STATUS_SUCCESS)
 
  167          throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_bufferSize failed with status code: " + stringify(status));
 
  169        info->pBufferSize = max(info->pBufferSize_M, int(max(info->pBufferSize_L, info->pBufferSize_U)));
 
  170        info->pBuffer = Util::cuda_malloc(info->pBufferSize_M);
 
  172        status = cusparseDcsrilu02_analysis(Util::Intern::cusparse_handle, m, nnz, info->descr_M,
 
  173                csrVal, csrRowPtr, csrColInd, info->info_M,
 
  174                    info->policy_M, info->pBuffer);
 
  175        if (status != CUSPARSE_STATUS_SUCCESS)
 
  176          throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02_analysis failed with status code: " + stringify(status));
 
  177        status = cusparseXcsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &(info->structural_zero));
 
  178        if (CUSPARSE_STATUS_ZERO_PIVOT == status)
 
  180          throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
 
  182#if CUSPARSE_VER_MAJOR< 12
 
  183        status = cusparseDcsrsv2_analysis(Util::Intern::cusparse_handle, info->trans_L, m, nnz, info->descr_L,
 
  184                csrVal, csrRowPtr, csrColInd, info->info_L, info->policy_L, info->pBuffer);
 
  185        if (status != CUSPARSE_STATUS_SUCCESS)
 
  186          throw InternalError(__func__, __FILE__, __LINE__, "cusparse_csrv_analysis failed with status code: " + stringify(status));
 
  188        status = cusparseDcsrsv2_analysis(Util::Intern::cusparse_handle, info->trans_U, m, nnz, info->descr_U,
 
  189                csrVal, csrRowPtr, csrColInd, info->info_U, info->policy_U, info->pBuffer);
 
  190        if (status != CUSPARSE_STATUS_SUCCESS)
 
  191          throw InternalError(__func__, __FILE__, __LINE__, "cusparse_csrv_analysis failed with status code: " + stringify(status));
 
  193        status = cusparseSpSV_analysis(Util::Intern::cusparse_handle, info->trans_L, &alpha,
 
  194                              info->descr_L, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F,
 
  195                              CUSPARSE_SPSV_ALG_DEFAULT, info->info_L, info->pBuffer);
 
  196        if (status != CUSPARSE_STATUS_SUCCESS)
 
  197          throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_analysis failed with status code: " + stringify(status));
 
  199        status = cusparseSpSV_analysis(Util::Intern::cusparse_handle, info->trans_U, &alpha,
 
  200                              info->descr_U, NULL /*info->descr_X*/, NULL /*info->descr_Y*/, CUDA_R_64F,
 
  201                              CUSPARSE_SPSV_ALG_DEFAULT, info->info_U, info->pBuffer);
 
  202        if (status != CUSPARSE_STATUS_SUCCESS)
 
  203          throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_analysis failed with status code: " + stringify(status));
 
  206        cudaDeviceSynchronize();
 
  207#ifdef FEAT_DEBUG_MODE
 
  208        cudaError_t last_error(cudaGetLastError());
 
  209        if (cudaSuccess != last_error)
 
  210          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  216      void cuda_ilu_init_numeric(double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
 
  218        CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
 
  220        cusparseStatus_t status = cusparseDcsrilu02(Util::Intern::cusparse_handle, info->m, info->nnz, info->descr_M,
 
  221                csrVal, csrRowPtr, csrColInd, info->info_M, info->policy_M, info->pBuffer);
 
  222        if (status != CUSPARSE_STATUS_SUCCESS)
 
  223          throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsrilu02 failed with status code: " + stringify(status));
 
  224        status = cusparseXcsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &(info->numerical_zero));
 
  225        if (CUSPARSE_STATUS_ZERO_PIVOT == status)
 
  227          throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
 
  231      int cuda_ilu_apply(double * y, const double * x, double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
 
  233        CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
 
  234        const double alpha = 1.;
 
  235#if CUSPARSE_VER_MAJOR < 12
 
  236        cusparseStatus_t status = cusparseDcsrsv2_solve(Util::Intern::cusparse_handle, info->trans_L, info->m, info->nnz, &alpha, info->descr_L,
 
  237               csrVal, csrRowPtr, csrColInd, info->info_L,
 
  238                  x, info->z, info->policy_L, info->pBuffer);
 
  239        if (status != CUSPARSE_STATUS_SUCCESS)
 
  240          throw InternalError(__func__, __FILE__, __LINE__, "cusparseDcsrsv2_solve failed with status code: " + stringify(status));
 
  242        status = cusparseDcsrsv2_solve(Util::Intern::cusparse_handle, info->trans_U, info->m, info->nnz, &alpha, info->descr_U,
 
  243               csrVal, csrRowPtr, csrColInd, info->info_U,
 
  244                  info->z, y, info->policy_U, info->pBuffer);
 
  245        if (status != CUSPARSE_STATUS_SUCCESS)
 
  246          throw InternalError(__func__, __FILE__, __LINE__, "cusparsecsr2_solve failed with status code: " + stringify(status));
 
  251        //we have to create vector handlers to use the vector data in the new api (or shift them into our vector handlers... lets see whats necessary)
 
  252        cusparseConstDnVecDescr_t descr_X;
 
  253        cusparseDnVecDescr_t descr_Y, descr_Z;
 
  254        cusparseCreateConstDnVec(&descr_X, info->m, x, CUDA_R_64F);
 
  255        cusparseCreateDnVec(&descr_Z, info->m, info->z, CUDA_R_64F); //first write into z...
 
  256        cusparseCreateDnVec(&descr_Y, info->m, y, CUDA_R_64F);
 
  257        //now solve first triang system
 
  258        cusparseStatus_t status = cusparseSpSV_solve(Util::Intern::cusparse_handle, info->trans_L, &alpha,
 
  259                           info->descr_L, descr_X, descr_Z, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_L);
 
  260        if (status != CUSPARSE_STATUS_SUCCESS)
 
  262          //delete vecs descr if someone catches the error
 
  263          cusparseDestroyDnVec(descr_Y);
 
  264          cusparseDestroyDnVec(descr_Z);
 
  265          cusparseDestroyDnVec(descr_X);
 
  266          throw InternalError(__func__, __FILE__, __LINE__, "cusparseSpSV_solve failed with status code: " + stringify(status));
 
  269        status = cusparseSpSV_solve(Util::Intern::cusparse_handle, info->trans_U, &alpha,
 
  270                           info->descr_U, descr_Z, descr_Y, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, info->info_U);
 
  271        if (status != CUSPARSE_STATUS_SUCCESS)
 
  273          cusparseDestroyDnVec(descr_Y);
 
  274          cusparseDestroyDnVec(descr_Z);
 
  275          cusparseDestroyDnVec(descr_X);
 
  276          throw InternalError(__func__, __FILE__, __LINE__, "SECONDcusparseSpSV_solve failed with status code: " + stringify(status));
 
  279        cusparseDestroyDnVec(descr_Y);
 
  280        cusparseDestroyDnVec(descr_Z);
 
  281        cusparseDestroyDnVec(descr_X);
 
  284        cudaDeviceSynchronize();
 
  285#ifdef FEAT_DEBUG_MODE
 
  286        cudaError_t last_error(cudaGetLastError());
 
  287        if (cudaSuccess != last_error)
 
  288          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  294      void cuda_ilu_done_symbolic(void * vinfo)
 
  296        CudaIluSolveInfo * info = (CudaIluSolveInfo *) vinfo;
 
  298        Util::cuda_free(info->z);
 
  299        Util::cuda_free(info->pBuffer);
 
  300        cusparseDestroyMatDescr(info->descr_M);
 
  301#if CUSPARSE_VER_MAJOR < 12
 
  302        cusparseDestroyMatDescr(info->descr_L);
 
  303        cusparseDestroyMatDescr(info->descr_U);
 
  305        cusparseDestroySpMat(info->descr_L);
 
  306        cusparseDestroySpMat(info->descr_U);
 
  308        cusparseDestroyCsrilu02Info(info->info_M);
 
  309#if CUSPARSE_VER_MAJOR < 12
 
  310        cusparseDestroyCsrsv2Info(info->info_L);
 
  311        cusparseDestroyCsrsv2Info(info->info_U);
 
  313        cusparseSpSV_destroyDescr(info->info_L);
 
  314        cusparseSpSV_destroyDescr(info->info_L);
 
  321      struct CudaIluBSolveInfo
 
  323        cusparseMatDescr_t descr_M;
 
  324        cusparseMatDescr_t descr_L;
 
  325        cusparseMatDescr_t descr_U;
 
  326        bsrilu02Info_t info_M;
 
  329        cusparseOperation_t trans_L;
 
  330        cusparseOperation_t trans_U;
 
  331        cusparseDirection_t dir;
 
  332        cusparseSolvePolicy_t policy_M;
 
  333        cusparseSolvePolicy_t policy_L;
 
  334        cusparseSolvePolicy_t policy_U;
 
  342      void * cuda_ilub_init_symbolic(int m, int nnz, double * csrVal, int * csrRowPtr, int * csrColInd, const int blocksize)
 
  344        double * z = (double*)Util::cuda_malloc(m * blocksize * sizeof(double));
 
  346        cusparseMatDescr_t descr_M = 0;
 
  347        cusparseMatDescr_t descr_L = 0;
 
  348        cusparseMatDescr_t descr_U = 0;
 
  349        bsrilu02Info_t info_M  = 0;
 
  350        bsrsv2Info_t  info_L  = 0;
 
  351        bsrsv2Info_t  info_U  = 0;
 
  358        const cusparseSolvePolicy_t policy_M = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
 
  359        const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
 
  360        const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
 
  361        const cusparseOperation_t trans_L  = CUSPARSE_OPERATION_NON_TRANSPOSE;
 
  362        const cusparseOperation_t trans_U  = CUSPARSE_OPERATION_NON_TRANSPOSE;
 
  363        const cusparseDirection_t dir = CUSPARSE_DIRECTION_ROW;
 
  365        cusparseStatus_t status;
 
  367        cusparseCreateMatDescr(&descr_M);
 
  368        cusparseSetMatIndexBase(descr_M, CUSPARSE_INDEX_BASE_ZERO);
 
  369        cusparseSetMatType(descr_M, CUSPARSE_MATRIX_TYPE_GENERAL);
 
  371        cusparseCreateMatDescr(&descr_L);
 
  372        cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
 
  373        cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
 
  374        cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
 
  375        cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
 
  377        cusparseCreateMatDescr(&descr_U);
 
  378        cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);
 
  379        cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
 
  380        cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
 
  381        cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
 
  383        cusparseCreateBsrilu02Info(&info_M);
 
  384        cusparseCreateBsrsv2Info(&info_L);
 
  385        cusparseCreateBsrsv2Info(&info_U);
 
  387        cusparseDbsrilu02_bufferSize(Util::Intern::cusparse_handle, dir, m, nnz,
 
  388                descr_M, csrVal, csrRowPtr, csrColInd, blocksize, info_M, &pBufferSize_M);
 
  389        cusparseDbsrsv2_bufferSize(Util::Intern::cusparse_handle, dir, trans_L, m, nnz,
 
  390                descr_L, csrVal, csrRowPtr, csrColInd, blocksize, info_L, &pBufferSize_L);
 
  391        cusparseDbsrsv2_bufferSize(Util::Intern::cusparse_handle, dir, trans_U, m, nnz,
 
  392                descr_U, csrVal, csrRowPtr, csrColInd, blocksize, info_U, &pBufferSize_U);
 
  394        pBufferSize = max(pBufferSize_M, max(pBufferSize_L, pBufferSize_U));
 
  396        pBuffer = Util::cuda_malloc(pBufferSize);
 
  398        status = cusparseDbsrilu02_analysis(Util::Intern::cusparse_handle, dir, m, nnz, descr_M,
 
  399                csrVal, csrRowPtr, csrColInd, blocksize, info_M,
 
  401        if (status != CUSPARSE_STATUS_SUCCESS)
 
  402          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrilu02 failed with status code: " + stringify(status));
 
  403        status = cusparseXbsrilu02_zeroPivot(Util::Intern::cusparse_handle, info_M, &structural_zero);
 
  404        if (CUSPARSE_STATUS_ZERO_PIVOT == status)
 
  406          throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
 
  409        status = cusparseDbsrsv2_analysis(Util::Intern::cusparse_handle, dir, trans_L, m, nnz, descr_L,
 
  410                csrVal, csrRowPtr, csrColInd, blocksize, info_L, policy_L, pBuffer);
 
  411        if (status != CUSPARSE_STATUS_SUCCESS)
 
  412          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrv2_analysis failed with status code: " + stringify(status));
 
  414        status = cusparseDbsrsv2_analysis(Util::Intern::cusparse_handle, dir, trans_U, m, nnz, descr_U,
 
  415                csrVal, csrRowPtr, csrColInd, blocksize, info_U, policy_U, pBuffer);
 
  416        if (status != CUSPARSE_STATUS_SUCCESS)
 
  417          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrv2_analysis failed with status code: " + stringify(status));
 
  419        cudaDeviceSynchronize();
 
  420#ifdef FEAT_DEBUG_MODE
 
  421        cudaError_t last_error(cudaGetLastError());
 
  422        if (cudaSuccess != last_error)
 
  423          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  426        CudaIluBSolveInfo * info = new CudaIluBSolveInfo;
 
  427        info->descr_M = descr_M;
 
  428        info->descr_L = descr_L;
 
  429        info->descr_U = descr_U;
 
  430        info->info_M  = info_M;
 
  431        info->info_L  = info_L;
 
  432        info->info_U  = info_U;
 
  433        info->trans_L = trans_L;
 
  434        info->trans_U = trans_U;
 
  436        info->policy_M = policy_M;
 
  437        info->policy_L = policy_L;
 
  438        info->policy_U = policy_U;
 
  439        info->pBuffer = pBuffer;
 
  443        info->blocksize = blocksize;
 
  448      void cuda_ilub_init_numeric(double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
 
  450        CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
 
  452        cusparseStatus_t status = cusparseDbsrilu02(Util::Intern::cusparse_handle, info->dir, info->m, info->nnz, info->descr_M,
 
  453                csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_M, info->policy_M, info->pBuffer);
 
  454        if (status != CUSPARSE_STATUS_SUCCESS)
 
  455          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrilu02 failed with status code: " + stringify(status));
 
  457        status = cusparseXbsrilu02_zeroPivot(Util::Intern::cusparse_handle, info->info_M, &numerical_zero);
 
  458        if (CUSPARSE_STATUS_ZERO_PIVOT == status)
 
  460          throw InternalError(__func__, __FILE__, __LINE__, "CUSPARSE ZERO PIVOT ERROR!");
 
  464      int cuda_ilub_apply(double * y, const double * x, double * csrVal, int * csrRowPtr, int * csrColInd, void * vinfo)
 
  466        CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
 
  467        const double alpha = 1.;
 
  469        cusparseStatus_t status = cusparseDbsrsv2_solve(Util::Intern::cusparse_handle, info->dir, info->trans_L, info->m, info->nnz, &alpha, info->descr_L,
 
  470               csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_L,
 
  471                  x, info->z, info->policy_L, info->pBuffer);
 
  472        if (status != CUSPARSE_STATUS_SUCCESS)
 
  473          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrsv2_solve failed with status code: " + stringify(status));
 
  475        status = cusparseDbsrsv2_solve(Util::Intern::cusparse_handle, info->dir, info->trans_U, info->m, info->nnz, &alpha, info->descr_U,
 
  476               csrVal, csrRowPtr, csrColInd, info->blocksize, info->info_U,
 
  477                  info->z, y, info->policy_U, info->pBuffer);
 
  478        if (status != CUSPARSE_STATUS_SUCCESS)
 
  479          throw InternalError(__func__, __FILE__, __LINE__, "cusparsebsrsv2_solve failed with status code: " + stringify(status));
 
  481        cudaDeviceSynchronize();
 
  482#ifdef FEAT_DEBUG_MODE
 
  483        cudaError_t last_error(cudaGetLastError());
 
  484        if (cudaSuccess != last_error)
 
  485          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  491      void cuda_ilub_done_symbolic(void * vinfo)
 
  493        CudaIluBSolveInfo * info = (CudaIluBSolveInfo *) vinfo;
 
  495        Util::cuda_free(info->z);
 
  496        Util::cuda_free(info->pBuffer);
 
  497        cusparseDestroyMatDescr(info->descr_M);
 
  498        cusparseDestroyMatDescr(info->descr_L);
 
  499        cusparseDestroyMatDescr(info->descr_U);
 
  500        cusparseDestroyBsrilu02Info(info->info_M);
 
  501        cusparseDestroyBsrsv2Info(info->info_L);
 
  502        cusparseDestroyBsrsv2Info(info->info_U);
 
  506    } // namespace Intern
 
  508  } // namespace Solver