1// FEAT3: Finite Element Analysis Toolbox, Version 3
 
    2// Copyright (C) 2010 by Stefan Turek & the FEAT group
 
    3// FEAT3 is released under the GNU General Public License version 3,
 
    4// see the file 'copyright.txt' in the top level directory for details.
 
    7#include <kernel/base_header.hpp>
 
    8#include <kernel/lafem/arch/slip_filter.hpp>
 
    9#include <kernel/util/exception.hpp>
 
   10#include <kernel/util/memory_pool.hpp>
 
   19      template <int BlockSize_, typename DT_, typename IT_>
 
   20      __global__ void cuda_slip_filter_rhs(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
 
   22        Index idx = threadIdx.x + blockDim.x * blockIdx.x;
 
   26        Index block_size = Index(BlockSize_);
 
   30        for(Index j(0) ; j < block_size; ++j)
 
   32          sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
 
   33          scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
 
   38        for(Index j(0) ; j < block_size; ++j)
 
   39          v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
 
   42      template <int BlockSize_, typename DT_, typename IT_>
 
   43      __global__ void cuda_slip_filter_def(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
 
   45        Index idx = threadIdx.x + blockDim.x * blockIdx.x;
 
   49        Index block_size = Index(BlockSize_);
 
   53        for(Index j(0) ; j < block_size; ++j)
 
   55          sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
 
   56          scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
 
   61        for(Index j(0) ; j < block_size; ++j)
 
   62          v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
 
   70using namespace FEAT::LAFEM;
 
   71using namespace FEAT::LAFEM::Arch;
 
   73template <int BlockSize_, typename DT_, typename IT_>
 
   74void SlipFilter::filter_rhs_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
 
   76  Index blocksize = Util::cuda_blocksize_misc;
 
   79  block.x = (unsigned)blocksize;
 
   80  grid.x = (unsigned)ceil((ue)/(double)(block.x));
 
   82  FEAT::LAFEM::Intern::cuda_slip_filter_rhs<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
 
   84  cudaDeviceSynchronize();
 
   86  cudaError_t last_error(cudaGetLastError());
 
   87  if (cudaSuccess != last_error)
 
   88    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
   92template void SlipFilter::filter_rhs_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
 
   93template void SlipFilter::filter_rhs_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
 
   94template void SlipFilter::filter_rhs_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
 
   95template void SlipFilter::filter_rhs_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
 
   96template void SlipFilter::filter_rhs_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
 
   97template void SlipFilter::filter_rhs_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
 
   98template void SlipFilter::filter_rhs_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
 
   99template void SlipFilter::filter_rhs_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
 
  101template <int BlockSize_, typename DT_, typename IT_>
 
  102void SlipFilter::filter_def_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
 
  104  Index blocksize = Util::cuda_blocksize_misc;
 
  107  block.x = (unsigned)blocksize;
 
  108  grid.x = (unsigned)ceil((ue)/(double)(block.x));
 
  110  FEAT::LAFEM::Intern::cuda_slip_filter_def<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
 
  112  cudaDeviceSynchronize();
 
  113#ifdef FEAT_DEBUG_MODE
 
  114  cudaError_t last_error(cudaGetLastError());
 
  115  if (cudaSuccess != last_error)
 
  116    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  120template void SlipFilter::filter_def_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
 
  121template void SlipFilter::filter_def_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
 
  122template void SlipFilter::filter_def_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
 
  123template void SlipFilter::filter_def_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
 
  124template void SlipFilter::filter_def_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
 
  125template void SlipFilter::filter_def_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
 
  126template void SlipFilter::filter_def_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
 
  127template void SlipFilter::filter_def_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);