1// FEAT3: Finite Element Analysis Toolbox, Version 3
 
    2// Copyright (C) 2010 by Stefan Turek & the FEAT group
 
    3// FEAT3 is released under the GNU General Public License version 3,
 
    4// see the file 'copyright.txt' in the top level directory for details.
 
    7#include <kernel/base_header.hpp>
 
    8#include <kernel/lafem/arch/mirror.hpp>
 
    9#include <kernel/util/exception.hpp>
 
   10#include <kernel/util/memory_pool.hpp>
 
   19      template<typename DT_, typename IT_>
 
   20      __global__ void cuda_mirror_gather_dv(const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
 
   22        Index ii = threadIdx.x + blockDim.x * blockIdx.x;
 
   26        buf[boff+ii] = vec[idx[ii]];
 
   29      template<typename DT_, typename IT_>
 
   30      __global__ void cuda_mirror_scatter_dv(const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
 
   32        Index ii = threadIdx.x + blockDim.x * blockIdx.x;
 
   36        vec[idx[ii]] += alpha*buf[boff+ii];
 
   39      template<typename DT_, typename IT_>
 
   40      __global__ void cuda_mirror_gather_dvb(const Index bs, const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
 
   42        Index ii = threadIdx.x + blockDim.x * blockIdx.x;
 
   46        for(Index k(0); k < bs; ++k)
 
   48          buf[boff+ii*bs+k] = vec[idx[ii]*bs+k];
 
   52      template<typename DT_, typename IT_>
 
   53      __global__ void cuda_mirror_scatter_dvb(const Index bs, const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
 
   55        Index ii = threadIdx.x + blockDim.x * blockIdx.x;
 
   59        for(Index k(0); k < bs; ++k)
 
   61          vec[idx[ii]*bs+k] += alpha*buf[boff+ii*bs+k];
 
   68      template<typename DT_, typename IT_>
 
   69      void Mirror::gather_dv_cuda(const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
 
   71        Index blocksize = Util::cuda_blocksize_misc;
 
   74        block.x = (unsigned)blocksize;
 
   75        grid.x = (unsigned)ceil((nidx)/(double)(block.x));
 
   77        FEAT::LAFEM::Intern::cuda_mirror_gather_dv<<<grid, block>>>(boff, nidx, idx, buf, vec);
 
   79        cudaDeviceSynchronize();
 
   81        cudaError_t last_error(cudaGetLastError());
 
   82        if (cudaSuccess != last_error)
 
   83          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
   87      template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint64_t*, float*, const float*);
 
   88      template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint64_t*, double*, const double*);
 
   89      template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint32_t*, float*, const float*);
 
   90      template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint32_t*, double*, const double*);
 
   92      template<typename DT_, typename IT_>
 
   93      void Mirror::scatter_dv_cuda(const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
 
   95        Index blocksize = Util::cuda_blocksize_misc;
 
   98        block.x = (unsigned)blocksize;
 
   99        grid.x = (unsigned)ceil((nidx)/(double)(block.x));
 
  101        FEAT::LAFEM::Intern::cuda_mirror_scatter_dv<<<grid, block>>>(boff, nidx, idx, buf, vec, alpha);
 
  103        cudaDeviceSynchronize();
 
  104#ifdef FEAT_DEBUG_MODE
 
  105        cudaError_t last_error(cudaGetLastError());
 
  106        if (cudaSuccess != last_error)
 
  107          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  111      template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint64_t*, const float*, float*, float);
 
  112      template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint64_t*, const double*, double*, double);
 
  113      template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint32_t*, const float*, float*, float);
 
  114      template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint32_t*, const double*, double*, double);
 
  116      template<typename DT_, typename IT_>
 
  117      void Mirror::gather_dvb_cuda(const Index bs, const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
 
  119        Index blocksize = Util::cuda_blocksize_misc;
 
  122        block.x = (unsigned)blocksize;
 
  123        grid.x = (unsigned)ceil((nidx)/(double)(block.x));
 
  125        FEAT::LAFEM::Intern::cuda_mirror_gather_dvb<<<grid, block>>>(bs, boff, nidx, idx, buf, vec);
 
  127        cudaDeviceSynchronize();
 
  128#ifdef FEAT_DEBUG_MODE
 
  129        cudaError_t last_error(cudaGetLastError());
 
  130        if (cudaSuccess != last_error)
 
  131          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  135      template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, float*, const float*);
 
  136      template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, double*, const double*);
 
  137      template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, float*, const float*);
 
  138      template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, double*, const double*);
 
  140      template<typename DT_, typename IT_>
 
  141      void Mirror::scatter_dvb_cuda(const Index bs, const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
 
  143        Index blocksize = Util::cuda_blocksize_misc;
 
  146        block.x = (unsigned)blocksize;
 
  147        grid.x = (unsigned)ceil((nidx)/(double)(block.x));
 
  149        FEAT::LAFEM::Intern::cuda_mirror_scatter_dvb<<<grid, block>>>(bs, boff, nidx, idx, buf, vec, alpha);
 
  151        cudaDeviceSynchronize();
 
  152#ifdef FEAT_DEBUG_MODE
 
  153        cudaError_t last_error(cudaGetLastError());
 
  154        if (cudaSuccess != last_error)
 
  155          throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
  159      template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, const float*, float*, float);
 
  160      template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, const double*, double*, double);
 
  161      template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, const float*, float*, float);
 
  162      template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, const double*, double*, double);