1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/slip_filter.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
19 template <int BlockSize_, typename DT_, typename IT_>
20 __global__ void cuda_slip_filter_rhs(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
22 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
26 Index block_size = Index(BlockSize_);
30 for(Index j(0) ; j < block_size; ++j)
32 sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
33 scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
38 for(Index j(0) ; j < block_size; ++j)
39 v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
42 template <int BlockSize_, typename DT_, typename IT_>
43 __global__ void cuda_slip_filter_def(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
45 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
49 Index block_size = Index(BlockSize_);
53 for(Index j(0) ; j < block_size; ++j)
55 sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
56 scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
61 for(Index j(0) ; j < block_size; ++j)
62 v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
70using namespace FEAT::LAFEM;
71using namespace FEAT::LAFEM::Arch;
73template <int BlockSize_, typename DT_, typename IT_>
74void SlipFilter::filter_rhs_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
76 Index blocksize = Util::cuda_blocksize_misc;
79 block.x = (unsigned)blocksize;
80 grid.x = (unsigned)ceil((ue)/(double)(block.x));
82 FEAT::LAFEM::Intern::cuda_slip_filter_rhs<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
84 cudaDeviceSynchronize();
86 cudaError_t last_error(cudaGetLastError());
87 if (cudaSuccess != last_error)
88 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
92template void SlipFilter::filter_rhs_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
93template void SlipFilter::filter_rhs_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
94template void SlipFilter::filter_rhs_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
95template void SlipFilter::filter_rhs_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
96template void SlipFilter::filter_rhs_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
97template void SlipFilter::filter_rhs_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
98template void SlipFilter::filter_rhs_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
99template void SlipFilter::filter_rhs_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
101template <int BlockSize_, typename DT_, typename IT_>
102void SlipFilter::filter_def_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
104 Index blocksize = Util::cuda_blocksize_misc;
107 block.x = (unsigned)blocksize;
108 grid.x = (unsigned)ceil((ue)/(double)(block.x));
110 FEAT::LAFEM::Intern::cuda_slip_filter_def<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
112 cudaDeviceSynchronize();
113#ifdef FEAT_DEBUG_MODE
114 cudaError_t last_error(cudaGetLastError());
115 if (cudaSuccess != last_error)
116 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
120template void SlipFilter::filter_def_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
121template void SlipFilter::filter_def_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
122template void SlipFilter::filter_def_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
123template void SlipFilter::filter_def_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
124template void SlipFilter::filter_def_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
125template void SlipFilter::filter_def_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
126template void SlipFilter::filter_def_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
127template void SlipFilter::filter_def_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);