FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
slip_filter.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/slip_filter.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
11
12/// \cond internal
13namespace FEAT
14{
15 namespace LAFEM
16 {
17 namespace Intern
18 {
19 template <int BlockSize_, typename DT_, typename IT_>
20 __global__ void cuda_slip_filter_rhs(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
21 {
22 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
23 if (idx >= ue)
24 return;
25
26 Index block_size = Index(BlockSize_);
27 DT_ sp(DT_(0));
28 DT_ scal(DT_(0));
29
30 for(Index j(0) ; j < block_size; ++j)
31 {
32 sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
33 scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
34 }
35
36 sp /= scal;
37
38 for(Index j(0) ; j < block_size; ++j)
39 v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
40 }
41
42 template <int BlockSize_, typename DT_, typename IT_>
43 __global__ void cuda_slip_filter_def(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
44 {
45 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
46 if (idx >= ue)
47 return;
48
49 Index block_size = Index(BlockSize_);
50 DT_ sp(DT_(0));
51 DT_ scal(DT_(0));
52
53 for(Index j(0) ; j < block_size; ++j)
54 {
55 sp += v[block_size* sv_indices[idx] + j]*sv_elements[block_size * idx + j];
56 scal += sv_elements[block_size * idx + j]*sv_elements[block_size * idx + j];
57 }
58
59 sp /= scal;
60
61 for(Index j(0) ; j < block_size; ++j)
62 v[block_size* sv_indices[idx] + j] -= sp*sv_elements[block_size * idx + j];
63 }
64
65 } // namespace Intern
66 } // namespace LAFEM
67} // namespace FEAT
68
69using namespace FEAT;
70using namespace FEAT::LAFEM;
71using namespace FEAT::LAFEM::Arch;
72
73template <int BlockSize_, typename DT_, typename IT_>
74void SlipFilter::filter_rhs_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
75{
76 Index blocksize = Util::cuda_blocksize_misc;
77 dim3 grid;
78 dim3 block;
79 block.x = (unsigned)blocksize;
80 grid.x = (unsigned)ceil((ue)/(double)(block.x));
81
82 FEAT::LAFEM::Intern::cuda_slip_filter_rhs<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
83
84 cudaDeviceSynchronize();
85#ifdef FEAT_DEBUG_MODE
86 cudaError_t last_error(cudaGetLastError());
87 if (cudaSuccess != last_error)
88 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
89#endif
90}
91
92template void SlipFilter::filter_rhs_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
93template void SlipFilter::filter_rhs_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
94template void SlipFilter::filter_rhs_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
95template void SlipFilter::filter_rhs_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
96template void SlipFilter::filter_rhs_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
97template void SlipFilter::filter_rhs_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
98template void SlipFilter::filter_rhs_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
99template void SlipFilter::filter_rhs_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
100
101template <int BlockSize_, typename DT_, typename IT_>
102void SlipFilter::filter_def_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
103{
104 Index blocksize = Util::cuda_blocksize_misc;
105 dim3 grid;
106 dim3 block;
107 block.x = (unsigned)blocksize;
108 grid.x = (unsigned)ceil((ue)/(double)(block.x));
109
110 FEAT::LAFEM::Intern::cuda_slip_filter_def<BlockSize_, DT_, IT_><<<grid, block>>>(v, sv_elements, sv_indices, ue);
111
112 cudaDeviceSynchronize();
113#ifdef FEAT_DEBUG_MODE
114 cudaError_t last_error(cudaGetLastError());
115 if (cudaSuccess != last_error)
116 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
117#endif
118}
119
120template void SlipFilter::filter_def_cuda<2, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
121template void SlipFilter::filter_def_cuda<2, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
122template void SlipFilter::filter_def_cuda<2, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
123template void SlipFilter::filter_def_cuda<2, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
124template void SlipFilter::filter_def_cuda<3, float, std::uint64_t>(float *, const float * const, const std::uint64_t * const, const Index);
125template void SlipFilter::filter_def_cuda<3, double, std::uint64_t>(double *, const double * const, const std::uint64_t * const, const Index);
126template void SlipFilter::filter_def_cuda<3, float, std::uint32_t>(float *, const float * const, const std::uint32_t * const, const Index);
127template void SlipFilter::filter_def_cuda<3, double, std::uint32_t>(double *, const double * const, const std::uint32_t * const, const Index);
128
129/// \endcond