FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
unit_filter.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/unit_filter.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
11
12/// \cond internal
13namespace FEAT
14{
15 namespace LAFEM
16 {
17 namespace Intern
18 {
19 template <typename DT_, typename IT_>
20 __global__ void cuda_unit_filter_rhs(DT_ * v, const DT_ * sv_elements, const IT_ * sv_indices, const Index ue)
21 {
22 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
23 // grid strided for loop
24 for(int i = idx; idx < ue; idx += blockDim.x * gridDim.x)
25 {
26 v[sv_indices[i]] = sv_elements[i];
27 }
28 }
29
30 template <typename DT_, typename IT_>
31 __global__ void cuda_unit_filter_def(DT_ * v, const IT_ * sv_indices, const Index ue)
32 {
33 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
34 // grid strided for loop
35 for(int i = idx; idx < ue; idx += blockDim.x * gridDim.x)
36 {
37 v[sv_indices[i]] = DT_(0);
38 }
39 }
40
41 template<typename DT_, typename IT_>
42 __global__ void cuda_unit_filter_mat(DT_* __restrict__ mat, const IT_* __restrict__ const row_ptr, const IT_* __restrict__ const col_idx,
43 const IT_ * __restrict__ const sv_indices, const Index ue)
44 {
45 int idx = threadIdx.x + blockDim.x * blockIdx.x;
46 // grid strided for loop
47 for(int i = idx; idx < ue; idx += blockDim.x * gridDim.x)
48 {
49 const IT_ ix(sv_indices[i]);
50
51 // replace by unit row
52 for(IT_ j(row_ptr[ix]); j < row_ptr[ix + 1]; ++j)
53 {
54 mat[j] = (col_idx[j] == ix) ? DT_(1) : DT_(0);
55 }
56 }
57 }
58
59 template<typename DT_, typename IT_>
60 __global__ void cuda_unit_filter_offdiag_row_mat(DT_* __restrict__ mat, const IT_* __restrict__ const row_ptr, int block_width,
61 const IT_ * __restrict__ const sv_indices, const Index ue)
62 {
63 int idx = threadIdx.x + blockDim.x * blockIdx.x;
64 // grid strided for loop
65 for(int i = idx; idx < ue; idx += blockDim.x * gridDim.x)
66 {
67 const IT_ ix(sv_indices[i]);
68
69 // replace by unit row
70 for(IT_ j(row_ptr[ix]); j < row_ptr[ix + 1]; ++j)
71 {
72 for(int l(0); l < block_width; ++l)
73 mat[j*block_width +l] = DT_(0);
74 }
75 }
76 }
77
78 template<typename DT_, typename IT_>
79 __global__ void cuda_unit_filter_weak_matrix_rows(DT_ * __restrict__ mat_a, const DT_ * __restrict__ const mat_m, const IT_* __restrict__ const row_ptr,
80 const DT_ * __restrict__ const sv_elements, const IT_ * __restrict__ const sv_indices, const Index ue)
81 {
82 int idx = threadIdx.x + blockDim.x * blockIdx.x;
83 // grid strided for loop
84 for(int i = idx; idx < ue; idx += blockDim.x * gridDim.x)
85 {
86 const IT_ ix(sv_indices[i]);
87 const DT_ vx(sv_elements[i]);
88
89 // replace by unit row
90 for(IT_ j(row_ptr[ix]); j < row_ptr[ix + 1]; ++j)
91 {
92 mat_a[j] = vx * mat_m[j];
93 }
94 }
95 }
96 }
97 }
98}
99
100
101using namespace FEAT;
102using namespace FEAT::LAFEM;
103using namespace FEAT::LAFEM::Arch;
104
105template <typename DT_, typename IT_>
106void UnitFilter::filter_rhs_cuda(DT_ * v, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
107{
108 Index blocksize = Util::cuda_blocksize_misc;
109 dim3 grid;
110 dim3 block;
111 block.x = (unsigned)blocksize;
112 grid.x = (unsigned)ceil((ue)/(double)(block.x));
113
114 FEAT::LAFEM::Intern::cuda_unit_filter_rhs<<<grid, block>>>(v, sv_elements, sv_indices, ue);
115
116 cudaDeviceSynchronize();
117#ifdef FEAT_DEBUG_MODE
118 cudaError_t last_error(cudaGetLastError());
119 if (cudaSuccess != last_error)
120 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
121#endif
122}
123
124template void UnitFilter::filter_rhs_cuda(float *, const float * const, const std::uint64_t * const, const Index);
125template void UnitFilter::filter_rhs_cuda(double *, const double * const, const std::uint64_t * const, const Index);
126template void UnitFilter::filter_rhs_cuda(float *, const float * const, const std::uint32_t * const, const Index);
127template void UnitFilter::filter_rhs_cuda(double *, const double * const, const std::uint32_t * const, const Index);
128
129template <typename DT_, typename IT_>
130void UnitFilter::filter_def_cuda(DT_ * v, const IT_ * const sv_indices, const Index ue)
131{
132 Index blocksize = Util::cuda_blocksize_misc;
133 dim3 grid;
134 dim3 block;
135 block.x = (unsigned)blocksize;
136 grid.x = (unsigned)ceil((ue)/(double)(block.x));
137
138 FEAT::LAFEM::Intern::cuda_unit_filter_def<<<grid, block>>>(v, sv_indices, ue);
139
140 cudaDeviceSynchronize();
141#ifdef FEAT_DEBUG_MODE
142 cudaError_t last_error(cudaGetLastError());
143 if (cudaSuccess != last_error)
144 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
145#endif
146}
147
148template void UnitFilter::filter_def_cuda(float *, const std::uint64_t * const, const Index);
149template void UnitFilter::filter_def_cuda(double *, const std::uint64_t * const, const Index);
150template void UnitFilter::filter_def_cuda(float *, const std::uint32_t * const, const Index);
151template void UnitFilter::filter_def_cuda(double *, const std::uint32_t * const, const Index);
152
153template<typename DT_, typename IT_>
154void UnitFilter::filter_unit_mat_cuda(DT_* mat, const IT_* const row_ptr, const IT_* const col_idx, const IT_ * const sv_indices, const Index ue)
155{
156 Index blocksize = Util::cuda_blocksize_misc;
157 dim3 grid;
158 dim3 block;
159 block.x = (unsigned)blocksize;
160 grid.x = (unsigned)ceil((ue)/(double)(block.x));
161
162 FEAT::LAFEM::Intern::cuda_unit_filter_mat<DT_, IT_><<<grid, block >>>(mat, row_ptr, col_idx, sv_indices, ue);
163
164 cudaDeviceSynchronize();
165#ifdef FEAT_DEBUG_MODE
166 cudaError_t last_error(cudaGetLastError());
167 if (cudaSuccess != last_error)
168 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
169#endif
170}
171
172template void UnitFilter::filter_unit_mat_cuda<float, std::uint64_t>(float *, const std::uint64_t * const, const std::uint64_t * const, const std::uint64_t * const, const Index);
173template void UnitFilter::filter_unit_mat_cuda<float, std::uint32_t>(float *, const std::uint32_t * const, const std::uint32_t * const, const std::uint32_t * const, const Index);
174template void UnitFilter::filter_unit_mat_cuda<double, std::uint64_t>(double *, const std::uint64_t * const, const std::uint64_t * const, const std::uint64_t * const, const Index);
175template void UnitFilter::filter_unit_mat_cuda<double, std::uint32_t>(double *, const std::uint32_t * const, const std::uint32_t * const, const std::uint32_t * const, const Index);
176
177template<typename DT_, typename IT_>
178void UnitFilter::filter_offdiag_row_mat_cuda(DT_* mat, const IT_* const row_ptr, int block_width, const IT_ * const sv_indices, const Index ue)
179{
180 Index blocksize = Util::cuda_blocksize_misc;
181 dim3 grid;
182 dim3 block;
183 block.x = (unsigned)blocksize;
184 grid.x = (unsigned)ceil((ue)/(double)(block.x));
185
186 FEAT::LAFEM::Intern::cuda_unit_filter_offdiag_row_mat<DT_, IT_><<<grid, block >>>(mat, row_ptr, block_width, sv_indices, ue);
187
188 cudaDeviceSynchronize();
189#ifdef FEAT_DEBUG_MODE
190 cudaError_t last_error(cudaGetLastError());
191 if (cudaSuccess != last_error)
192 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
193#endif
194}
195
196template void UnitFilter::filter_offdiag_row_mat_cuda<float, std::uint64_t>(float *, const std::uint64_t * const, int, const std::uint64_t * const, const Index);
197template void UnitFilter::filter_offdiag_row_mat_cuda<float, std::uint32_t>(float *, const std::uint32_t * const, int, const std::uint32_t * const, const Index);
198template void UnitFilter::filter_offdiag_row_mat_cuda<double, std::uint64_t>(double *, const std::uint64_t * const, int, const std::uint64_t * const, const Index);
199template void UnitFilter::filter_offdiag_row_mat_cuda<double, std::uint32_t>(double *, const std::uint32_t * const, int, const std::uint32_t * const, const Index);
200
201template<typename DT_, typename IT_>
202void UnitFilter::filter_weak_matrix_rows_cuda(DT_* mat_a, const DT_ * const mat_m, const IT_* const row_ptr,
203 const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
204{
205 Index blocksize = Util::cuda_blocksize_misc;
206 dim3 grid;
207 dim3 block;
208 block.x = (unsigned)blocksize;
209 grid.x = (unsigned)ceil((ue)/(double)(block.x));
210
211 FEAT::LAFEM::Intern::cuda_unit_filter_weak_matrix_rows<DT_, IT_><<<grid, block >>>(mat_a, mat_m, row_ptr, sv_elements, sv_indices, ue);
212
213 cudaDeviceSynchronize();
214#ifdef FEAT_DEBUG_MODE
215 cudaError_t last_error(cudaGetLastError());
216 if (cudaSuccess != last_error)
217 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
218#endif
219}
220
221template void UnitFilter::filter_weak_matrix_rows_cuda<float, std::uint64_t>(float *, const float* const, const std::uint64_t * const, const float * const, const std::uint64_t * const, const Index);
222template void UnitFilter::filter_weak_matrix_rows_cuda<float, std::uint32_t>(float *, const float* const, const std::uint32_t * const, const float * const, const std::uint32_t * const, const Index);
223template void UnitFilter::filter_weak_matrix_rows_cuda<double, std::uint64_t>(double *, const double* const, const std::uint64_t * const, const double * const, const std::uint64_t * const, const Index);
224template void UnitFilter::filter_weak_matrix_rows_cuda<double, std::uint32_t>(double *, const double* const, const std::uint32_t * const, const double * const, const std::uint32_t * const, const Index);
225/// \endcond