FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
poisson_assembler.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#include <kernel/base_header.hpp>
7#include <kernel/util/cuda_util.hpp>
8#include <kernel/voxel_assembly/poisson_assembler.hpp>
9#include <kernel/lafem/matrix_gather_scatter_helper.hpp>
10#include <kernel/voxel_assembly/helper/space_helper.hpp>
11#include <kernel/util/tiny_algebra.hpp>
12
13namespace FEAT
14{
15 namespace VoxelAssembly
16 {
17 namespace Kernel
18 {
19 /**************************************************************************************************************/
20 /* CUDA Kernel */
21 /**************************************************************************************************************/
22
23 /**
24 * \brief Cuda kernel poisson csr matrix assembly
25 *
26 * \tparam Space_ The underlying spacetype.
27 * \tparam DT_ The datatype used.
28 * \tparam IT_ The indextype.
29 * \tparam pol_ The scatter and gather policy. See MatrixGatherScatterPolicy for details.
30 *
31 * \warning All ptr used here have to be unified or device pointer.
32 * \param[out] matrix_data The matrix data array initilized as a unified memory array. Is added upon, so format before!
33 * \param[in] matrix_row_ptr Row ptr of underlying csr matrix.
34 * \param[in] matrix_col_idx Column index array of underlying csr matrix.
35 * \param[in] matrix_num_rows Number of rows of csr matrix.
36 * \param[in] matrix_num_cols Number of columns of csr matrix.
37 * \param[in] cub_pt Array of cubature coordinate points.
38 * \param[in] cub_wg Array of cubature weights.
39 * \param[in] num_cubs Number of cubature points.
40 * \param[in] alpha Scaling parameter for assembly.
41 * \param[in] cell_to_dof Mapping cell to dof index.
42 * \param[in] cell_num Number of cells.
43 * \param[in] nodes Array of node points. Needed for local trafo assembly. /TODO: Version with precalced trafos.
44 * \param[in] node_size Number of nodes.
45 * \param[in] coloring_map Array of cell mapping for this color.
46 * \param[in] coloring_size Size of the coloring array.
47 * \param[in] cell_to_dof_sorter Depending on the used policy, this should either be nullptr, a local sorted array (or a col_ptr)
48 */
49 template<typename Space_, typename DT_, typename IT_, FEAT::Intern::MatrixGatherScatterPolicy pol_ = FEAT::Intern::MatrixGatherScatterPolicy::useLocalOps>
50 __global__ void poisson_assembler_matrix1_csr(DT_* __restrict__ matrix_data,
51 const IT_* __restrict__ matrix_row_ptr, const IT_* __restrict__ matrix_col_idx, Index matrix_num_rows, Index matrix_num_cols,
52 const Tiny::Vector<DT_, Space_::world_dim>* __restrict__ cub_pt,
53 const DT_* __restrict__ cub_wg, Index num_cubs, DT_ alpha,
54 const IT_* __restrict__ cell_to_dof, [[maybe_unused]] Index cell_num,
55 const Tiny::Vector<DT_, Space_::world_dim>* __restrict__ nodes, [[maybe_unused]] Index node_size,
56 const int* __restrict__ coloring_map, Index coloring_size, const IT_* __restrict__ cell_to_dof_sorter)
57 {
58 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
59 if(idx >= coloring_size)
60 return;
61 // define types
62 typedef Space_ SpaceType;
63 typedef DT_ DataType;
64 typedef IT_ IndexType;
65
66 typedef VoxelAssembly::SpaceHelper<SpaceType, DT_, IT_> SpaceHelp;
67
68 constexpr int dim = SpaceHelp::dim;
69
70 // define local sizes
71 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
72 // get number of nodes per element
73 constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
74 // Our local matrix type
75 typedef Tiny::Matrix<DataType, num_loc_dofs, num_loc_dofs> LocMatType;
76
77 // define local coeffs
78 Tiny::Matrix<DataType, dim, num_loc_verts> local_coeffs;
79
80 // define local arrays
81 Tiny::Matrix<DataType, num_loc_dofs, num_loc_dofs> loc_mat;
82
83 typename SpaceHelp::EvalData basis_data;
84
85 // now do work for this cell
86 const Index cell = Index(coloring_map[idx]);
87 const IndexSetWrapper<IndexType> local_dofs_w{cell_to_dof, IndexType(num_loc_dofs)};
88 const IndexType* local_dofs = cell_to_dof + cell*num_loc_dofs;
89 const IndexType* local_dof_sorter = cell_to_dof_sorter + cell*num_loc_dofs;
90
91 // printf("On cell %i My local dofs: ", cell);
92 // for(int i = 0; i < num_loc_dofs; ++i)
93 // {
94 // printf("%i ", *(local_dofs + i));
95 // }
96 // printf("\n");
97 SpaceHelp::set_coefficients(local_coeffs, local_dofs_w, nodes, cell);
98
99 // To minimize code repetition, we call the same kernel from cuda and non cuda build...
100 VoxelAssembly::Kernel::poisson_assembly_kernel<SpaceHelp, LocMatType>(loc_mat, local_coeffs, cub_pt, cub_wg, num_cubs);
101
102 //scatter
103 LAFEM::template MatrixGatherScatterHelper<SpaceType, DataType, IndexType, pol_>::scatter_matrix_csr(loc_mat, matrix_data, local_dofs, local_dofs, IndexType(matrix_num_rows), IndexType(matrix_num_cols), matrix_row_ptr, matrix_col_idx, alpha, local_dof_sorter);
104
105
106 }
107
108 /**************************************************************************************************************/
109 /* CUDA Host OMP Kernels */
110 /**************************************************************************************************************/
111 /** \copydoc(poisson_assembler_matrix1_csr())*/
112 template<typename Space_, typename DT_, typename IT_, FEAT::Intern::MatrixGatherScatterPolicy pol_ = FEAT::Intern::MatrixGatherScatterPolicy::useLocalOps>
113 void poisson_assembler_matrix1_csr_host(DT_* matrix_data,
114 const IT_* matrix_row_ptr, const IT_* matrix_col_idx, Index matrix_num_rows, Index matrix_num_cols,
115 const Tiny::Vector<DT_, Space_::world_dim>* cub_pt,
116 const DT_* cub_wg, Index num_cubs, DT_ alpha,
117 const IT_* cell_to_dof, [[maybe_unused]] Index cell_num,
118 const Tiny::Vector<DT_, Space_::world_dim>* nodes, [[maybe_unused]] Index node_size,
119 const int* coloring_map, Index coloring_size,
120 const IT_* cell_to_dof_sorter)
121 {
122 // define types
123 typedef Space_ SpaceType;
124 typedef DT_ DataType;
125 typedef IT_ IndexType;
126
127 typedef VoxelAssembly::SpaceHelper<SpaceType, DT_, IT_> SpaceHelp;
128
129 constexpr int dim = SpaceHelp::dim;
130
131 // define local sizes
132 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
133 // get number of nodes per element
134 constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
135 // Our local matrix type
136 typedef Tiny::Matrix<DataType, num_loc_dofs, num_loc_dofs> LocMatType;
137
138
139 FEAT_PRAGMA_OMP(parallel for)
140 for(Index idx = 0; idx < coloring_size; ++idx)
141 {
142 // define local coefficients
143 Tiny::Matrix<DataType, dim, num_loc_verts> local_coeffs;
144 // and local matrix
145 Tiny::Matrix<DataType, num_loc_dofs, num_loc_dofs> loc_mat;
146 // now do work for this cell
147 int cell = coloring_map[idx];
148 // std::cout << "Starting with cell " << cell << std::endl;
149 const IndexSetWrapper<IndexType> local_dofs_w{cell_to_dof, IndexType(num_loc_dofs)};
150 const IndexType* local_dofs = cell_to_dof + IndexType(cell)*num_loc_dofs;
151 const IndexType* local_dof_sorter = cell_to_dof_sorter + IndexType(cell)*num_loc_dofs;
152
153 SpaceHelp::set_coefficients(local_coeffs, local_dofs_w, nodes, cell);
154
155 // To minimize code repetition, we call the same kernel from cuda and non cuda build...
156 VoxelAssembly::Kernel::poisson_assembly_kernel<SpaceHelp, LocMatType>(loc_mat, local_coeffs, cub_pt, cub_wg, int(num_cubs));
157
158 // scatter
159 LAFEM::template MatrixGatherScatterHelper<SpaceType, DataType, IndexType, pol_>::scatter_matrix_csr(loc_mat, matrix_data, local_dofs, local_dofs, IndexType(matrix_num_rows), IndexType(matrix_num_cols), matrix_row_ptr, matrix_col_idx, alpha, local_dof_sorter);
160 }
161
162
163 }
164 }
165
166 namespace Arch
167 {
168 template<typename Space_, typename DT_, typename IT_>
169 void assemble_poisson_csr([[maybe_unused]] const Space_& space,
170 const CSRMatrixData<DT_, IT_>& matrix_data,
171 const AssemblyCubatureData<DT_>& cubature,
172 const AssemblyMappingData<DT_, IT_>& dof_mapping,
173 const std::vector<int*>& coloring_maps,
174 const std::vector<Index>& coloring_map_sizes,
175 DT_ alpha)
176 {
177 const Index blocksize = Util::cuda_blocksize_spmv;
178 // const Index blocksize = 64;
179
180 for(Index i = 0; i < coloring_maps.size(); ++i)
181 {
182 dim3 grid;
183 dim3 block;
184 block.x = (unsigned int)blocksize;
185 grid.x = (unsigned int)ceil(double(coloring_map_sizes[i])/double(block.x));
186
187 //kernel call, since this uses the standard stream, sync before next call is enforced:
188 VoxelAssembly::Kernel::template poisson_assembler_matrix1_csr<Space_, DT_, IT_, FEAT::Intern::MatrixGatherScatterPolicy::useLocalSortHelper><<< grid, block >>>(
189 matrix_data.data, matrix_data.row_ptr, matrix_data.col_idx, matrix_data.num_rows, matrix_data.num_cols,
190 (const typename Tiny::Vector<DT_, Space_::world_dim>*) cubature.cub_pt,
191 cubature.cub_wg, cubature.num_cubs, alpha,
192 dof_mapping.cell_to_dof, dof_mapping.cell_num,
193 (const typename Tiny::Vector<DT_, Space_::world_dim>*) dof_mapping.nodes, dof_mapping.node_size,
194 (const int*) coloring_maps[i], coloring_map_sizes[i], dof_mapping.cell_to_dof_sorter
195 );
196 }
197
198 //check for cuda error in our kernel
199 Util::cuda_check_last_error();
200 }
201
202 template<typename Space_, typename DT_, typename IT_>
203 void assemble_poisson_csr_host([[maybe_unused]] const Space_& space,
204 const CSRMatrixData<DT_, IT_>& matrix_data,
205 const AssemblyCubatureData<DT_>& cubature,
206 const AssemblyMappingData<DT_, IT_>& dof_mapping,
207 const std::vector<int*>& coloring_maps,
208 const std::vector<Index>& coloring_map_sizes,
209 DT_ alpha)
210 {
211 for(Index col = 0; col < Index(coloring_maps.size()); ++col)
212 {
213 VoxelAssembly::Kernel::template poisson_assembler_matrix1_csr_host<Space_, DT_, IT_, FEAT::Intern::MatrixGatherScatterPolicy::useLocalSortHelper>(
214 matrix_data.data, matrix_data.row_ptr, matrix_data.col_idx, matrix_data.num_rows, matrix_data.num_cols,
215 (const typename Tiny::Vector<DT_, Space_::world_dim>*) cubature.cub_pt,
216 cubature.cub_wg, cubature.num_cubs, alpha, dof_mapping.cell_to_dof, dof_mapping.cell_num,
217 (const typename Tiny::Vector<DT_, Space_::world_dim>*) dof_mapping.nodes, dof_mapping.node_size,
218 (const int*) coloring_maps[col], coloring_map_sizes[col], dof_mapping.cell_to_dof_sorter
219 );
220 }
221 }
222 }
223 }
224}
225
226using namespace FEAT;
227using namespace FEAT::VoxelAssembly;
228
229/*--------------------Poisson Assembler Q2Quad-------------------------------------------------*/
230template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
231 const std::vector<int*>&, const std::vector<Index>&, double);
232template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
233 const std::vector<int*>&, const std::vector<Index>&, float);
234template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
235 const std::vector<int*>&, const std::vector<Index>&, double);
236template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
237 const std::vector<int*>&, const std::vector<Index>&, float);
238// #ifdef FEAT_HAVE_HALFMATH
239// template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<Half, std::uint32_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint32_t>&,
240// const std::vector<int*>&, const std::vector<Index>&, Half);
241// template void Arch::assemble_poisson_csr(const Q2StandardQuad&, const CSRMatrixData<Half, std::uint64_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint64_t>&,
242// const std::vector<int*>&, const std::vector<Index>&, Half);
243// #endif
244
245template void Arch::assemble_poisson_csr_host(const Q2StandardQuad&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
246 const std::vector<int*>&, const std::vector<Index>&, double);
247template void Arch::assemble_poisson_csr_host(const Q2StandardQuad&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
248 const std::vector<int*>&, const std::vector<Index>&, float);
249template void Arch::assemble_poisson_csr_host(const Q2StandardQuad&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
250 const std::vector<int*>&, const std::vector<Index>&, double);
251template void Arch::assemble_poisson_csr_host(const Q2StandardQuad&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
252 const std::vector<int*>&, const std::vector<Index>&, float);
253
254/*---------------------Poisson Assembler Q2Hexa------------------------------------------------------*/
255template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
256 const std::vector<int*>&, const std::vector<Index>&, double);
257template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
258 const std::vector<int*>&, const std::vector<Index>&, float);
259template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
260 const std::vector<int*>&, const std::vector<Index>&, double);
261template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
262 const std::vector<int*>&, const std::vector<Index>&, float);
263// #ifdef FEAT_HAVE_HALFMATH
264// template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<Half, std::uint32_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint32_t>&,
265// const std::vector<int*>&, const std::vector<Index>&, Half);
266// template void Arch::assemble_poisson_csr(const Q2StandardHexa&, const CSRMatrixData<Half, std::uint64_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint64_t>&,
267// const std::vector<int*>&, const std::vector<Index>&, Half);
268// #endif
269
270template void Arch::assemble_poisson_csr_host(const Q2StandardHexa&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
271 const std::vector<int*>&, const std::vector<Index>&, double);
272template void Arch::assemble_poisson_csr_host(const Q2StandardHexa&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
273 const std::vector<int*>&, const std::vector<Index>&, float);
274template void Arch::assemble_poisson_csr_host(const Q2StandardHexa&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
275 const std::vector<int*>&, const std::vector<Index>&, double);
276template void Arch::assemble_poisson_csr_host(const Q2StandardHexa&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
277 const std::vector<int*>&, const std::vector<Index>&, float);