FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
defo_assembler.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#include <kernel/base_header.hpp>
7#include <kernel/util/cuda_util.hpp>
8#include <kernel/voxel_assembly/defo_assembler.hpp>
9#include <kernel/lafem/matrix_gather_scatter_helper.hpp>
10#include <kernel/voxel_assembly/helper/space_helper.hpp>
11#include <kernel/util/tiny_algebra.hpp>
12
13namespace FEAT
14{
15 namespace VoxelAssembly
16 {
17 namespace Kernel
18 {
19 /**************************************************************************************************************/
20 /* CUDA Kernel */
21 /**************************************************************************************************************/
22
23 template<typename Space_, typename DT_, typename IT_, FEAT::Intern::MatrixGatherScatterPolicy pol_ = FEAT::Intern::MatrixGatherScatterPolicy::useLocalOps>
24 __global__ void defo_assembler_matrix1_bcsr(DT_* __restrict__ matrix_data,
25 const IT_* __restrict__ matrix_row_ptr, const IT_* __restrict__ matrix_col_idx, Index matrix_num_rows, Index matrix_num_cols,
26 const Tiny::Vector<DT_, Space_::world_dim>* __restrict__ cub_pt,
27 const DT_* __restrict__ cub_wg, int num_cubs, DT_ alpha,
28 const IT_* __restrict__ cell_to_dof, [[maybe_unused]] Index cell_num,
29 const Tiny::Vector<DT_, Space_::world_dim>* __restrict__ nodes, [[maybe_unused]] Index node_size,
30 const int* __restrict__ coloring_map, Index coloring_size, const IT_* __restrict__ cell_to_dof_sorter, DT_ nu)
31 {
32 Index idx = threadIdx.x + blockDim.x * blockIdx.x;
33 if(idx >= coloring_size)
34 return;
35 // define types
36 typedef Space_ SpaceType;
37 typedef DT_ DataType;
38 typedef IT_ IndexType;
39
40 typedef VoxelAssembly::SpaceHelper<SpaceType, DT_, IT_> SpaceHelp;
41
42 constexpr int dim = SpaceHelp::dim;
43
44 // define local sizes
45 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
46 // get number of nodes per element
47 constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
48 //our local datatypes
49 // typedef Tiny::Vector<DataType, dim> VecValueType;
50 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
51 // typedef Tiny::Vector<VecValueType, num_loc_dofs> LocalVectorType;
52 typedef Tiny::Matrix<MatValueType, num_loc_dofs, num_loc_dofs> LocalMatrixType;
53
54 // define local coefficients
55 Tiny::Matrix<DataType, dim, num_loc_verts> local_coeffs;
56 //define local array
57 LocalMatrixType loc_mat;
58
59
60 //now do work for this cell
61 Index cell = Index(coloring_map[idx]);
62 const IndexSetWrapper<IndexType> local_dofs_w{cell_to_dof, IndexType(num_loc_dofs)};
63 const IndexType* local_dofs = cell_to_dof + cell*num_loc_dofs;
64 const IndexType* local_dof_sorter = cell_to_dof_sorter + cell*num_loc_dofs;
65 SpaceHelp::set_coefficients(local_coeffs, local_dofs_w, nodes, cell);
66
67 // To minimize code repetition, we call the same kernel from cuda and non cuda build...
68 VoxelAssembly::Kernel::defo_assembly_kernel<SpaceHelp, LocalMatrixType>(loc_mat, local_coeffs, cub_pt, cub_wg, num_cubs, nu);
69
70 //scatter
71 LAFEM::template MatrixGatherScatterHelper<SpaceType, DataType, IndexType, pol_>::scatter_matrix_csr(loc_mat, (MatValueType*)matrix_data, local_dofs, local_dofs, IndexType(matrix_num_rows), IndexType(matrix_num_cols), matrix_row_ptr, matrix_col_idx, alpha, local_dof_sorter);
72
73 }
74
75
76 /**************************************************************************************************************/
77 /* CUDA Host OMP Kernels */
78 /**************************************************************************************************************/
79
80 template<typename Space_, typename DT_, typename IT_, FEAT::Intern::MatrixGatherScatterPolicy pol_ = FEAT::Intern::MatrixGatherScatterPolicy::useLocalOps>
81 void defo_assembler_matrix1_csr_host(DT_* matrix_data,
82 const IT_* matrix_row_ptr, const IT_* matrix_col_idx, Index matrix_num_rows, Index matrix_num_cols,
83 const Tiny::Vector<DT_, Space_::world_dim>* cub_pt,
84 const DT_* cub_wg, int num_cubs, DT_ alpha,
85 const IT_* cell_to_dof, [[maybe_unused]] Index cell_num,
86 const Tiny::Vector<DT_, Space_::world_dim>* nodes, [[maybe_unused]] Index node_size,
87 const int* coloring_map, Index coloring_size,
88 const IT_* cell_to_dof_sorter, DT_ nu)
89 {
90 // define types
91 typedef Space_ SpaceType;
92 typedef DT_ DataType;
93 typedef IT_ IndexType;
94
95 typedef VoxelAssembly::SpaceHelper<SpaceType, DT_, IT_> SpaceHelp;
96
97 constexpr int dim = SpaceHelp::dim;
98
99 // define local sizes
100 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
101 // get number of nodes per element
102 constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
103 //our local datatypes
104 // typedef Tiny::Vector<DataType, dim> VecValueType;
105 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
106 // typedef Tiny::Vector<VecValueType, num_loc_dofs> LocalVectorType;
107 typedef Tiny::Matrix<MatValueType, num_loc_dofs, num_loc_dofs> LocalMatrixType;
108
109
110
111 FEAT_PRAGMA_OMP(parallel for)
112 for(Index idx = 0; idx < coloring_size; ++idx)
113 {
114 // define local coefficients
115 Tiny::Matrix<DataType, dim, num_loc_verts> local_coeffs;
116 // and local matrix
117 LocalMatrixType loc_mat;
118 // now do work for this cell
119 int cell = coloring_map[idx];
120 // std::cout << "Starting with cell " << cell << std::endl;
121 const IndexSetWrapper<IndexType> local_dofs_w{cell_to_dof, IndexType(num_loc_dofs)};
122 const IndexType* local_dofs = cell_to_dof + IndexType(cell)*num_loc_dofs;
123 const IndexType* local_dof_sorter = cell_to_dof_sorter + IndexType(cell)*num_loc_dofs;
124
125 SpaceHelp::set_coefficients(local_coeffs, local_dofs_w, nodes, cell);
126
127 // To minimize code repetition, we call the same kernel from cuda and non cuda build...
128 VoxelAssembly::Kernel::defo_assembly_kernel<SpaceHelp, LocalMatrixType>(loc_mat, local_coeffs, cub_pt, cub_wg, num_cubs, nu);
129
130 // scatter
131 LAFEM::template MatrixGatherScatterHelper<SpaceType, DataType, IndexType, pol_>::scatter_matrix_csr(loc_mat, (MatValueType*)matrix_data, local_dofs, local_dofs, IndexType(matrix_num_rows), IndexType(matrix_num_cols), matrix_row_ptr, matrix_col_idx, alpha, local_dof_sorter);
132 }
133 }
134 }
135
136
137 /**************************************************************************************************************/
138 /* CUDA kernel wrapper */
139 /**************************************************************************************************************/
140
141 namespace Arch
142 {
143 template<typename Space_, typename DT_, typename IT_>
144 void assemble_defo_csr([[maybe_unused]] const Space_& space,
145 const CSRMatrixData<DT_, IT_>& matrix_data,
146 const AssemblyCubatureData<DT_>& cubature,
147 const AssemblyMappingData<DT_, IT_>& dof_mapping,
148 const std::vector<int*>& coloring_maps,
149 const std::vector<Index>& coloring_map_sizes,
150 DT_ alpha, DT_ nu
151 )
152 {
153 const Index blocksize = Util::cuda_blocksize_blocked_assembly;
154 // const Index blocksize = 64;
155
156 for(Index i = 0; i < coloring_maps.size(); ++i)
157 {
158 dim3 grid;
159 dim3 block;
160 block.x = (unsigned int)blocksize;
161 grid.x = (unsigned int)ceil(double(coloring_map_sizes[i])/double(block.x));
162
163 //kernel call, since this uses the standard stream, sync before next call is enforced:
164 VoxelAssembly::Kernel::template defo_assembler_matrix1_bcsr<Space_, DT_, IT_, FEAT::Intern::MatrixGatherScatterPolicy::useLocalSortHelper><<< grid, block >>>(
165 matrix_data.data, matrix_data.row_ptr, matrix_data.col_idx, matrix_data.num_rows, matrix_data.num_cols,
166 (const typename Tiny::Vector<DT_, Space_::world_dim>*) cubature.cub_pt,
167 cubature.cub_wg, cubature.num_cubs, alpha,
168 dof_mapping.cell_to_dof, dof_mapping.cell_num,
169 (const typename Tiny::Vector<DT_, Space_::world_dim>*) dof_mapping.nodes, dof_mapping.node_size,
170 (const int*) coloring_maps[i], coloring_map_sizes[i], dof_mapping.cell_to_dof_sorter, nu
171 );
172 }
173
174 //check for cuda error in our kernel
175 Util::cuda_check_last_error();
176 }
177
178 template<typename Space_, typename DT_, typename IT_>
179 void assemble_defo_csr_host([[maybe_unused]] const Space_& space,
180 const CSRMatrixData<DT_, IT_>& matrix_data,
181 const AssemblyCubatureData<DT_>& cubature,
182 const AssemblyMappingData<DT_, IT_>& dof_mapping,
183 const std::vector<int*>& coloring_maps,
184 const std::vector<Index>& coloring_map_sizes,
185 DT_ alpha, DT_ nu
186 )
187 {
188 for(Index col = 0; col < Index(coloring_maps.size()); ++col)
189 {
190 VoxelAssembly::Kernel::template defo_assembler_matrix1_csr_host<Space_, DT_, IT_, FEAT::Intern::MatrixGatherScatterPolicy::useLocalSortHelper>(
191 matrix_data.data, matrix_data.row_ptr, matrix_data.col_idx, matrix_data.num_rows, matrix_data.num_cols,
192 (const typename Tiny::Vector<DT_, Space_::world_dim>*) cubature.cub_pt,
193 cubature.cub_wg, cubature.num_cubs, alpha, dof_mapping.cell_to_dof, dof_mapping.cell_num,
194 (const typename Tiny::Vector<DT_, Space_::world_dim>*) dof_mapping.nodes, dof_mapping.node_size,
195 (const int*) coloring_maps[col], coloring_map_sizes[col], dof_mapping.cell_to_dof_sorter, nu
196 );
197 }
198 }
199 }
200 }
201}
202
203
204using namespace FEAT;
205using namespace FEAT::VoxelAssembly;
206
207template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
208 const std::vector<int*>&, const std::vector<Index>&, double, double);
209template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
210 const std::vector<int*>&, const std::vector<Index>&, float, float);
211template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
212 const std::vector<int*>&, const std::vector<Index>&, double, double);
213template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
214 const std::vector<int*>&, const std::vector<Index>&, float, float);
215// #ifdef FEAT_HAVE_HALFMATH
216// template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<Half, std::uint32_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint32_t>&,
217// const std::vector<int*>&, const std::vector<Index>&, Half, Half);
218// template void Arch::assemble_defo_csr(const Q2StandardQuad&, const CSRMatrixData<Half, std::uint64_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint64_t>&,
219// const std::vector<int*>&, const std::vector<Index>&, Half, Half);
220// #endif
221
222template void Arch::assemble_defo_csr_host(const Q2StandardQuad&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
223 const std::vector<int*>&, const std::vector<Index>&, double, double);
224template void Arch::assemble_defo_csr_host(const Q2StandardQuad&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
225 const std::vector<int*>&, const std::vector<Index>&, float, float);
226template void Arch::assemble_defo_csr_host(const Q2StandardQuad&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
227 const std::vector<int*>&, const std::vector<Index>&, double, double);
228template void Arch::assemble_defo_csr_host(const Q2StandardQuad&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
229 const std::vector<int*>&, const std::vector<Index>&, float, float);
230
231
232template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
233 const std::vector<int*>&, const std::vector<Index>&, double, double);
234template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
235 const std::vector<int*>&, const std::vector<Index>&, float, float);
236template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
237 const std::vector<int*>&, const std::vector<Index>&, double, double);
238template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
239 const std::vector<int*>&, const std::vector<Index>&, float, float);
240// #ifdef FEAT_HAVE_HALFMATH
241// template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<Half, std::uint32_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint32_t>&,
242// const std::vector<int*>&, const std::vector<Index>&, Half, Half);
243// template void Arch::assemble_defo_csr(const Q2StandardHexa&, const CSRMatrixData<Half, std::uint64_t>&, const AssemblyCubatureData<Half>&, const AssemblyMappingData<Half, std::uint64_t>&,
244// const std::vector<int*>&, const std::vector<Index>&, Half, Half);
245// #endif
246
247template void Arch::assemble_defo_csr_host(const Q2StandardHexa&, const CSRMatrixData<double, std::uint32_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint32_t>&,
248 const std::vector<int*>&, const std::vector<Index>&, double, double);
249template void Arch::assemble_defo_csr_host(const Q2StandardHexa&, const CSRMatrixData<float, std::uint32_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint32_t>&,
250 const std::vector<int*>&, const std::vector<Index>&, float, float);
251template void Arch::assemble_defo_csr_host(const Q2StandardHexa&, const CSRMatrixData<double, std::uint64_t>&, const AssemblyCubatureData<double>&, const AssemblyMappingData<double, std::uint64_t>&,
252 const std::vector<int*>&, const std::vector<Index>&, double, double);
253template void Arch::assemble_defo_csr_host(const Q2StandardHexa&, const CSRMatrixData<float, std::uint64_t>&, const AssemblyCubatureData<float>&, const AssemblyMappingData<float, std::uint64_t>&,
254 const std::vector<int*>&, const std::vector<Index>&, float, float);