FEAT 3
Finite Element Analysis Toolbox
voxel_assembly_common.hpp
// FEAT3: Finite Element Analysis Toolbox, Version 3
// Copyright (C) 2010 by Stefan Turek & the FEAT group
// FEAT3 is released under the GNU General Public License version 3,
// see the file 'copyright.txt' in the top level directory for details.

#pragma once

#include <kernel/base_header.hpp>
#include <kernel/shape.hpp>
#include <kernel/geometry/conformal_mesh.hpp>
#include <kernel/trafo/standard/mapping.hpp>
#include <kernel/space/lagrange2/element.hpp>

#ifdef FEAT_HAVE_OMP
#include "omp.h"
#endif

#ifdef __CUDACC__
#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>
#include <cooperative_groups/reduce.h>

namespace cg = cooperative_groups;
#endif
namespace FEAT
{
  namespace VoxelAssembly
  {
    /// Q2 quadrilateral space.
    typedef Space::Lagrange2::Element<Trafo::Standard::Mapping<Geometry::ConformalMesh<Shape::Hypercube<2>>>> Q2StandardQuad;

    /// Q2 hexahedron space.
    typedef Space::Lagrange2::Element<Trafo::Standard::Mapping<Geometry::ConformalMesh<Shape::Hypercube<3>>>> Q2StandardHexa;

    /// Q2 space on a hypercube mesh of dimension dim_; the alias name was elided in the listing, so a descriptive name is assumed here.
    template<int dim_>
    using Q2StandardHyperCube = Space::Lagrange2::Element<Trafo::Standard::Mapping<Geometry::ConformalMesh<Shape::Hypercube<dim_>>>>;

    /// A data field for all necessary values that define the dof mapping for assembly.
    template<typename DT_, typename IT_>
    struct AssemblyMappingData
    {
      /// The cell to dof, where cell_to_dof[i],..., cell_to_dof[i+cell_dofs-1] are the dofs of one cell.
      const IT_* cell_to_dof;

      /// Array of sorting indices of cell_to_dof.
      const IT_* cell_to_dof_sorter;

      /// An array of the nodes fitting to the cell_to_dof mapping.
      const void* nodes;
    };

    /// The cubature rule data used by the assembly kernels.
    template<typename DT_>
    struct AssemblyCubatureData
    {
      /// The cubature point data array.
      const void* cub_pt;

      /// The cubature weights.
      const DT_* cub_wg;
    };

    /// A raw data wrapper for a CSR matrix.
    template<typename DT_, typename IT_>
    struct CSRMatrixData
    {
      DT_* data;
      const IT_* row_ptr;
      const IT_* col_idx;
      Index num_rows;
      Index num_cols;
    };

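    // For illustration (comment not part of the original header): the wrapper above uses the
    // standard CSR layout, e.g. the 2x3 matrix [[5, 0, 2], [0, 3, 0]] is stored as
    //   data    = {5, 2, 3}
    //   col_idx = {0, 2, 1}
    //   row_ptr = {0, 2, 3}   (row i occupies data[row_ptr[i]], ..., data[row_ptr[i+1]-1])
    // with num_rows = 2 and num_cols = 3.
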
    /// The parameter set of the burgers assembler.
    template<typename DT_>
    struct AssemblyBurgersData
    {
      /// The viscosity parameter.
      DT_ nu;
      /// The scaling factor of the reaction term.
      DT_ theta;
      /// The scaling factor of the convection term.
      DT_ beta;
      /// The scaling factor of the Frechet (Newton) convection term.
      DT_ frechet_beta;
      /// The streamline diffusion parameters.
      DT_ sd_delta;
      DT_ sd_nu;
      DT_ sd_v_norm;
      /// Use the deformation tensor formulation?
      bool deformation;
    };

    /// Thread-local assembly data for the burgers matrix kernels
    /// (the struct name was elided in the listing, so a descriptive name is assumed here).
    template<typename SpaceHelp_>
    struct BurgersMatrixLocalData
    {
      typedef SpaceHelp_ SpaceHelp;
      typedef typename SpaceHelp::SpaceType SpaceType;
      typedef typename SpaceHelp::DataType DataType;
      typedef typename SpaceHelp::IndexType IndexType;
      static constexpr int dim = SpaceHelp::dim;
      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
      static constexpr int num_loc_verts = SpaceHelp::num_verts;
      DataType local_coeffs[dim*num_loc_verts];
      DataType local_conv_dofs[dim*num_loc_dofs];
      IndexType local_dofs[num_loc_dofs];
      // these can be defined in shared memory
      DataType tol_eps;
      bool need_streamline;
      bool need_convection;
    };

    /// Thread-local assembly data for the burgers defect/vector kernels
    /// (the struct name was elided in the listing, so a descriptive name is assumed here).
    template<typename SpaceHelp_>
    struct BurgersVectorLocalData
    {
      typedef SpaceHelp_ SpaceHelp;
      typedef typename SpaceHelp::SpaceType SpaceType;
      typedef typename SpaceHelp::DataType DataType;
      typedef typename SpaceHelp::IndexType IndexType;
      static constexpr int dim = SpaceHelp::dim;
      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
      static constexpr int num_loc_verts = SpaceHelp::num_verts;
      DataType local_coeffs[dim*num_loc_verts];
      DataType local_conv_dofs[dim*num_loc_dofs];
      DataType local_prim_dofs[dim*num_loc_dofs];
      IndexType local_dofs[num_loc_dofs];
      // these can be defined in shared memory
      bool need_convection;
    };

#ifdef __CUDACC__
    /// Computes alpha times the dot product of a gradient vector and a raw convection array.
    template<typename DT_, int dim_>
    CUDA_DEVICE inline DT_ dot(const Tiny::Vector<DT_, dim_>& grad, const DT_* conv, DT_ alpha = DT_(1))
    {
      DT_ lv(DT_(0));
      for(int k = 0; k < dim_; ++k)
      {
        lv += alpha * grad[k] * conv[k];
      }
      return lv;
    }

    /// Formats (zeroes) a target array cooperatively; the entries are strided across the thread group.
    template<typename DT_, typename TG_>
    CUDA_DEVICE inline void coalesced_format(const TG_& thread_group, DT_* target, int size)
    {
      for(int i = thread_group.thread_rank(); i < size; i += thread_group.num_threads())
      {
        target[i] = DT_(0);
      }
    }

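    // For illustration (comment not part of the original header): with a group of 32 threads
    // and size = 100, thread 0 zeroes entries 0, 32, 64, 96, thread 1 zeroes entries
    // 1, 33, 65, 97, and so on, so the whole array is formatted without two threads ever
    // touching the same entry.
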
    /// Accumulates the sum of alpha * phi * conv[k] over k, reduced over the coalesced thread
    /// group, into loc_val with a single atomic add.
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_special_dot(const TG_& tg, DT_& loc_val, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      DT_ lv(DT_(0));
      for(int k = 0; k < dim_; ++k)
      {
        lv += alpha * phi * conv[k];
      }
      lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
      cg::invoke_one(coal_g, [&](){atomicAdd(&loc_val, lv);});
    }

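    // The grouped_* helpers above and below all follow the same reduction pattern: each
    // participating thread computes its partial value, cg::reduce() sums the partial values
    // across the coalesced thread group, and cg::invoke_one() lets exactly one thread of the
    // group commit the reduced result with a single atomicAdd(), instead of every thread
    // issuing its own atomic update.
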
    /// Accumulates alpha * dot(grad, conv), reduced over the coalesced thread group,
    /// into loc_val with a single atomic add.
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_dot(const TG_& tg, DT_& loc_val, const Tiny::Vector<DT_, dim_>& grad, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      DT_ lv(DT_(0));
      for(int k = 0; k < dim_; ++k)
      {
        lv += alpha * grad[k] * conv[k];
      }
      lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
      cg::invoke_one(coal_g, [&](){atomicAdd(&loc_val, lv);});
    }

    /// Accumulates alpha * phi * conv[k], reduced over the coalesced thread group,
    /// into loc_v[k] for each component k, with one atomic add per component.
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_axpy(const TG_& tg, DT_* loc_v, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      for(int k = 0; k < dim_; ++k)
      {
        // this could also be done a bit more elegantly by using async reduce versions...
        DT_ lv(DT_(0));
        lv = cg::reduce(coal_g, alpha * phi * conv[k], cg::plus<DT_>());
        cg::invoke_one(coal_g, [&](){atomicAdd(loc_v+k, lv);});
        // coal_g.sync();
      }
    }

    /// Accumulates the scaled outer product alpha * conv * grad^T, reduced over the coalesced
    /// thread group, into the dim_ x dim_ matrix loc_grad_v, with one atomic add per entry.
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_add_outer_product(const TG_& tg, DT_* loc_grad_v, const DT_* conv, const Tiny::Vector<DT_, dim_>& grad, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      for(int i(0); i < dim_; ++i)
      {
        for(int j(0); j < dim_; ++j)
        {
          DT_ lv = cg::reduce(coal_g, alpha * conv[i] * grad[j], cg::plus<DT_>());
          cg::invoke_one(coal_g, [&](){atomicAdd(&loc_grad_v[i*dim_ + j], lv);});
        }
      }
    }

    /// Returns the first address at or after ptr that is properly aligned for DT_.
    template<typename DT_>
    CUDA_DEVICE DT_* get_aligned_address(void* ptr)
    {
      // The alignment is always a power of 2, so we can use the following bit magic:
      // first add (alignment-1) to the address, which pushes it to or past the next aligned
      // address, up to the bits that fall inside the alignment.
      // Then simply cut off everything below the aligned address by ANDing with the negated
      // alignment, which is just (alignment-1) with all of its bits flipped.
      constexpr std::uint64_t align = alignof(DT_);
      std::uint64_t ptr_a = reinterpret_cast<std::uint64_t>(ptr);
      return (DT_*)((ptr_a + align - 1u)&(-align));
    }

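    // Worked example (comment not part of the original header): for DT_ = double we have
    // align = 8. An address ptr_a = 0x...13 becomes 0x...13 + 7 = 0x...1A, and ANDing with
    // -8 (i.e. 0x...F8) clears the low three bits, yielding the next aligned address 0x...18.
    // An address that is already 8-byte aligned is left unchanged by the same computation.
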
#endif

    /// Data for the material burgers assembler
    /// (the struct name was elided in the listing, so a descriptive name is assumed here).
    template<typename DT_>
    struct MaterialBurgersData
    {
      DT_ frechet_material;
      DT_ reg_eps;
      DT_ mu_0;
      DT_ exp;
      DT_ lambda;
      DT_ a_T;
      DT_ yasuda_a;
      DT_ mu_inf;
      bool need_frechet_material;
    };

    /// Enum for different material types.
    enum class MaterialType
    {
      carreau = 0,
      carreauYasuda = 1
    };

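    // The visco functors in the Intern namespace below implement the following viscosity
    // laws (read directly from their operator() bodies), where gamma_dot denotes the shear
    // rate and a_T an additional scaling factor:
    //   carreau:       mu = a_T * mu_0 * (1 + lambda * a_T * gamma_dot)^(-exp)
    //   carreauYasuda: mu = a_T * mu_inf + a_T * (mu_0 - mu_inf)
    //                       * (1 + (lambda * a_T * gamma_dot)^yasuda_a)^((exp - 1) / yasuda_a)
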
    namespace Intern
    {
      /// Viscosity functors, declared here and specialized for each MaterialType below.
      template<typename DT_, MaterialType mat_type>
      struct ViscoFunctorNew;

      template<typename DT_, MaterialType mat_type>
      struct ViscoFunctor;

      template<typename DT_>
      struct ViscoFunctorNew<DT_, MaterialType::carreau>
      {
        DT_ mu_0, exp, lambda;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
        {
#ifdef __CUDACC__
          return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
#else
          return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
#endif
        }
      };

      template<typename DT_>
      struct ViscoFunctor<DT_, MaterialType::carreau>
      {
        DT_ mu_0, exp, lambda, a_T;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
        {
#ifdef __CUDACC__
          return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
#else
          return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
#endif
        }
      };

      template<typename DT_>
      struct ViscoFunctorNew<DT_, MaterialType::carreauYasuda>
      {
        DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
        {
#ifdef __CUDACC__
          return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
            CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#else
          return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
            Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#endif
        }
      };

      template<typename DT_>
      struct ViscoFunctor<DT_, MaterialType::carreauYasuda>
      {
        DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
        {
#ifdef __CUDACC__
          return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
            CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#else
          return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
            Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#endif
        }
      };

      /// Derivatives of the viscosity laws with respect to the shear rate,
      /// declared here and specialized for each MaterialType below.
      template<typename DT_, MaterialType mat_type>
      struct ViscoDFunctorNew;

      template<typename DT_, MaterialType mat_type>
      struct ViscoDFunctor;

      template<typename DT_>
      struct ViscoDFunctorNew<DT_, MaterialType::carreau>
      {
        DT_ mu_0, exp, lambda;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
        {
#ifdef __CUDACC__
          return - a_T * a_T * mu_0 * lambda * exp *
            CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
#else
          return - a_T * a_T * mu_0 * lambda * exp *
            Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
#endif
        }
      };

      template<typename DT_>
      struct ViscoDFunctor<DT_, MaterialType::carreau>
      {
        DT_ mu_0, exp, lambda, a_T;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
        {
#ifdef __CUDACC__
          return - a_T * a_T * mu_0 * lambda * exp *
            CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
#else
          return - a_T * a_T * mu_0 * lambda * exp *
            Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
#endif
        }
      };

      template<typename DT_>
      struct ViscoDFunctorNew<DT_, MaterialType::carreauYasuda>
      {
        DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
        {
#ifdef __CUDACC__
          return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
            CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
            yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
#else
          return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
            Math::pow(DT_(1.0) + Math::pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
            yasuda_a * lambda * a_T * Math::pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
#endif
        }
      };

      template<typename DT_>
      struct ViscoDFunctor<DT_, MaterialType::carreauYasuda>
      {
        DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

        CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
        {
#ifdef __CUDACC__
          return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
            CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
            yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
#else
          return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
            Math::pow(DT_(1.0) + Math::pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
            yasuda_a * lambda * a_T * Math::pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
#endif
        }
      };
    } // namespace Intern
  } // namespace VoxelAssembly
} // namespace FEAT
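
// Hypothetical usage sketch (not part of this header): the visco functors are plain
// aggregates, so they can be brace-initialized with the material parameters and then
// evaluated for a given shear rate, e.g. for the Carreau model in double precision:
//
//   FEAT::VoxelAssembly::Intern::ViscoFunctor<double, FEAT::VoxelAssembly::MaterialType::carreau>
//     visco{/*mu_0*/ 1.0, /*exp*/ 0.5, /*lambda*/ 0.1, /*a_T*/ 1.0};
//   double mu = visco(2.0); // = 1.0 * 1.0 * pow(1.0 + 0.1 * 1.0 * 2.0, -0.5)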