#include <kernel/shape.hpp>
#include <kernel/geometry/conformal_mesh.hpp>
#include <kernel/trafo/standard/mapping.hpp>
#include <kernel/space/lagrange2/element.hpp>

#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>
#include <cooperative_groups/reduce.h>

namespace cg = cooperative_groups;

namespace VoxelAssembly
template<typename DT_, typename IT_>

template<typename DT_>

template<typename DT_, typename IT_>

template<typename DT_>
template<typename SpaceHelp_>
  typedef SpaceHelp_ SpaceHelp;
  typedef typename SpaceHelp::SpaceType SpaceType;
  typedef typename SpaceHelp::DataType DataType;
  typedef typename SpaceHelp::IndexType IndexType;

  static constexpr int dim = SpaceHelp::dim;
  static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
  static constexpr int num_loc_verts = SpaceHelp::num_verts;

  DataType local_coeffs[dim*num_loc_verts];
  DataType local_conv_dofs[dim*num_loc_dofs];
  IndexType local_dofs[num_loc_dofs];

  bool need_streamline;
  bool need_convection;
template<typename SpaceHelp_>
  typedef SpaceHelp_ SpaceHelp;
  typedef typename SpaceHelp::SpaceType SpaceType;
  typedef typename SpaceHelp::DataType DataType;
  typedef typename SpaceHelp::IndexType IndexType;

  static constexpr int dim = SpaceHelp::dim;
  static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
  static constexpr int num_loc_verts = SpaceHelp::num_verts;

  DataType local_coeffs[dim*num_loc_verts];
  DataType local_conv_dofs[dim*num_loc_dofs];
  DataType local_prim_dofs[dim*num_loc_dofs];
  IndexType local_dofs[num_loc_dofs];

  bool need_convection;
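// Illustrative sizing sketch (an assumption for exposition, not part of the header):
// for the Q2StandardQuad space one has dim = 2, num_loc_dofs = 9 and num_loc_verts = 4,
// so with double data and 32-bit indices the per-cell buffers above amount to
//   sizeof(double)*(dim*num_loc_verts + 2*dim*num_loc_dofs) + sizeof(std::uint32_t)*num_loc_dofs
//   = 8*(8 + 36) + 4*9 = 388 bytes per cell (before any alignment padding),
// which is the kind of number that decides how many cells fit into one block's shared memory.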
template<typename DT_, int dim_>
  for(int k = 0; k < dim_; ++k)
    lv += alpha * grad[k] * conv[k];
template<typename DT_, typename TG_>
CUDA_DEVICE inline void coalesced_format(const TG_& thread_group, DT_* target, int size)
{
  // each thread of the group zeroes a strided subset of the target array
  for(int i = thread_group.thread_rank(); i < size; i += thread_group.num_threads())
    target[i] = DT_(0);
}
template<typename DT_, typename TG_, int dim_>
CUDA_DEVICE inline void grouped_special_dot(const TG_& tg, DT_& loc_val, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
{
  auto coal_g = cg::coalesced_threads();
  DT_ lv = DT_(0);
  for(int k = 0; k < dim_; ++k)
  {
    lv += alpha * phi * conv[k];
  }
  // reduce the per-thread contributions over the coalesced group and let one lane commit them
  lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
  cg::invoke_one(coal_g, [&](){atomicAdd(&loc_val, lv);});
}
template<typename DT_, typename TG_, int dim_>
CUDA_DEVICE inline void grouped_dot(const TG_& tg, DT_& loc_val, const Tiny::Vector<DT_, dim_>& grad, const DT_* conv, DT_ alpha = DT_(1))
{
  auto coal_g = cg::coalesced_threads();
  DT_ lv = DT_(0);
  for(int k = 0; k < dim_; ++k)
  {
    lv += alpha * grad[k] * conv[k];
  }
  lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
  cg::invoke_one(coal_g, [&](){atomicAdd(&loc_val, lv);});
}
template<typename DT_, typename TG_, int dim_>
CUDA_DEVICE inline void grouped_axpy(const TG_& tg, DT_* loc_v, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
{
  auto coal_g = cg::coalesced_threads();
  for(int k = 0; k < dim_; ++k)
  {
    DT_ lv = cg::reduce(coal_g, alpha * phi * conv[k], cg::plus<DT_>());
    cg::invoke_one(coal_g, [&](){atomicAdd(loc_v+k, lv);});
  }
}
template<typename DT_, typename TG_, int dim_>
CUDA_DEVICE inline void grouped_add_outer_product(const TG_& tg, DT_* loc_grad_v, const DT_* conv, const Tiny::Vector<DT_, dim_>& grad, DT_ alpha = DT_(1))
{
  auto coal_g = cg::coalesced_threads();
  for(int i(0); i < dim_; ++i)
  {
    for(int j(0); j < dim_; ++j)
    {
      DT_ lv = cg::reduce(coal_g, alpha * conv[i] * grad[j], cg::plus<DT_>());
      cg::invoke_one(coal_g, [&](){atomicAdd(&loc_grad_v[i*dim_ + j], lv);});
    }
  }
}
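// Minimal usage sketch (hypothetical, not part of the header): the grouped_* helpers
// above all follow the same pattern - every active thread contributes a value, the
// values are reduced over the coalesced group, and exactly one lane commits the sum
// with an atomic. A stand-alone kernel using that pattern directly:
__global__ void sum_kernel(const double* values, int n, double* global_sum)
{
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if(idx >= n)
    return; // threads past the end simply drop out of the coalesced group
  auto coal_g = cg::coalesced_threads();
  // warp-level reduction over all currently active threads
  const double lv = cg::reduce(coal_g, values[idx], cg::plus<double>());
  // one thread per coalesced group performs the atomic update
  cg::invoke_one(coal_g, [&](){ atomicAdd(global_sum, lv); });
}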
template<typename DT_>
CUDA_DEVICE DT_* get_aligned_address(void* ptr)
{
  constexpr std::uint64_t align = alignof(DT_);
  std::uint64_t ptr_a = reinterpret_cast<std::uint64_t>(ptr);
  return (DT_*)((ptr_a + align - 1u) & (-align));
}
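// Hypothetical usage sketch (not part of the header): get_aligned_address is the usual
// way to carve a properly aligned typed array out of raw dynamic shared memory, here
// assuming DT_ = double and a launch with enough dynamic shared memory for 128 entries:
__global__ void aligned_shared_demo()
{
  extern __shared__ char smem_raw[];
  double* data = get_aligned_address<double>(smem_raw);  // first double-aligned address inside smem_raw
  coalesced_format(cg::this_thread_block(), data, 128);  // cooperatively zero-fill the 128 entries
}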
template<typename DT_>
  DT_ frechet_material;
  bool need_frechet_material;
template<typename DT_, MaterialType mat_type>

template<typename DT_, MaterialType mat_type>
template<typename DT_>
  DT_ mu_0, exp, lambda;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
  #else
    return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, a_T;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
  #else
    return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp);
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
      CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
  #else
    return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
      Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
      CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1)) / yasuda_a);
  #else
    return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
      Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1)) / yasuda_a);
  #endif
  }
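// Hypothetical host-side restatement (not part of the header, requires <cmath>) of the
// closed forms the viscosity operators above evaluate; 'n' stands for the member 'exp',
// a_T is the temperature shift factor, and the function names are illustrative only:
//   first model:    mu = a_T * mu_0 * (1 + lambda*a_T*gamma_dot)^(-n)
//   Carreau-Yasuda: mu = a_T*mu_inf + a_T*(mu_0 - mu_inf) * (1 + (lambda*a_T*gamma_dot)^a)^((n-1)/a)
inline double viscosity_model_a(double gamma_dot, double a_T, double mu_0, double lambda, double n)
{
  return a_T * mu_0 * std::pow(1.0 + lambda * a_T * gamma_dot, -n);
}

inline double viscosity_carreau_yasuda(double gamma_dot, double a_T, double mu_0, double mu_inf,
                                       double lambda, double n, double a)
{
  return a_T * mu_inf + a_T * (mu_0 - mu_inf)
       * std::pow(1.0 + std::pow(lambda * a_T * gamma_dot, a), (n - 1.0) / a);
}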
template<typename DT_, MaterialType mat_type>

template<typename DT_, MaterialType mat_type>
template<typename DT_>
  DT_ mu_0, exp, lambda;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
  {
  #ifdef __CUDA_ARCH__
    return - a_T * a_T * mu_0 * lambda * exp *
      CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
  #else
    return - a_T * a_T * mu_0 * lambda * exp *
      Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, a_T;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
  {
  #ifdef __CUDA_ARCH__
    return - a_T * a_T * mu_0 * lambda * exp *
      CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
  #else
    return - a_T * a_T * mu_0 * lambda * exp *
      Math::pow(DT_(1) + lambda * a_T * gamma_dot, - exp - DT_(1));
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
      CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
      yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
  #else
    return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
      Math::pow(DT_(1.0) + Math::pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
      yasuda_a * lambda * a_T * Math::pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
  #endif
  }
template<typename DT_>
  DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

  CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
  {
  #ifdef __CUDA_ARCH__
    return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
      CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
      yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
  #else
    return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
      Math::pow(DT_(1.0) + Math::pow(lambda * gamma_dot, yasuda_a), ((exp - DT_(1.0) - yasuda_a) / yasuda_a)) *
      yasuda_a * lambda * a_T * Math::pow(lambda * gamma_dot * a_T, yasuda_a - DT_(1.0));
  #endif
  }
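// Hypothetical host-side restatement (not part of the header, requires <cmath>) of the
// gamma_dot-derivatives the operators above return; again 'n' stands for 'exp' and the
// function names are illustrative only:
inline double d_viscosity_model_a(double gamma_dot, double a_T, double mu_0, double lambda, double n)
{
  // d/d(gamma_dot) [ a_T*mu_0*(1 + lambda*a_T*gamma_dot)^(-n) ]
  //   = -a_T^2 * mu_0 * lambda * n * (1 + lambda*a_T*gamma_dot)^(-n-1)
  return -a_T * a_T * mu_0 * lambda * n * std::pow(1.0 + lambda * a_T * gamma_dot, -n - 1.0);
}

inline double d_viscosity_carreau_yasuda(double gamma_dot, double a_T, double mu_0, double mu_inf,
                                         double lambda, double n, double a)
{
  // outer derivative times inner derivative of the Carreau-Yasuda law,
  // written to mirror the CUDA/host branches above term by term
  return a_T * (mu_0 - mu_inf) * ((n - 1.0) / a)
       * std::pow(1.0 + std::pow(lambda * gamma_dot, a), (n - 1.0 - a) / a)
       * a * lambda * a_T * std::pow(lambda * gamma_dot * a_T, a - 1.0);
}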
Space::Lagrange2::Element< Trafo::Standard::Mapping< Geometry::ConformalMesh< Shape::Hypercube< 3 > > > > Q2StandardHexa
  Q2 hexahedron space.
Space::Lagrange2::Element< Trafo::Standard::Mapping< Geometry::ConformalMesh< Shape::Hypercube< 2 > > > > Q2StandardQuad
  Q2 quadrilateral space.
MaterialType
  Enum for different material types.
std::uint64_t Index
  Index data type.

Data for the burgers assembler.

A data field for a cubature rule.
  const DT_* cub_wg
    The cubature weights.
  int num_cubs
    Number of cubature points.
  const void* cub_pt
    The cubature point data array.

A data field for all necessary values that define the dof mapping for assembly.
  Index cell_num
    The number of cells.
  const IT_* cell_to_dof
    The cell-to-dof mapping, where cell_to_dof[i], ..., cell_to_dof[i+cell_dofs-1] are the dofs of one cell.
  Index node_size
    The number of nodes.
  const IT_* cell_to_dof_sorter
    Array of sorting indices for cell_to_dof.
  const void* nodes
    An array of the nodes fitting to the cell_to_dof mapping.

Data for the material burgers assembler.
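The flat cell_to_dof layout described above is traversed cell by cell; a minimal sketch under the assumption that cell_dofs denotes the number of dofs per cell (e.g. 9 for Q2 quadrilaterals), with illustrative names that are not part of the header:

// Hypothetical traversal of the flat cell_to_dof array described above.
template<typename IT_>
Index visit_cell_dofs(const IT_* cell_to_dof, Index cell_num, int cell_dofs)
{
  Index checksum(0);
  for(Index cell = 0; cell < cell_num; ++cell)
  {
    // dofs of this cell: cell_to_dof[cell*cell_dofs], ..., cell_to_dof[cell*cell_dofs + cell_dofs - 1]
    const IT_* dofs = cell_to_dof + cell * Index(cell_dofs);
    for(int j = 0; j < cell_dofs; ++j)
      checksum += Index(dofs[j]); // dofs[j] is the j-th global dof of the current cell
  }
  return checksum;
}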