#include <kernel/shape.hpp>
#include <kernel/geometry/conformal_mesh.hpp>
#include <kernel/trafo/standard/mapping.hpp>
#include <kernel/space/lagrange2/element.hpp>

#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>
#include <cooperative_groups/reduce.h>

namespace cg = cooperative_groups;
 
  namespace VoxelAssembly
  {
    template<typename DT_, typename IT_>
    struct /* ... */; // declaration elided in this excerpt

    template<typename DT_>
    struct /* ... */;

    template<typename DT_, typename IT_>
    struct /* ... */;

    template<typename DT_>
    struct /* ... */;

    // per-cell local data used by the assembly kernels
    template<typename SpaceHelp_>
    struct /* ... */
    {
      typedef SpaceHelp_ SpaceHelp;
      typedef typename SpaceHelp::SpaceType SpaceType;
      typedef typename SpaceHelp::DataType DataType;
      typedef typename SpaceHelp::IndexType IndexType;

      static constexpr int dim = SpaceHelp::dim;
      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
      static constexpr int num_loc_verts = SpaceHelp::num_verts;

      // per-cell scratch arrays: trafo coefficients, convection dofs, dof indices
      DataType local_coeffs[dim*num_loc_verts];
      DataType local_conv_dofs[dim*num_loc_dofs];
      IndexType local_dofs[num_loc_dofs];

      // flags for the optional streamline-diffusion and convection terms
      bool need_streamline;
      bool need_convection;
    };
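    // For example, instantiated with the Q2StandardHexa space helper (Lagrange-2
    // on Hypercube<3>), the constants above evaluate to dim = 3, num_loc_verts = 8
    // and num_loc_dofs = 27, so local_coeffs holds 24 and local_conv_dofs 81 values.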
 
    // variant that additionally stores the local primal dofs
    template<typename SpaceHelp_>
    struct /* ... */
    {
      typedef SpaceHelp_ SpaceHelp;
      typedef typename SpaceHelp::SpaceType SpaceType;
      typedef typename SpaceHelp::DataType DataType;
      typedef typename SpaceHelp::IndexType IndexType;

      static constexpr int dim = SpaceHelp::dim;
      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
      static constexpr int num_loc_verts = SpaceHelp::num_verts;

      DataType local_coeffs[dim*num_loc_verts];
      DataType local_conv_dofs[dim*num_loc_dofs];
      DataType local_prim_dofs[dim*num_loc_dofs];
      IndexType local_dofs[num_loc_dofs];

      bool need_convection;
    };
 
    // accumulates alpha * dot(grad, conv) into lv; the enclosing definition is
    // only partially shown in this excerpt
    template<typename DT_, int dim_>
    // ...
      for(int k = 0; k < dim_; ++k)
        lv += alpha * grad[k] * conv[k];
    // ...
 
    template<typename DT_, typename TG_>
    CUDA_DEVICE inline void coalesced_format(const TG_& thread_group, DT_* target, int size)
    {
      // strided loop over the thread group: each thread formats (zeroes) its share
      for(int i = thread_group.thread_rank(); i < size; i += thread_group.num_threads())
        target[i] = DT_(0);
    }
 
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_special_dot(const TG_& tg, DT_& loc_val, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      DT_ lv = DT_(0);
      for(int k = 0; k < dim_; ++k)
        lv += alpha * phi * conv[k];
      // sum the per-thread values over the coalesced group, then commit once
      lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
      cg::invoke_one(coal_g, [&](){ atomicAdd(&loc_val, lv); });
    }
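    // The pattern above (and in the helpers below) trades one atomicAdd per
    // thread for a single atomicAdd per coalesced group: cg::reduce combines
    // the partial values within the group, and cg::invoke_one elects exactly
    // one thread to commit the reduced value to memory.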
 
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_dot(const TG_& tg, DT_& loc_val, const Tiny::Vector<DT_, dim_>& grad, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      DT_ lv = DT_(0);
      for(int k = 0; k < dim_; ++k)
        lv += alpha * grad[k] * conv[k];
      lv = cg::reduce(coal_g, lv, cg::plus<DT_>());
      cg::invoke_one(coal_g, [&](){ atomicAdd(&loc_val, lv); });
    }
 
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_axpy(const TG_& tg, DT_* loc_v, const DT_ phi, const DT_* conv, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      for(int k = 0; k < dim_; ++k)
      {
        // reduce each component across the coalesced group, then commit once
        DT_ lv = cg::reduce(coal_g, alpha * phi * conv[k], cg::plus<DT_>());
        cg::invoke_one(coal_g, [&](){ atomicAdd(loc_v + k, lv); });
      }
    }
 
    template<typename DT_, typename TG_, int dim_>
    CUDA_DEVICE inline void grouped_add_outer_product(const TG_& tg, DT_* loc_grad_v, const DT_* conv, const Tiny::Vector<DT_, dim_>& grad, DT_ alpha = DT_(1))
    {
      auto coal_g = cg::coalesced_threads();
      for(int i(0); i < dim_; ++i)
      {
        for(int j(0); j < dim_; ++j)
        {
          DT_ lv = cg::reduce(coal_g, alpha * conv[i] * grad[j], cg::plus<DT_>());
          cg::invoke_one(coal_g, [&](){ atomicAdd(&loc_grad_v[i*dim_ + j], lv); });
        }
      }
    }
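    // Usage sketch -- a hypothetical example, not part of the original header:
    // all threads that cooperate on one cubature point call the grouped helper
    // together, so cg::coalesced_threads() inside spans exactly those threads.
    // The function name and parameters below are illustrative assumptions.
    template<typename DT_, int dim_>
    CUDA_DEVICE inline void example_accumulate_outer(DT_* loc_grad_v, const DT_* conv,
      const Tiny::Vector<DT_, dim_>& grad, DT_ cub_weight)
    {
      auto block = cg::this_thread_block();
      // every participating thread contributes conv (outer) grad, scaled by the
      // cubature weight; the helper reduces and commits one atomicAdd per entry
      grouped_add_outer_product(block, loc_grad_v, conv, grad, cub_weight);
    }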
 
    template<typename DT_>
    CUDA_DEVICE DT_* get_aligned_address(void* ptr)
    {
      // round ptr up to the next multiple of alignof(DT_); for a power-of-two
      // alignment, (-align) equals the mask ~(align - 1)
      constexpr std::uint64_t align = alignof(DT_);
      std::uint64_t ptr_a = reinterpret_cast<std::uint64_t>(ptr);
      return (DT_*)((ptr_a + align - 1u) & (-align));
    }
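    // Usage sketch (hypothetical, for illustration): carving a typed, correctly
    // aligned array out of a raw dynamic shared-memory block; the name
    // example_carve_array and its parameters are assumptions.
    template<typename DT_>
    CUDA_DEVICE DT_* example_carve_array(void* raw, int count, void*& rest)
    {
      // align the raw pointer before treating it as DT_*
      DT_* arr = get_aligned_address<DT_>(raw);
      rest = (void*)(arr + count); // space behind the carved array stays usable
      return arr;
    }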
 
    /// Data for the material burgers assembler.
    template<typename DT_>
    struct /* ... */
    {
      // ...
      DT_ frechet_material;
      // ...
      bool need_frechet_material;
    };
 
    // viscosity functors, specialized per MaterialType (an enum for the
    // different material laws; its definition is not shown in this excerpt)
    template<typename DT_, MaterialType mat_type>
    struct /* ... */; // primary template, definition elided

    template<typename DT_, MaterialType mat_type>
    struct /* ... */;

    // viscosity law with the temperature shift a_T passed per call
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
      {
#ifdef __CUDA_ARCH__
        return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, -exp);
#else
        return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, -exp);
#endif
      }
    };
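    // In formula form, the functor above evaluates
    //   mu(gamma_dot) = a_T * mu_0 * (1 + lambda * a_T * gamma_dot)^(-exp),
    // a generalized Newtonian viscosity law (shear-thinning for exp > 0).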
 
    // same law with the temperature shift a_T stored as a member
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, a_T;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
      {
#ifdef __CUDA_ARCH__
        return a_T * mu_0 * CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, -exp);
#else
        return a_T * mu_0 * Math::pow(DT_(1) + lambda * a_T * gamma_dot, -exp);
#endif
      }
    };
 
    // Carreau-Yasuda type law, a_T passed per call
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
      {
#ifdef __CUDA_ARCH__
        return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
          CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#else
        return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
          Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#endif
      }
    };
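    // In formula form:
    //   mu(gamma_dot) = a_T * mu_inf + a_T * (mu_0 - mu_inf)
    //                   * (1 + (lambda * a_T * gamma_dot)^yasuda_a)^((exp - 1) / yasuda_a),
    // the Carreau-Yasuda form with limit viscosities mu_0 and mu_inf.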
 
    // same Carreau-Yasuda law with a_T stored as a member
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
      {
#ifdef __CUDA_ARCH__
        return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
          CudaMath::cuda_pow(DT_(1) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#else
        return a_T * mu_inf + a_T * (mu_0 - mu_inf) *
          Math::pow(DT_(1) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - 1) / yasuda_a);
#endif
      }
    };
 
    // Frechet derivatives d(mu)/d(gamma_dot) of the viscosity laws above,
    // again specialized per MaterialType
    template<typename DT_, MaterialType mat_type>
    struct /* ... */; // primary template, definition elided

    template<typename DT_, MaterialType mat_type>
    struct /* ... */;

    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
      {
#ifdef __CUDA_ARCH__
        return -a_T * a_T * mu_0 * lambda * exp *
          CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, -exp - DT_(1));
#else
        return -a_T * a_T * mu_0 * lambda * exp *
          Math::pow(DT_(1) + lambda * a_T * gamma_dot, -exp - DT_(1));
#endif
      }
    };
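    // This is the analytic derivative of the first viscosity law with respect
    // to gamma_dot: d/d(gamma_dot) [a_T * mu_0 * (1 + lambda * a_T * gamma_dot)^(-exp)]
    //   = -a_T^2 * mu_0 * lambda * exp * (1 + lambda * a_T * gamma_dot)^(-exp - 1).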
 
    // same derivative with a_T stored as a member
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, a_T;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
      {
#ifdef __CUDA_ARCH__
        return -a_T * a_T * mu_0 * lambda * exp *
          CudaMath::cuda_pow(DT_(1) + lambda * a_T * gamma_dot, -exp - DT_(1));
#else
        return -a_T * a_T * mu_0 * lambda * exp *
          Math::pow(DT_(1) + lambda * a_T * gamma_dot, -exp - DT_(1));
#endif
      }
    };
 
    // Frechet derivative of the Carreau-Yasuda law, a_T passed per call
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, yasuda_a, mu_inf;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot, DT_ a_T) const
      {
        // a_T is kept inside the Yasuda term (lambda * a_T * gamma_dot) so that
        // this matches the derivative of the viscosity law above exactly
#ifdef __CUDA_ARCH__
        return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
          CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1.0) - yasuda_a) / yasuda_a) *
          yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a - DT_(1.0));
#else
        return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
          Math::pow(DT_(1.0) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1.0) - yasuda_a) / yasuda_a) *
          yasuda_a * lambda * a_T * Math::pow(lambda * a_T * gamma_dot, yasuda_a - DT_(1.0));
#endif
      }
    };
 
    // same derivative with a_T stored as a member
    template<typename DT_>
    struct /* ... */
    {
      DT_ mu_0, exp, lambda, a_T, yasuda_a, mu_inf;

      CUDA_HOST_DEVICE DT_ operator()(DT_ gamma_dot) const
      {
#ifdef __CUDA_ARCH__
        return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
          CudaMath::cuda_pow(DT_(1.0) + CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1.0) - yasuda_a) / yasuda_a) *
          yasuda_a * lambda * a_T * CudaMath::cuda_pow(lambda * a_T * gamma_dot, yasuda_a - DT_(1.0));
#else
        return a_T * (mu_0 - mu_inf) * ((exp - DT_(1.0)) / yasuda_a) *
          Math::pow(DT_(1.0) + Math::pow(lambda * a_T * gamma_dot, yasuda_a), (exp - DT_(1.0) - yasuda_a) / yasuda_a) *
          yasuda_a * lambda * a_T * Math::pow(lambda * a_T * gamma_dot, yasuda_a - DT_(1.0));
#endif
      }
    };
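    // Again the analytic derivative: applying the chain rule to the
    // Carreau-Yasuda law yields the three factors above -- the outer exponent
    // (exp - 1)/yasuda_a, the Yasuda term raised to ((exp - 1 - yasuda_a)/yasuda_a),
    // and the inner derivative yasuda_a * lambda * a_T * (lambda * a_T * gamma_dot)^(yasuda_a - 1).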
 
  } // namespace VoxelAssembly