2#ifndef FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP 
    3#define FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP 1 
    6#include <kernel/backend.hpp> 
    8#include <kernel/voxel_assembly/voxel_assembly_common.hpp> 
    9#include <kernel/voxel_assembly/helper/data_handler.hpp> 
   10#include <kernel/voxel_assembly/helper/space_helper.hpp> 
   11#include <kernel/cubature/rule.hpp> 
   12#include <kernel/lafem/sparse_matrix_bcsr.hpp> 
   13#include <kernel/lafem/dense_vector_blocked.hpp> 
   14#include <kernel/trafo/standard/mapping.hpp> 
   15#include <kernel/geometry/conformal_mesh.hpp> 
   16#include <kernel/util/tiny_algebra.hpp> 
   17#include <kernel/global/vector.hpp> 
   18#include <kernel/lafem/vector_mirror.hpp> 
   19#include <kernel/voxel_assembly/burgers_assembler.hpp> 
   21#include <kernel/util/cuda_util.hpp> 
   26  namespace VoxelAssembly
 
   29    #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN) 
   31    template<
typename SpaceHelp_, 
bool need_streamdiff_ = true>
 
   34      typedef SpaceHelp_ SpaceHelp;
 
   35      static constexpr int dim = SpaceHelp::dim;
 
   36      typedef typename SpaceHelp::SpaceType SpaceType;
 
   37      typedef typename SpaceHelp::DataType DataType;
 
   39      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
   44      typename SpaceHelp::EvalData basis_data;
 
   45      typename SpaceHelp::JacobianMatrixType loc_jac;
 
   46      typename SpaceHelp::JacobianMatrixType loc_jac_inv;
 
   50      typename SpaceHelp::DomainPointType dom_point;
 
   60    template<
typename SpaceHelp_>
 
   63      typedef SpaceHelp_ SpaceHelp;
 
   64      static constexpr int dim = SpaceHelp::dim;
 
   65      typedef typename SpaceHelp::SpaceType SpaceType;
 
   66      typedef typename SpaceHelp::DataType DataType;
 
   68      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
   73      typename SpaceHelp::EvalData basis_data;
 
   74      typename SpaceHelp::JacobianMatrixType loc_jac;
 
   75      typename SpaceHelp::JacobianMatrixType loc_jac_inv;
 
   78      typename SpaceHelp::DomainPointType dom_point;
 
   97    template<
typename Space_, 
typename DT_, 
typename IT_>
 
  130      template<
typename SpaceHelp_, 
typename LocMatType_, 
typename LocVecType_, 
int dim_, 
int num_verts_, 
typename ViscFunc_, 
typename ViscDerFunc_, 
bool need_stream_diff_>
 
  132                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  135                                          const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps,
 
  136                                          ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
 
  138        typedef SpaceHelp_ SpaceHelp;
 
  139        constexpr int dim = SpaceHelp::dim;
 
  140        typedef typename SpaceHelp::SpaceType SpaceType;
 
  141        typedef typename SpaceHelp::DataType DataType;
 
  143        constexpr bool need_streamline = need_stream_diff_;
 
  147        const DataType& theta{burgers_params.theta};
 
  148        const DataType& beta{burgers_params.beta};
 
  149        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  150        const DataType& sd_delta{burgers_params.sd_delta};
 
  151        const DataType& sd_nu{burgers_params.sd_nu};
 
  152        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  157        const DataType& frechet_material{material_params.frechet_material};
 
  158        const DataType& reg_eps{material_params.reg_eps};
 
  159        const bool& need_frechet_material{material_params.need_frechet_material};
 
  177        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  181        typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
 
  182        typename SpaceHelp::EvalData basis_data;
 
  188        VecValueType loc_v(DataType(0));
 
  189        MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
 
  190        DataType local_delta(DataType(0));
 
  191        DataType nu_loc(DataType(0));
 
  192        DataType gamma_dot(DataType(0));
 
  197        if constexpr(need_streamline) 
 
  199          VecValueType mean_v(DataType(0));
 
  201          const VecValueType barycenter(DataType(0));
 
  204          SpaceHelp::eval_ref_values(basis_data, barycenter);
 
  205          SpaceHelp::trans_values(basis_data);
 
  206          for(
int i(0); i < num_loc_dofs; ++i)
 
  208            mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  210          const DataType local_norm_v = mean_v.norm_euclid();
 
  212          if(local_norm_v > tol_eps)
 
  218            const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
 
  219            const DataType local_re = (local_norm_v * local_h) / sd_nu;
 
  220            local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
 
  224        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  226          const typename SpaceHelp::DomainPointType& 
dom_point = cub_pt[cub_ind];
 
  227          SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  228          SpaceHelp::trans_values(basis_data);
 
  230          SpaceHelp::calc_jac_mat(loc_jac, 
dom_point, local_coeffs);
 
  231          loc_jac_inv.set_inverse(loc_jac);
 
  233          SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  234          SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
 
  236          const DataType weight = loc_jac.det() * cub_wg[cub_ind];
 
  237          if(need_convection || need_streamline) 
 
  240            for(
int i = 0; i < num_loc_dofs; ++i)
 
  242              loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  248            for(
int i = 0; i < num_loc_dofs; ++i)
 
  250              loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
 
  254          strain_rate_tensor_2.set_transpose(loc_grad_v);
 
  255          strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
 
  257          gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
 
  259          gamma_dot = 
Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
 
  263          nu_loc = visco_func(gamma_dot);
 
  267          for(
int i(0); i < num_loc_dofs; ++i)
 
  270            for(
int j(0); j < num_loc_dofs; ++j)
 
  273              const DataType 
value = nu_loc * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  276              loc_mat[i][j].add_scalar_main_diag(
value);
 
  277              loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
 
  281          if(need_frechet_material)
 
  283            const DataType fac =  frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
 
  286            for(
int i(0); i < num_loc_dofs; ++i)
 
  292              for(
int j(0); j < num_loc_dofs; ++j)
 
  297                loc_mat[i][j].add_outer_product(du_grad_phi, du_grad_psi, fac * weight);
 
  305            for(
int i(0); i < num_loc_dofs; ++i)
 
  308              for(
int j(0); j < num_loc_dofs; ++j)
 
  311                const DataType 
value = beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad);
 
  314                loc_mat[i][j].add_scalar_main_diag(
value);
 
  321          if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
 
  323          if(
Math::abs(frechet_beta) > DataType(0))
 
  327            for(
int i(0); i < num_loc_dofs; ++i)
 
  330              for(
int j(0); j < num_loc_dofs; ++j)
 
  333                const DataType 
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
 
  336                loc_mat[i][j].axpy(
value, loc_grad_v);
 
  343          if(CudaMath::cuda_abs(theta) > DataType(0))
 
  349            for(
int i(0); i < num_loc_dofs; ++i)
 
  352              for(
int j(0); j < num_loc_dofs; ++j)
 
  355                const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  358                loc_mat[i][j].add_scalar_main_diag(
value);
 
  363          if constexpr(need_streamline) 
 
  366            for(
int i = 0; i < num_loc_dofs; ++i)
 
  368              streamdiff_coeffs[i] = 
Tiny::dot(loc_v, basis_data.phi[i].grad);
 
  373            if((local_delta > tol_eps))
 
  376              for(
int i(0); i < num_loc_dofs; ++i)
 
  379                for(
int j(0); j < num_loc_dofs; ++j)
 
  382                  const DataType 
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
 
  385                  loc_mat[i][j].add_scalar_main_diag(
value);
 
  394      #if defined(__CUDACC__) 
  419      template<
typename ThreadGroup_, 
typename SpaceHelp_, 
typename ViscFunc_, 
typename ViscDerFunc_, 
bool need_stream_diff_>
 
  421                                          int loc_assemble_size, 
int assemble_offset, 
typename SpaceHelp_::DataType* loc_mat, 
const typename SpaceHelp_::DataType* local_conv_dofs,
 
  423                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  426                                          const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps,
 
  427                                          ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
 
  429        typedef SpaceHelp_ SpaceHelp;
 
  430        constexpr int dim = SpaceHelp::dim;
 
  431        typedef typename SpaceHelp::SpaceType SpaceType;
 
  432        typedef typename SpaceHelp::DataType DataType;
 
  433        constexpr bool need_streamline = need_stream_diff_;
 
  435        const int t_idx = tg.thread_rank();
 
  436        const int g_size = tg.num_threads();
 
  439        const DataType& theta{burgers_params.theta};
 
  440        const DataType& beta{burgers_params.beta};
 
  441        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  442        const DataType& sd_delta{burgers_params.sd_delta};
 
  443        const DataType& sd_nu{burgers_params.sd_nu};
 
  444        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  446        const DataType& frechet_material{material_params.frechet_material};
 
  447        const DataType& reg_eps{material_params.reg_eps};
 
  448        const bool& need_frechet_material{material_params.need_frechet_material};
 
  466        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  468        typedef Tiny::Vector<DataType, dim> VecValueType;
 
  469        typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
 
  474        typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, need_stream_diff_> BSDKWrapper;
 
  475        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
  476        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
  477        MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
 
  478        VecValueType& loc_v = shared_wrapper->loc_v;
 
  479        VecValueType* mean_v_p = 
nullptr;
 
  480        if constexpr(need_streamline) mean_v_p = &(shared_wrapper->mean_v);
 
  481        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
  482        Tiny::Vector<DataType, num_loc_dofs>* streamdiff_coeffs_p = 
nullptr;
 
  483        if constexpr(need_streamline) streamdiff_coeffs_p = &(shared_wrapper->streamdiff_coeffs);
 
  484        DataType& local_delta = shared_wrapper->local_delta;
 
  485        DataType& nu_loc = shared_wrapper->nu_loc;
 
  486        DataType& gamma_dot = shared_wrapper->gamma_dot;
 
  487        DataType& det = shared_wrapper->det;
 
  488        DataType& weight = shared_wrapper->weight;
 
  489        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
  490        bool& need_frechet = shared_wrapper->need_frechet;
 
  493        MatValueType strain_rate_tensor_2(DataType(0));
 
  495        VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper, 
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
 
  498        cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
 
  502        if constexpr(need_streamline) 
 
  504          cg::invoke_one(tg, [&]() {*mean_v_p = DataType(0);});
 
  508          cg::invoke_one(tg, [&](){ 
const VecValueType bc(0);
 
  509                                    SpaceHelp::eval_ref_values(basis_data, bc);
 
  510                                    SpaceHelp::trans_values(basis_data);});
 
  514          for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  516            VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &(*mean_v_p)[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  521          DataType local_norm_v = mean_v_p->norm_euclid();
 
  523          if(local_norm_v > tol_eps)
 
  525            cg::invoke_one(tg, [&](){
 
  526              const DataType local_h = SpaceHelp::width_directed(*mean_v_p, local_coeffs) * local_norm_v;
 
  527              const DataType local_re = (local_norm_v * local_h) / sd_nu;
 
  528              local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
 
  536        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  538          cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
 
  539                                  SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  540                                  SpaceHelp::trans_values(basis_data);});
 
  543          SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, 
dom_point, &local_coeffs[0][0]);
 
  545          cg::invoke_one(tg, [&](){det = loc_jac.det();
 
  546                                  weight = det * cub_wg[cub_ind];});
 
  548          loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
 
  550          cg::invoke_one(tg, [&](){
 
  551                                  SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  552                                  SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
 
  555          if(need_convection || need_streamline) 
 
  557            VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
 
  560            for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  562              VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  568            VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
 
  570            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  572              VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
 
  579              strain_rate_tensor_2.set_transpose(loc_grad_v);
 
  580              strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
 
  581            cg::invoke_one(tg, [&](){
 
  584              gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
 
  585              nu_loc = visco_func(gamma_dot);
 
  590          if constexpr(need_streamline) 
 
  593            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  595              (*streamdiff_coeffs_p)[i] = 
Tiny::dot(loc_v, basis_data.phi[i].grad);
 
  602          const DataType fac =  frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
 
  604          for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
 
  607            const int i = (idx+assemble_offset) / num_loc_dofs;
 
  608            const int j = (idx+assemble_offset) % num_loc_dofs;
 
  612              const DataType 
value = nu_loc * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  614              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
 
  615              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
 
  618            if(need_frechet_material)
 
  620              Tiny::Vector<DataType, dim> du_grad_phi;
 
  621              du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
 
  622              Tiny::Vector<DataType, dim> du_grad_psi;
 
  623              du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
 
  625              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(du_grad_phi, du_grad_psi, fac*weight);
 
  632              const DataType 
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
 
  635              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
 
  642              const DataType 
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
 
  645              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->
axpy(
value, loc_grad_v);
 
  650            if(CudaMath::cuda_abs(theta) > DataType(0))
 
  656              const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  659              ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
 
  663            if constexpr(need_streamline)
 
  665              if(local_delta > tol_eps)
 
  668                const DataType 
value = local_delta * weight * (*streamdiff_coeffs_p)[i] * (*streamdiff_coeffs_p)[j];
 
  671                ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
 
  703      template<
typename SpaceHelp_, 
typename LocVecType_, 
int dim_, 
int num_verts_, 
typename ViscFunc_>
 
  705                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  707                                          const bool need_convection, ViscFunc_ visco_func)
 
  709        typedef SpaceHelp_ SpaceHelp;
 
  710        constexpr int dim = SpaceHelp::dim;
 
  711        typedef typename SpaceHelp::SpaceType SpaceType;
 
  712        typedef typename SpaceHelp::DataType DataType;
 
  715        const DataType& theta{burgers_params.theta};
 
  716        const DataType& beta{burgers_params.beta};
 
  739        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  743        typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
 
  744        typename SpaceHelp::EvalData basis_data;
 
  750        VecValueType loc_v(DataType(0));
 
  751        DataType nu_loc(DataType(0));
 
  757        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  759          const typename SpaceHelp::DomainPointType& 
dom_point = cub_pt[cub_ind];
 
  760          SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  761          SpaceHelp::trans_values(basis_data);
 
  763          SpaceHelp::calc_jac_mat(loc_jac, 
dom_point, local_coeffs);
 
  764          loc_jac_inv.set_inverse(loc_jac);
 
  766          SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  767          SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
 
  769          const DataType weight = loc_jac.det() * cub_wg[cub_ind];
 
  773            for(
int i = 0; i < num_loc_dofs; ++i)
 
  775              loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  781            MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
 
  783            for(
int i = 0; i < num_loc_dofs; ++i)
 
  785              loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
 
  788            strain_rate_tensor_2.set_transpose(loc_grad_v);
 
  789            strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
 
  791            nu_loc = visco_func(CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
 
  793            nu_loc = visco_func(
Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
 
  799          for(
int i(0); i < num_loc_dofs; ++i)
 
  802            for(
int j(0); j < num_loc_dofs; ++j)
 
  805              const DataType value1 = nu_loc * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  806              const DataType value2 = nu_loc * weight * 
Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
 
  808              loc_vec[i].axpy(value1, local_prim_dofs[j]);
 
  809              loc_vec[i].axpy(value2, basis_data.phi[j].grad);
 
  816            for(
int i(0); i < num_loc_dofs; ++i)
 
  819              for(
int j(0); j < num_loc_dofs; ++j)
 
  822                const DataType 
value = beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad);
 
  825                loc_vec[i].axpy(
value, local_prim_dofs[j]);
 
  832          if(CudaMath::cuda_abs(theta) > DataType(0))
 
  838            for(
int i(0); i < num_loc_dofs; ++i)
 
  841              for(
int j(0); j < num_loc_dofs; ++j)
 
  844                const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  847                loc_vec[i].axpy(
value, local_prim_dofs[j]);
 
  866      template<
typename ThreadGroup_, 
typename SpaceHelp_, 
typename ViscFunc_>
 
  868                                          typename SpaceHelp_::DataType* loc_vec, 
const typename SpaceHelp_::DataType* local_prim_dofs, 
const typename SpaceHelp_::DataType* local_conv_dofs,
 
  870                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  872                                          const bool need_convection, ViscFunc_ visco_func)
 
  874        typedef SpaceHelp_ SpaceHelp;
 
  875        constexpr int dim = SpaceHelp::dim;
 
  876        typedef typename SpaceHelp::SpaceType SpaceType;
 
  877        typedef typename SpaceHelp::DataType DataType;
 
  878        const int t_idx = tg.thread_rank();
 
  879        const int g_size = tg.num_threads();
 
  881        const DataType& nu{burgers_params.nu};
 
  882        const DataType& theta{burgers_params.theta};
 
  883        const DataType& beta{burgers_params.beta};
 
  906        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  910        typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, false> BSDKWrapper;
 
  911        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
  912        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
  914        DataType& det = shared_wrapper->det;
 
  915        DataType& weight = shared_wrapper->weight;
 
  916        DataType& nu_loc = shared_wrapper->nu_loc;
 
  917        DataType& gamma_dot = shared_wrapper->gamma_dot;
 
  918        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
  920        typedef Tiny::Vector<DataType, dim> VecValueType;
 
  921        typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
 
  923        MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
 
  924        VecValueType& loc_v = shared_wrapper->loc_v;
 
  926        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
  928        VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper, 
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
 
  931        cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
 
  933        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  936          cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
 
  937                                  SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  938                                  SpaceHelp::trans_values(basis_data);});
 
  941          SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, 
dom_point, &local_coeffs[0][0]);
 
  943          cg::invoke_one(tg, [&](){det = loc_jac.det();
 
  944          weight = det * cub_wg[cub_ind];});
 
  946          loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
 
  948          cg::invoke_one(tg, [&](){
 
  949                                  SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  950                                  SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
 
  955            VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
 
  958            for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  960              VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  966            VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
 
  968            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  970              VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
 
  973            cg::invoke_one(tg, [&](){
 
  974              MatValueType strain_rate_tensor_2(DataType(0));
 
  975              strain_rate_tensor_2.set_transpose(loc_grad_v);
 
  976              strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
 
  977              gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
 
  978              nu_loc = visco_func(gamma_dot);
 
  984          for(
int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
 
  987            DataType val = DataType(0);
 
  989            const int i = (idx/dim+assemble_offset);
 
  990            const int ii = idx%dim;
 
  992              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
  995                val += nu_loc * weight * (
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
 
  996                      + 
Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
 
 1002              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1005                val += beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
 
 1011            if(CudaMath::cuda_abs(theta) > DataType(0))
 
 1016              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1019                val += theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
 
 1024            cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
 
 1025            cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
 
 1035      #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN) 
 1057      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1060              const DT_* conv_data,
 
 1063              const std::vector<int*>& coloring_maps,
 
 1064              const std::vector<Index>& coloring_map_sizes,
 
 1067              MaterialType mat_type, 
int shared_mem, 
int blocksize, 
int gridsize, 
bool print_occupancy);
 
 1091      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1094              const DT_* conv_data,
 
 1095              const DT_* primal_data,
 
 1098              const std::vector<int*>& coloring_maps,
 
 1099              const std::vector<Index>& coloring_map_sizes,
 
 1103              int shared_mem, 
int blocksize, 
int gridsize, 
bool print_occupancy);
 
 1128      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1131              const DT_* conv_data,
 
 1134              const std::vector<int*>& coloring_maps,
 
 1135              const std::vector<Index>& coloring_map_sizes,
 
 1162      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1165              const DT_* conv_data,
 
 1166              const DT_* primal_data,
 
 1169              const std::vector<int*>& coloring_maps,
 
 1170              const std::vector<Index>& coloring_map_sizes,
 
 1192    template<
int dim_, 
typename DT_, 
typename IT_>
 
 1202      typedef typename SpaceHelp::ShapeType ShapeType;
 
 1203      typedef typename SpaceHelp::DataType DataType;
 
 1204      typedef typename SpaceHelp::IndexType IndexType;
 
 1207      static constexpr int dim = SpaceHelp::dim;
 
 1209      typedef typename SpaceHelp::DomainPointType DomainPointType;
 
 1210      typedef typename SpaceHelp::ImagePointType ImagePointType;
 
 1211      typedef typename SpaceHelp::ValueType ValueType;
 
 1212      typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
 
 1248      template<
typename ColoringType_>
 
 1251      frechet_material(DataType(0)), reg_eps(DataType(1E-100)), material_type(
MaterialType::carreau)
 
 1287      template<
typename CubatureFactory_>
 
 1289       const SpaceType& space, 
const CubatureFactory_& cubature_factory, DataType alpha = DataType(1))
 const 
 1297        CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
 
 1300        int num_cubs = cubature.get_num_points();
 
 1301        typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
 
 1302        DataType* cub_wg = cubature.get_weights();
 
 1304        BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
 
 1319      template<
typename CubatureFactory_>
 
 1328        CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
 
 1331        int num_cubs = cubature.get_num_points();
 
 1332        typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
 
 1333        DataType* cub_wg = cubature.get_weights();
 
 1335        BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
 
 1342        const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1345        VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
 
 1346        auto burgers_params = this->wrap_burgers_params();
 
 1347        auto material_params = this->wrap_material_params();
 
 1351            this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
 
 1352                                  burgers_params, material_params, material_type);
 
 1355      void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
 
 1356       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1358        DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
 
 1359        const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1360        const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
 
 1362        VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(
void*)cub_pt, cub_wg, num_cubs};
 
 1363        VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
 
 1364        auto burgers_params = this->wrap_burgers_params();
 
 1365        auto material_params = wrap_material_params();
 
 1369                                  this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
 
 1370                                  burgers_params, material_params, material_type);
 
 1374      #ifdef FEAT_HAVE_CUDA 
 1375      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
 
 1376       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1378        VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
 
 1379        const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1381        typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
 
 1383        void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(CubPointType));
 
 1384        Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt, 
Index(num_cubs) * 
sizeof(CubPointType));
 
 1386        void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(DataType));
 
 1387        Util::cuda_copy_host_to_device(cub_wg_device, (
void*)cub_wg, 
Index(num_cubs) * 
sizeof(DataType));
 
 1389        VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
 
 1390        VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
 
 1391        auto burgers_params = this->wrap_burgers_params();
 
 1392        auto material_params = wrap_material_params();
 
 1395                  this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
 
 1396                  this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
 
 1397        Util::cuda_free(cub_wg_device);
 
 1398        Util::cuda_free(cub_pt_device);
 
 1402      void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
 
 1403       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1405        DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
 
 1406        const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1407        const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
 
 1409        typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
 
 1411        void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(CubPointType));
 
 1412        Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt, 
Index(num_cubs) * 
sizeof(CubPointType));
 
 1414        void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(DataType));
 
 1415        Util::cuda_copy_host_to_device(cub_wg_device, (
void*)cub_wg, 
Index(num_cubs) * 
sizeof(DataType));
 
 1417        VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
 
 1418        VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
 
 1419        auto burgers_params = this->wrap_burgers_params();
 
 1420        auto material_params = wrap_material_params();
 
 1423                  this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
 
 1424                  this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
 
 1425        Util::cuda_free(cub_wg_device);
 
 1426        Util::cuda_free(cub_pt_device);
 
 1430      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix), 
const SpaceType& DOXY(space), 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt), 
const DataType* DOXY(cub_wg), 
int DOXY(num_cubs), DataType DOXY(alpha))
 const 
 1432        XABORTM(
"What in the nine hells are you doing here?");
 
 1435      void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
 
 1436       const SpaceType&, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*, 
const DataType*, 
int, DataType)
 const 
 1438        XABORTM(
"This call was logged and your local FBI agent was informed about your transgression");
 
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Cubature Rule class template.
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Standard Lagrange-2 Finite-Element space class template.
Index get_num_dofs() const
Returns the number of dofs.
Tiny Matrix class template.
Tiny Vector class template.
CUDA_HOST_DEVICE Vector & set_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x)
Sets this vector to the result of a matrix-vector product.
Q2Lagrange thread parallel assembly class for the burgers operator.
Burgers Voxel Assembly template.
MaterialType material_type
material type
DataType frechet_material
Scaling factor full material jacobian.
VoxelBurgersVeloMaterialAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers velo material assembler.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the burgers velo material operator for finitie element space with same test and trial space...
DataType mu_0
Material Parameter.
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
VoxelAssembly::AssemblyMaterialData< DataType > wrap_material_params() const
Wraps the set burgers parameter into convenient struct.
BaseClass::SpaceType SpaceType
typedefs
DataType reg_eps
regularity parameter
Burgers Velocity Material Voxel Assembly template.
T_ sqrt(T_ x)
Returns the square-root of a value.
T_ abs(T_ x)
Returns the absolute value.
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
void assemble_burgers_velo_material_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_velo_material_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const VoxelAssembly::AssemblyMaterialData< typename SpaceHelp_::DataType > &material_params, const bool need_convection, const typename SpaceHelp_::DataType tol_eps, ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
Burgers Velo Material Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection, ViscFunc_ visco_func)
Burgers Vector assembly kernel.
MaterialType
Enum for different material types.
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
Data for burgers assembler.
A data field for a cubature rule.
A data field for all necessary values that define the dof mapping for assembly.
Data for material burgers assembler.
Helper struct wrapping the data required as shared data.