9#include <kernel/backend.hpp> 
   11#include <kernel/voxel_assembly/voxel_assembly_common.hpp> 
   12#include <kernel/voxel_assembly/helper/data_handler.hpp> 
   13#include <kernel/voxel_assembly/helper/space_helper.hpp> 
   14#include <kernel/cubature/rule.hpp> 
   15#include <kernel/lafem/sparse_matrix_bcsr.hpp> 
   16#include <kernel/lafem/dense_vector_blocked.hpp> 
   17#include <kernel/trafo/standard/mapping.hpp> 
   18#include <kernel/geometry/conformal_mesh.hpp> 
   19#include <kernel/util/tiny_algebra.hpp> 
   20#include <kernel/global/vector.hpp> 
   21#include <kernel/lafem/vector_mirror.hpp> 
   24#include <kernel/util/cuda_util.hpp> 
   28#include <cooperative_groups.h> 
   29#include <cooperative_groups/reduce.h> 
   30namespace cg = cooperative_groups;
 
   35  namespace VoxelAssembly
 
   46    template<
typename Space_, 
typename DT_, 
typename IT_>
 
   49    #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN) 
   51    template<
typename SpaceHelp_>
 
   54      typedef SpaceHelp_ SpaceHelp;
 
   55      static constexpr int dim = SpaceHelp::dim;
 
   56      typedef typename SpaceHelp::SpaceType SpaceType;
 
   57      typedef typename SpaceHelp::DataType DataType;
 
   59      static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
   64      typename SpaceHelp::EvalData basis_data;
 
   65      typename SpaceHelp::JacobianMatrixType loc_jac;
 
   66      typename SpaceHelp::JacobianMatrixType loc_jac_inv;
 
   70      typename SpaceHelp::DomainPointType dom_point;
 
  104      template<
typename SpaceHelp_, 
typename LocMatType_, 
typename LocVecType_, 
int dim_, 
int num_verts_>
 
  106                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  108                                          const bool need_streamline, 
const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps)
 
  110        typedef SpaceHelp_ SpaceHelp;
 
  111        constexpr int dim = SpaceHelp::dim;
 
  112        typedef typename SpaceHelp::SpaceType SpaceType;
 
  113        typedef typename SpaceHelp::DataType DataType;
 
  115        const DataType& nu{burgers_params.nu};
 
  116        const DataType& theta{burgers_params.theta};
 
  117        const DataType& beta{burgers_params.beta};
 
  118        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  119        const DataType& sd_delta{burgers_params.sd_delta};
 
  120        const DataType& sd_nu{burgers_params.sd_nu};
 
  121        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  122        const bool& deformation{burgers_params.deformation};
 
  140        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  144        typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
 
  145        typename SpaceHelp::EvalData basis_data;
 
  151        VecValueType loc_v(DataType(0)), mean_v(DataType(0));
 
  152        MatValueType loc_grad_v(DataType(0));
 
  153        DataType local_delta(DataType(0));
 
  162          const VecValueType barycenter(DataType(0));
 
  165          SpaceHelp::eval_ref_values(basis_data, barycenter);
 
  166          SpaceHelp::trans_values(basis_data);
 
  167          for(
int i(0); i < num_loc_dofs; ++i)
 
  169            mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  171          const DataType local_norm_v = mean_v.norm_euclid();
 
  173          if(local_norm_v > tol_eps)
 
  175            const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
 
  176            const DataType local_re = (local_norm_v * local_h) / sd_nu;
 
  177            local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
 
  181        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  183          const typename SpaceHelp::DomainPointType& 
dom_point = cub_pt[cub_ind];
 
  184          SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  185          SpaceHelp::trans_values(basis_data);
 
  187          SpaceHelp::calc_jac_mat(loc_jac, 
dom_point, local_coeffs);
 
  188          loc_jac_inv.set_inverse(loc_jac);
 
  190          SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  191          SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
 
  193          const DataType weight = loc_jac.det() * cub_wg[cub_ind];
 
  194          if(need_convection || need_streamline) 
 
  197            for(
int i = 0; i < num_loc_dofs; ++i)
 
  199              loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  203          if(CudaMath::cuda_abs(frechet_beta) > DataType(0)) 
 
  205          if(
Math::abs(frechet_beta) > DataType(0)) 
 
  209            for(
int i = 0; i < num_loc_dofs; ++i)
 
  211              loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
 
  217            for(
int i = 0; i < num_loc_dofs; ++i)
 
  219              streamdiff_coeffs[i] = 
Tiny::dot(loc_v, basis_data.phi[i].grad);
 
  225            for(
int i(0); i < num_loc_dofs; ++i)
 
  228              for(
int j(0); j < num_loc_dofs; ++j)
 
  231                const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  234                loc_mat[i][j].add_scalar_main_diag(
value);
 
  235                loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu * weight);
 
  241            for(
int i(0); i < num_loc_dofs; ++i)
 
  244              for(
int j(0); j < num_loc_dofs; ++j)
 
  247                const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  250                loc_mat[i][j].add_scalar_main_diag(
value);
 
  258            for(
int i(0); i < num_loc_dofs; ++i)
 
  261              for(
int j(0); j < num_loc_dofs; ++j)
 
  264                const DataType 
value = beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad);
 
  267                loc_mat[i][j].add_scalar_main_diag(
value);
 
  274          if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
 
  276          if(
Math::abs(frechet_beta) > DataType(0))
 
  280            for(
int i(0); i < num_loc_dofs; ++i)
 
  283              for(
int j(0); j < num_loc_dofs; ++j)
 
  286                const DataType 
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
 
  289                loc_mat[i][j].axpy(
value, loc_grad_v);
 
  296          if(CudaMath::cuda_abs(theta) > DataType(0))
 
  302            for(
int i(0); i < num_loc_dofs; ++i)
 
  305              for(
int j(0); j < num_loc_dofs; ++j)
 
  308                const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  311                loc_mat[i][j].add_scalar_main_diag(
value);
 
  317          if((need_streamline) && (local_delta > tol_eps))
 
  320            for(
int i(0); i < num_loc_dofs; ++i)
 
  323              for(
int j(0); j < num_loc_dofs; ++j)
 
  326                const DataType 
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
 
  329                loc_mat[i][j].add_scalar_main_diag(
value);
 
  337      #if defined(__CUDACC__) || defined(DOXYGEN) 
  340      template<
typename ThreadGroup_, 
typename SpaceHelp_>
 
  342                                          int loc_assemble_size, 
int assemble_offset, 
typename SpaceHelp_::DataType* loc_mat, 
const typename SpaceHelp_::DataType* local_conv_dofs,
 
  344                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int cub_ind,
 
  346                                          const bool need_streamline, 
const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps)
 
  348        typedef SpaceHelp_ SpaceHelp;
 
  349        constexpr int dim = SpaceHelp::dim;
 
  350        typedef typename SpaceHelp::SpaceType SpaceType;
 
  351        typedef typename SpaceHelp::DataType DataType;
 
  353        const int t_idx = tg.thread_rank();
 
  354        const int g_size = tg.num_threads();
 
  356        const DataType& nu{burgers_params.nu};
 
  357        const DataType& theta{burgers_params.theta};
 
  358        const DataType& beta{burgers_params.beta};
 
  359        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  360        const DataType& sd_delta{burgers_params.sd_delta};
 
  361        const DataType& sd_nu{burgers_params.sd_nu};
 
  362        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  363        const bool& deformation{burgers_params.deformation};
 
  381        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  383        typedef Tiny::Vector<DataType, dim> VecValueType;
 
  384        typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
 
  390        typedef BurgersSharedDataKernelWrapper<SpaceHelp> BSDKWrapper;
 
  391        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
  392        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
  393        MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
 
  394        VecValueType& loc_v = shared_wrapper->loc_v;
 
  395        VecValueType& mean_v = shared_wrapper->mean_v;
 
  396        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
  397        Tiny::Vector<DataType, num_loc_dofs>& streamdiff_coeffs = shared_wrapper->streamdiff_coeffs;
 
  398        DataType& local_delta = shared_wrapper->local_delta;
 
  399        DataType& det = shared_wrapper->det;
 
  400        DataType& weight = shared_wrapper->weight;
 
  401        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
  402        bool& need_frechet = shared_wrapper->need_frechet;
 
  404        VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper, 
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
 
  407        cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
 
  413          cg::invoke_one(tg, [&]() {mean_v = DataType(0);});
 
  417          cg::invoke_one(tg, [&](){ 
const VecValueType bc(0);
 
  418                                    SpaceHelp::eval_ref_values(basis_data, bc);
 
  419                                    SpaceHelp::trans_values(basis_data);});
 
  423          for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  425            VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &mean_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  430          DataType local_norm_v = mean_v.norm_euclid();
 
  432          if(local_norm_v > tol_eps)
 
  434            cg::invoke_one(tg, [&](){
 
  435              const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
 
  436              const DataType local_re = (local_norm_v * local_h) / sd_nu;
 
  437              local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
 
  444          cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
 
  445                                  SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  446                                  SpaceHelp::trans_values(basis_data);});
 
  449          SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, 
dom_point, &local_coeffs[0][0]);
 
  451          cg::invoke_one(tg, [&](){det = loc_jac.det();
 
  452                                  weight = det * cub_wg[cub_ind];});
 
  454          loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
 
  456          cg::invoke_one(tg, [&](){
 
  457                                  SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  458                                  SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
 
  461          if(need_convection || need_streamline) 
 
  463            VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
 
  466            for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  468              VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  475            VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
 
  477            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  479              VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
 
  487            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  489              streamdiff_coeffs[i] = 
Tiny::dot(loc_v, basis_data.phi[i].grad);
 
  499      template<
typename ThreadGroup_, 
typename SpaceHelp_>
 
  501                                          int loc_assemble_size, 
int assemble_offset, 
typename SpaceHelp_::DataType* loc_mat,
 
  503                                          const bool need_streamline, 
const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps)
 
  505        typedef SpaceHelp_ SpaceHelp;
 
  506        constexpr int dim = SpaceHelp::dim;
 
  507        typedef typename SpaceHelp::SpaceType SpaceType;
 
  508        typedef typename SpaceHelp::DataType DataType;
 
  510        const int t_idx = tg.thread_rank();
 
  511        const int g_size = tg.num_threads();
 
  513        const DataType& nu{burgers_params.nu};
 
  514        const DataType& theta{burgers_params.theta};
 
  515        const DataType& beta{burgers_params.beta};
 
  516        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  517        const DataType& sd_delta{burgers_params.sd_delta};
 
  518        const DataType& sd_nu{burgers_params.sd_nu};
 
  519        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  520        const bool& deformation{burgers_params.deformation};
 
  538        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  548        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
  549        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
  550        MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
 
  551        VecValueType& loc_v = shared_wrapper->loc_v;
 
  552        VecValueType& mean_v = shared_wrapper->mean_v;
 
  553        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
  555        DataType& local_delta = shared_wrapper->local_delta;
 
  556        DataType& det = shared_wrapper->det;
 
  557        DataType& weight = shared_wrapper->weight;
 
  558        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
  559        bool& need_frechet = shared_wrapper->need_frechet;
 
  562        for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
 
  564          const int i = (idx+assemble_offset) / num_loc_dofs;
 
  565          const int j = (idx+assemble_offset) % num_loc_dofs;
 
  569            const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  577            const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  586            const DataType 
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
 
  596            const DataType 
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
 
  604          if(CudaMath::cuda_abs(theta) > DataType(0))
 
  610            const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  617          if((need_streamline) && (local_delta > tol_eps))
 
  620            const DataType 
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
 
  651      template<
typename ThreadGroup_, 
typename SpaceHelp_>
 
  654                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  656                                          const bool need_streamline, 
const bool need_convection, 
const typename SpaceHelp_::DataType tol_eps)
 
  658        typedef SpaceHelp_ SpaceHelp;
 
  659        constexpr int dim = SpaceHelp::dim;
 
  660        typedef typename SpaceHelp::SpaceType SpaceType;
 
  661        typedef typename SpaceHelp::DataType DataType;
 
  663        const int t_idx = tg.thread_rank();
 
  664        const int g_size = tg.num_threads();
 
  666        const DataType& nu{burgers_params.nu};
 
  667        const DataType& theta{burgers_params.theta};
 
  668        const DataType& beta{burgers_params.beta};
 
  669        const DataType& frechet_beta{burgers_params.frechet_beta};
 
  670        const DataType& sd_delta{burgers_params.sd_delta};
 
  671        const DataType& sd_nu{burgers_params.sd_nu};
 
  672        const DataType& sd_v_norm{burgers_params.sd_v_norm};
 
  673        const bool& deformation{burgers_params.deformation};
 
  691        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  700        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
  701        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
  702        MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
 
  703        VecValueType& loc_v = shared_wrapper->loc_v;
 
  704        VecValueType& mean_v = shared_wrapper->mean_v;
 
  705        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
  707        DataType& local_delta = shared_wrapper->local_delta;
 
  708        DataType& det = shared_wrapper->det;
 
  709        DataType& weight = shared_wrapper->weight;
 
  710        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
  711        bool& need_frechet = shared_wrapper->need_frechet;
 
  713        VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper, 
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
 
  716        cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
 
  722          cg::invoke_one(tg, [&]() {mean_v = DataType(0);});
 
  726          cg::invoke_one(tg, [&](){ 
const VecValueType bc(0);
 
  727                                    SpaceHelp::eval_ref_values(basis_data, bc);
 
  728                                    SpaceHelp::trans_values(basis_data);});
 
  732          for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  734            VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &mean_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  739          DataType local_norm_v = mean_v.norm_euclid();
 
  741          if(local_norm_v > tol_eps)
 
  743            cg::invoke_one(tg, [&](){
 
  744              const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
 
  745              const DataType local_re = (local_norm_v * local_h) / sd_nu;
 
  746              local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
 
  754        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  756          cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
 
  757                                  SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  758                                  SpaceHelp::trans_values(basis_data);});
 
  761          SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, 
dom_point, &local_coeffs[0][0]);
 
  763          cg::invoke_one(tg, [&](){det = loc_jac.det();
 
  764          weight = det * cub_wg[cub_ind];});
 
  766          loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
 
  768          cg::invoke_one(tg, [&](){
 
  769                                  SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  770                                  SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
 
  773          if(need_convection || need_streamline) 
 
  775            VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
 
  778            for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
  780              VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
  787            VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
 
  789            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  791              VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
 
  799            for(
int i = t_idx; i < num_loc_dofs; i += g_size)
 
  801              streamdiff_coeffs[i] = 
Tiny::dot(loc_v, basis_data.phi[i].grad);
 
  808          for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
 
  811            const int i = (idx+assemble_offset) / num_loc_dofs;
 
  812            const int j = (idx+assemble_offset) % num_loc_dofs;
 
  817              const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  825              const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  834              const DataType 
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
 
  844              const DataType 
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
 
  852            if(CudaMath::cuda_abs(theta) > DataType(0))
 
  858              const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
  865            if((need_streamline) && (local_delta > tol_eps))
 
  868              const DataType 
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
 
  900      template<
typename SpaceHelp_, 
typename LocVecType_, 
int dim_, 
int num_verts_>
 
  902                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
  904                                          const bool need_convection)
 
  906        typedef SpaceHelp_ SpaceHelp;
 
  907        constexpr int dim = SpaceHelp::dim;
 
  908        typedef typename SpaceHelp::SpaceType SpaceType;
 
  909        typedef typename SpaceHelp::DataType DataType;
 
  911        const DataType& nu{burgers_params.nu};
 
  912        const DataType& theta{burgers_params.theta};
 
  913        const DataType& beta{burgers_params.beta};
 
  918        const bool& deformation{burgers_params.deformation};
 
  936        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
  940        typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
 
  941        typename SpaceHelp::EvalData basis_data;
 
  947        VecValueType loc_v(DataType(0));
 
  953        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
  955          const typename SpaceHelp::DomainPointType& 
dom_point = cub_pt[cub_ind];
 
  956          SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
  957          SpaceHelp::trans_values(basis_data);
 
  959          SpaceHelp::calc_jac_mat(loc_jac, 
dom_point, local_coeffs);
 
  960          loc_jac_inv.set_inverse(loc_jac);
 
  962          SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
  963          SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
 
  965          const DataType weight = loc_jac.det() * cub_wg[cub_ind];
 
  969            for(
int i = 0; i < num_loc_dofs; ++i)
 
  971              loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
 
  977            for(
int i(0); i < num_loc_dofs; ++i)
 
  980              for(
int j(0); j < num_loc_dofs; ++j)
 
  983                const DataType value1 = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
  984                const DataType value2 = nu * weight * 
Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
 
  986                loc_vec[i].axpy(value1, local_prim_dofs[j]);
 
  987                loc_vec[i].axpy(value2, basis_data.phi[j].grad);
 
  993            for(
int i(0); i < num_loc_dofs; ++i)
 
  996              for(
int j(0); j < num_loc_dofs; ++j)
 
  999                const DataType 
value = nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
 
 1002                loc_vec[i].axpy(
value, local_prim_dofs[j]);
 
 1010            for(
int i(0); i < num_loc_dofs; ++i)
 
 1013              for(
int j(0); j < num_loc_dofs; ++j)
 
 1016                const DataType 
value = beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad);
 
 1019                loc_vec[i].axpy(
value, local_prim_dofs[j]);
 
 1026          if(CudaMath::cuda_abs(theta) > DataType(0))
 
 1032            for(
int i(0); i < num_loc_dofs; ++i)
 
 1035              for(
int j(0); j < num_loc_dofs; ++j)
 
 1038                const DataType 
value = theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value;
 
 1041                loc_vec[i].axpy(
value, local_prim_dofs[j]);
 
 1050      template<
typename ThreadGroup_, 
typename SpaceHelp_>
 
 1052                                          typename SpaceHelp_::DataType* loc_vec, 
const typename SpaceHelp_::DataType* local_prim_dofs, 
const typename SpaceHelp_::DataType* local_conv_dofs,
 
 1054                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
 1056                                          const bool need_convection)
 
 1058        typedef SpaceHelp_ SpaceHelp;
 
 1059        constexpr int dim = SpaceHelp::dim;
 
 1060        typedef typename SpaceHelp::SpaceType SpaceType;
 
 1061        typedef typename SpaceHelp::DataType DataType;
 
 1062        const int t_idx = tg.thread_rank();
 
 1063        const int g_size = tg.num_threads();
 
 1065        const DataType& nu{burgers_params.nu};
 
 1066        const DataType& theta{burgers_params.theta};
 
 1067        const DataType& beta{burgers_params.beta};
 
 1072        const bool& deformation{burgers_params.deformation};
 
 1090        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
 1094        typedef BurgersSharedDataKernelWrapper<SpaceHelp> BSDKWrapper;
 
 1095        typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
 
 1096        typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
 
 1098        DataType& det = shared_wrapper->det;
 
 1099        DataType& weight = shared_wrapper->weight;
 
 1100        typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
 
 1102        typedef Tiny::Vector<DataType, dim> VecValueType;
 
 1105        VecValueType& loc_v = shared_wrapper->loc_v;
 
 1107        typename SpaceHelp::DomainPointType& 
dom_point = shared_wrapper->dom_point;
 
 1109        VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper, 
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
 
 1112        cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
 
 1114        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
 1117          cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
 
 1118                                  SpaceHelp::eval_ref_values(basis_data, 
dom_point);
 
 1119                                  SpaceHelp::trans_values(basis_data);});
 
 1122          SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, 
dom_point, &local_coeffs[0][0]);
 
 1124          cg::invoke_one(tg, [&](){det = loc_jac.det();
 
 1125          weight = det * cub_wg[cub_ind];});
 
 1127          loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
 
 1129          cg::invoke_one(tg, [&](){
 
 1130                                  SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
 1131                                  SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
 
 1136            VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
 
 1139            for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
 
 1141              VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
 
 1147          for(
int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
 
 1150            DataType val = DataType(0);
 
 1152            const int i = (idx/dim+assemble_offset);
 
 1153            const int ii = idx%dim;
 
 1156              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1159                val += nu * weight * (
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
 
 1160                      + 
Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
 
 1165              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1168                val += nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
 
 1174              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1177                val += beta * weight * basis_data.phi[i].value * 
Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
 
 1183            if(CudaMath::cuda_abs(theta) > DataType(0))
 
 1188              for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
 
 1191                val += theta * weight *  basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
 
 1196            cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
 
 1197            cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
 
 1207      #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN) 
 1231      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1234              const DT_* conv_data,
 
 1237              const std::vector<int*>& coloring_maps,
 
 1238              const std::vector<Index>& coloring_map_sizes,
 
 1240              int shared_mem, 
int blocksize, 
int gridsize, 
bool print_occupancy);
 
 1266      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1269              const DT_* conv_data,
 
 1270              const DT_* primal_data,
 
 1273              const std::vector<int*>& coloring_maps,
 
 1274              const std::vector<Index>& coloring_map_sizes,
 
 1276              int shared_mem, 
int blocksize, 
int gridsize, 
bool print_occupancy);
 
 1291      template<
typename DT_, 
typename IT_, 
int dim_>
 
 1306      template<
typename DT_, 
typename IT_, 
int dim_>
 
 1329      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1332              const DT_* conv_data,
 
 1335              const std::vector<int*>& coloring_maps,
 
 1336              const std::vector<Index>& coloring_map_sizes,
 
 1359      template<
typename Space_, 
typename DT_, 
typename IT_>
 
 1362              const DT_* conv_data,
 
 1363              const DT_* primal_data,
 
 1366              const std::vector<int*>& coloring_maps,
 
 1367              const std::vector<Index>& coloring_map_sizes,
 
 1383      template<
typename DT_, 
typename IT_, 
int dim_>
 
 1399      template<
typename DT_, 
typename IT_, 
int dim_>
 
 1418    template<
int dim_, 
typename DT_, 
typename IT_>
 
 1426      typedef typename SpaceHelp::ShapeType ShapeType;
 
 1427      typedef typename SpaceHelp::DataType DataType;
 
 1428      typedef typename SpaceHelp::IndexType IndexType;
 
 1431      static constexpr int dim = SpaceHelp::dim;
 
 1433      typedef typename SpaceHelp::DomainPointType DomainPointType;
 
 1434      typedef typename SpaceHelp::ImagePointType ImagePointType;
 
 1435      typedef typename SpaceHelp::ValueType ValueType;
 
 1436      typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
 
 1493      template<
typename ColoringType_>
 
 1495      mesh_data(space, coloring, hint),
 
 1496      nu(DataType(0)), theta(DataType(0)), beta(DataType(0)), frechet_beta(DataType(0)), sd_delta(DataType(0)),
 
 1497      sd_nu(DataType(0)), sd_v_norm(DataType(0)), shared_mem(0), gridsize(1), blocksize(32), print_occupancy(false),  deformation(true)
 
 1499        #ifdef FEAT_HAVE_CUDA 
 1501        const std::size_t stack_limit = Util::cuda_get_max_cache_thread();
 
 1502        const std::size_t stack_limit_target = 
sizeof(DataType) * (dim == 3 ? 8096u : 1012u);
 
 1503        if(stack_limit < stack_limit_target)
 
 1504          Util::cuda_set_max_cache_thread(stack_limit_target);
 
 1507        int target_elements = SpaceType::DofMappingType::dof_count * (dim == 3 ? (SpaceType::DofMappingType::dof_count/2+1) : SpaceType::DofMappingType::dof_count);
 
 1508        set_kernel_launch_params(target_elements, blocksize);
 
 1545      template<
typename CubatureFactory_>
 
 1547       const SpaceType& space, 
const CubatureFactory_& cubature_factory, DataType alpha = DataType(1))
 const 
 1555        CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
 
 1558        int num_cubs = cubature.get_num_points();
 
 1559        typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
 
 1560        DataType* cub_wg = cubature.get_weights();
 
 1562        BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
 
 1577      template<
typename CubatureFactory_>
 
 1586        CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
 
 1589        int num_cubs = cubature.get_num_points();
 
 1590        typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
 
 1591        DataType* cub_wg = cubature.get_weights();
 
 1593        BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
 
 1603        BACKEND_SKELETON_VOID(set_sd_v_norm_cuda, set_sd_v_norm_generic, set_sd_v_norm_generic, convect)
 
 1613        BACKEND_SKELETON_VOID(set_sd_v_norm_cuda, set_sd_v_norm_generic, set_sd_v_norm_generic, convect)
 
 1620        const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1623        VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = mesh_data.get_assembly_field();
 
 1624        auto burgers_params = wrap_burgers_params();
 
 1631      void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
 
 1632       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1634        DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
 
 1635        const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1636        const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
 
 1638        VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(
void*)cub_pt, cub_wg, num_cubs};
 
 1639        VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = mesh_data.get_assembly_field();
 
 1640        auto burgers_params = wrap_burgers_params();
 
 1648      void set_sd_v_norm_generic(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect)
 
 1654      void set_sd_v_norm_generic(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>& convect)
 
 1659      #ifdef FEAT_HAVE_CUDA 
 1660      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
 
 1661       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1663        VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
 
 1664        const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1666        typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
 
 1668        void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(CubPointType));
 
 1669        Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt, 
Index(num_cubs) * 
sizeof(CubPointType));
 
 1671        void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(DataType));
 
 1672        Util::cuda_copy_host_to_device(cub_wg_device, (
void*)cub_wg, 
Index(num_cubs) * 
sizeof(DataType));
 
 1674        VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
 
 1675        VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = mesh_data.get_assembly_field();
 
 1676        auto burgers_params = wrap_burgers_params();
 
 1679                                                        d_mapping_data, mesh_data.get_coloring_maps(),
 
 1680                                                        mesh_data.get_color_map_sizes(), alpha, burgers_params,
 
 1681                                                        shared_mem, blocksize, gridsize, print_occupancy);
 
 1682        Util::cuda_free(cub_wg_device);
 
 1683        Util::cuda_free(cub_pt_device);
 
 1687      void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
 
 1688       const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
 1690        DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
 
 1691        const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
 
 1692        const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
 
 1694        typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
 
 1696        void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(CubPointType));
 
 1697        Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt, 
Index(num_cubs) * 
sizeof(CubPointType));
 
 1699        void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) * 
sizeof(DataType));
 
 1700        Util::cuda_copy_host_to_device(cub_wg_device, (
void*)cub_wg, 
Index(num_cubs) * 
sizeof(DataType));
 
 1702        VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
 
 1703        VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = mesh_data.get_assembly_field();
 
 1704        auto burgers_params = wrap_burgers_params();
 
 1707                                                              mesh_data.get_coloring_maps(), mesh_data.get_color_map_sizes(), alpha, burgers_params,
 
 1708                                                              shared_mem, 32, gridsize, print_occupancy);
 
 1709        Util::cuda_free(cub_wg_device);
 
 1710        Util::cuda_free(cub_pt_device);
 
 1713      void set_sd_v_norm_cuda(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect)
 
 1719      void set_sd_v_norm_cuda(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>& convect)
 
 1725      void set_kernel_launch_params(
int target_elements, 
int blocksize_)
 
 1727        const int max_shared_mem = int(Util::cuda_get_shared_mem_per_sm());
 
 1728        const int max_blocks_per_sm = int(Util::cuda_get_max_blocks_per_sm());
 
 1729        const int max_sm_per_device = int(Util::cuda_get_sm_count());
 
 1736        constexpr int element_size = dim*dim*int(
sizeof(DataType));
 
 1738        constexpr int shared_size_nec = 
sizeof(VoxelAssembly::BurgersSharedDataGlobalWrapper<VoxelAssembly::SpaceHelper<SpaceType, DataType, IndexType>>) + 
sizeof(VoxelAssembly::BurgersSharedDataKernelWrapper<VoxelAssembly::SpaceHelper<SpaceType, DataType, IndexType>>);
 
 1741        shared_mem = 
Math::max(shared_mem, shared_size_nec + target_elements * element_size);
 
 1743        XASSERTM(shared_mem < max_shared_mem, 
"Targeted shared memory is " + 
stringify(shared_mem) + 
"B but Hardware only supports " + 
stringify(max_shared_mem) + 
" shared memory.");
 
 1744        blocksize = blocksize_;
 
 1745        const int max_color_sizes = int(mesh_data.get_max_color_size());
 
 1749        gridsize = std::min(
int(max_color_sizes), 2*max_sm_per_device*(max_blocks_per_sm/(
int(blocksize)/32)));
 
 1754      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix), 
const SpaceType& DOXY(space), 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt), 
const DataType* DOXY(cub_wg), 
int DOXY(num_cubs), DataType DOXY(alpha))
 const 
 1756        XABORTM(
"What in the nine hells are you doing here?");
 
 1759      void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, 
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
 
 1760       const SpaceType&, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*, 
const DataType*, 
int, DataType)
 const 
 1762        XABORTM(
"This call was logged and your local FBI agent was informed about your transgressions");
 
 1765      void set_sd_v_norm_cuda(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&)
 
 1767        XABORTM(
"Should not have called that....");
 
 1771      void set_sd_v_norm_cuda(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>&)
 
 1773        XABORTM(
"Should not have called that....");
 
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Cubature Rule class template.
Global vector wrapper class template.
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Handles vector prolongation, restriction and serialization.
Standard Lagrange-2 Finite-Element space class template.
Index get_num_dofs() const
Returns the number of dofs.
Tiny Matrix class template.
Tiny Vector class template.
Data handler for Lagrange based FE spaces.
VoxelAssembly::Q2StandardHyperCube< dim_ > SpaceType
typedefs
void set_sd_v_norm(const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect)
Sets the internal maximum streamline diffusion velocity norm.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the burgers operator for finitie element space with same test and trial space.
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
DataType sd_v_norm
The current maximum velocity norm.
DataType sd_delta
Streamline diffusion parameter.
DataHandler mesh_data
The meshdata handler.
DataType sd_nu
Streamline diffosuion viscosity. Generally equal to nu.
bool print_occupancy
print out occupancy
int blocksize
the block size to be used
VoxelBurgersAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers assembler.
int gridsize
the grid size to be used
int shared_mem
how much shared memory should the kernel use
DataType theta
Reaction scaling.
void set_sd_v_norm(const Global::Vector< LAFEM::DenseVectorBlocked< DataType, IndexType, dim >, LAFEM::VectorMirror< DataType, IndexType > > &convect)
Sets the internal maximum streamline diffusion velocity norm.
bool deformation
Should we use full deformation formulation?
VoxelAssembly::AssemblyBurgersData< DataType > wrap_burgers_params() const
Wraps the set burgers parameter into convenient struct.
DataType beta
Convection scaling.
DataType frechet_beta
Scaling parameter for full jacobian convection part.
DataType nu
The viscosity.
Burgers Voxel Assembly template.
T_ abs(T_ x)
Returns the absolute value.
T_ max(T_ a, T_ b)
Returns the maximum of two values.
std::size_t element_size(const Pack::Type type)
Returns the size of a Pack::Type element in bytes.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
void assemble_burgers_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params)
Host kernel wrapper for the full matrix burgers assembler.
DT_ get_sd_v_norm_host(const LAFEM::DenseVectorBlocked< DT_, IT_, dim_ > &convect)
Host kernel wrapper for the local sd_v_norm calculation.
void assemble_burgers_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
DT_ get_sd_v_norm(const LAFEM::DenseVectorBlocked< DT_, IT_, dim_ > &convect)
Device kernel wrapper for the local sd_v_norm calculation.
void assemble_burgers_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
CUDA_DEVICE __forceinline__ void grouped_burgers_mat_assembly_kernel(const ThreadGroup_ &tg, BurgersSharedDataKernelWrapper< SpaceHelp_ > *shared_wrapper, int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType *loc_mat, const typename SpaceHelp_::DataType *local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, SpaceHelp_::dim, SpaceHelp_::num_verts > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
Burgers Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection)
Burgers Vector assembly kernel.
CUDA_DEVICE __forceinline__ void grouped_burgers_mat_alt_assembly_kernel(const ThreadGroup_ &tg, BurgersSharedDataKernelWrapper< SpaceHelp_ > *shared_wrapper, int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType *loc_mat, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
appears to not work better... compiler optimizes this sufficiently?
CUDA_HOST_DEVICE void burgers_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
Burgers Matrix assembly kernel.
String stringify(const T_ &item)
Converts an item into a String.
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
Data for burgers assembler.
A data field for a cubature rule.
A data field for all necessary values that define the dof mapping for assembly.
Helper struct wrapping the data required as shared data.