9#include <kernel/backend.hpp>
11#include <kernel/voxel_assembly/voxel_assembly_common.hpp>
12#include <kernel/voxel_assembly/helper/data_handler.hpp>
13#include <kernel/voxel_assembly/helper/space_helper.hpp>
14#include <kernel/cubature/rule.hpp>
15#include <kernel/lafem/sparse_matrix_bcsr.hpp>
16#include <kernel/lafem/dense_vector_blocked.hpp>
17#include <kernel/trafo/standard/mapping.hpp>
18#include <kernel/geometry/conformal_mesh.hpp>
19#include <kernel/util/tiny_algebra.hpp>
20#include <kernel/global/vector.hpp>
21#include <kernel/lafem/vector_mirror.hpp>
24#include <kernel/util/cuda_util.hpp>
28#include <cooperative_groups.h>
29#include <cooperative_groups/reduce.h>
30namespace cg = cooperative_groups;
35 namespace VoxelAssembly
// NOTE(review): fragment — the struct/class header and several member lines are
// elided in this view. This appears to declare the per-thread-block shared-data
// wrapper used by the grouped CUDA kernels below (they bind references into an
// instance of it, see e.g. shared_wrapper->basis_data further down).
46 template<
typename Space_,
typename DT_,
typename IT_>
49 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
51 template<
typename SpaceHelp_>
// Convenience typedefs pulled from the space helper.
54 typedef SpaceHelp_ SpaceHelp;
55 static constexpr int dim = SpaceHelp::dim;
56 typedef typename SpaceHelp::SpaceType SpaceType;
57 typedef typename SpaceHelp::DataType DataType;
// Number of local degrees of freedom of one element.
59 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
// Shared evaluation scratch: basis function data, Jacobian and its inverse,
// and the current cubature point mapped to the reference domain.
64 typename SpaceHelp::EvalData basis_data;
65 typename SpaceHelp::JacobianMatrixType loc_jac;
66 typename SpaceHelp::JacobianMatrixType loc_jac_inv;
70 typename SpaceHelp::DomainPointType dom_point;
// Single-threaded (one thread per element) assembly of the local Burgers
// matrix: diffusion (gradient or deformation form), convection, Frechet
// derivative of convection, reaction, and streamline diffusion (SUPG).
// NOTE(review): fragment — the function name, return type, several parameters
// and all braces are elided in this view; commentary below is per visible line.
104 template<
typename SpaceHelp_,
typename LocMatType_,
typename LocVecType_,
int dim_,
int num_verts_>
106 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
108 const bool need_streamline,
const bool need_convection,
const typename SpaceHelp_::DataType tol_eps)
110 typedef SpaceHelp_ SpaceHelp;
111 constexpr int dim = SpaceHelp::dim;
112 typedef typename SpaceHelp::SpaceType SpaceType;
113 typedef typename SpaceHelp::DataType DataType;
// Unpack the Burgers parameter set by reference (no copies).
115 const DataType& nu{burgers_params.nu};
116 const DataType& theta{burgers_params.theta};
117 const DataType& beta{burgers_params.beta};
118 const DataType& frechet_beta{burgers_params.frechet_beta};
119 const DataType& sd_delta{burgers_params.sd_delta};
120 const DataType& sd_nu{burgers_params.sd_nu};
121 const DataType& sd_v_norm{burgers_params.sd_v_norm};
122 const bool& deformation{burgers_params.deformation};
140 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
// Per-element scratch, all zero-initialized.
144 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
145 typename SpaceHelp::EvalData basis_data;
151 VecValueType loc_v(DataType(0)), mean_v(DataType(0));
152 MatValueType loc_grad_v(DataType(0));
153 DataType local_delta(DataType(0));
// Streamline-diffusion preprocessing: evaluate the convection field at the
// reference barycenter to get a mean velocity for the element.
162 const VecValueType barycenter(DataType(0));
165 SpaceHelp::eval_ref_values(basis_data, barycenter);
166 SpaceHelp::trans_values(basis_data);
167 for(
int i(0); i < num_loc_dofs; ++i)
169 mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
171 const DataType local_norm_v = mean_v.norm_euclid();
// Only compute a stabilization parameter if the mean velocity is non-negligible.
173 if(local_norm_v > tol_eps)
175 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
176 const DataType local_re = (local_norm_v * local_h) / sd_nu;
177 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
// Main cubature loop: evaluate basis values/gradients at each point and
// accumulate all active bilinear-form contributions into loc_mat.
181 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
183 const typename SpaceHelp::DomainPointType&
dom_point = cub_pt[cub_ind];
184 SpaceHelp::eval_ref_values(basis_data,
dom_point);
185 SpaceHelp::trans_values(basis_data);
187 SpaceHelp::calc_jac_mat(loc_jac,
dom_point, local_coeffs);
188 loc_jac_inv.set_inverse(loc_jac);
190 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
191 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
// Cubature weight includes the Jacobian determinant of the trafo.
193 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
// Evaluate the convection field value at this cubature point if needed.
194 if(need_convection || need_streamline)
197 for(
int i = 0; i < num_loc_dofs; ++i)
199 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
// NOTE(review): the CudaMath::cuda_abs and Math::abs variants of the same test
// appear back-to-back here; presumably an elided #if device/host split — confirm.
203 if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
205 if(
Math::abs(frechet_beta) > DataType(0))
// Gradient of the convection field, needed for the Frechet term.
209 for(
int i = 0; i < num_loc_dofs; ++i)
211 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
// Streamline-diffusion coefficients: v * grad(phi_i).
217 for(
int i = 0; i < num_loc_dofs; ++i)
219 streamdiff_coeffs[i] =
Tiny::dot(loc_v, basis_data.phi[i].grad);
// Diffusion, deformation form: nu*(grad.phi_i,grad.phi_j) on the main diagonal
// plus the nu*outer-product term.
225 for(
int i(0); i < num_loc_dofs; ++i)
228 for(
int j(0); j < num_loc_dofs; ++j)
231 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
234 loc_mat[i][j].add_scalar_main_diag(
value);
235 loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu * weight);
// Diffusion, gradient form: only the scalar main-diagonal part.
241 for(
int i(0); i < num_loc_dofs; ++i)
244 for(
int j(0); j < num_loc_dofs; ++j)
247 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
250 loc_mat[i][j].add_scalar_main_diag(
value);
// Convection term: beta * phi_i * (v * grad.phi_j).
258 for(
int i(0); i < num_loc_dofs; ++i)
261 for(
int j(0); j < num_loc_dofs; ++j)
264 const DataType
value = beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad);
267 loc_mat[i][j].add_scalar_main_diag(
value);
// Frechet derivative of the convection term: adds value * grad(v) blocks.
// NOTE(review): duplicated abs test again, presumably elided #if split — confirm.
274 if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
276 if(
Math::abs(frechet_beta) > DataType(0))
280 for(
int i(0); i < num_loc_dofs; ++i)
283 for(
int j(0); j < num_loc_dofs; ++j)
286 const DataType
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
289 loc_mat[i][j].axpy(
value, loc_grad_v);
// Reaction (mass) term: theta * phi_i * phi_j.
296 if(CudaMath::cuda_abs(theta) > DataType(0))
302 for(
int i(0); i < num_loc_dofs; ++i)
305 for(
int j(0); j < num_loc_dofs; ++j)
308 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
311 loc_mat[i][j].add_scalar_main_diag(
value);
// Streamline-diffusion stabilization term, only if delta is significant.
317 if((need_streamline) && (local_delta > tol_eps))
320 for(
int i(0); i < num_loc_dofs; ++i)
323 for(
int j(0); j < num_loc_dofs; ++j)
326 const DataType
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
329 loc_mat[i][j].add_scalar_main_diag(
value);
// Cooperative (thread-group) variant: prepares shared evaluation data for ONE
// cubature point (cub_ind parameter) — basis values/gradients, Jacobian,
// velocity value/gradient and streamline-diffusion coefficients — in the
// block-shared wrapper. NOTE(review): fragment — function name, braces and the
// group-synchronization calls between phases are elided in this view.
337 #if defined(__CUDACC__) || defined(DOXYGEN)
340 template<
typename ThreadGroup_,
typename SpaceHelp_>
342 int loc_assemble_size,
int assemble_offset,
typename SpaceHelp_::DataType* loc_mat,
const typename SpaceHelp_::DataType* local_conv_dofs,
344 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int cub_ind,
346 const bool need_streamline,
const bool need_convection,
const typename SpaceHelp_::DataType tol_eps)
348 typedef SpaceHelp_ SpaceHelp;
349 constexpr int dim = SpaceHelp::dim;
350 typedef typename SpaceHelp::SpaceType SpaceType;
351 typedef typename SpaceHelp::DataType DataType;
// Rank and size of the cooperating thread group (group-strided loops below).
353 const int t_idx = tg.thread_rank();
354 const int g_size = tg.num_threads();
356 const DataType& nu{burgers_params.nu};
357 const DataType& theta{burgers_params.theta};
358 const DataType& beta{burgers_params.beta};
359 const DataType& frechet_beta{burgers_params.frechet_beta};
360 const DataType& sd_delta{burgers_params.sd_delta};
361 const DataType& sd_nu{burgers_params.sd_nu};
362 const DataType& sd_v_norm{burgers_params.sd_v_norm};
363 const bool& deformation{burgers_params.deformation};
381 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
383 typedef Tiny::Vector<DataType, dim> VecValueType;
384 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
// Bind references into the block-shared data wrapper — all threads of the
// group alias the same storage.
390 typedef BurgersSharedDataKernelWrapper<SpaceHelp> BSDKWrapper;
391 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
392 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
393 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
394 VecValueType& loc_v = shared_wrapper->loc_v;
395 VecValueType& mean_v = shared_wrapper->mean_v;
396 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
397 Tiny::Vector<DataType, num_loc_dofs>& streamdiff_coeffs = shared_wrapper->streamdiff_coeffs;
398 DataType& local_delta = shared_wrapper->local_delta;
399 DataType& det = shared_wrapper->det;
400 DataType& weight = shared_wrapper->weight;
401 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
402 bool& need_frechet = shared_wrapper->need_frechet;
// Zero the whole shared wrapper cooperatively (word-wise format).
404 VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper,
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
// Single-thread sections via cg::invoke_one; group-wide visibility of the
// results presumably relies on elided tg.sync() calls — confirm.
407 cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
413 cg::invoke_one(tg, [&]() {mean_v = DataType(0);});
// Evaluate basis at the reference barycenter (for mean velocity).
417 cg::invoke_one(tg, [&](){
const VecValueType bc(0);
418 SpaceHelp::eval_ref_values(basis_data, bc);
419 SpaceHelp::trans_values(basis_data);});
// Group-strided accumulation of the mean velocity from the convection dofs.
423 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
425 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &mean_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
430 DataType local_norm_v = mean_v.norm_euclid();
// Streamline-diffusion delta, computed by one thread into shared memory.
432 if(local_norm_v > tol_eps)
434 cg::invoke_one(tg, [&](){
435 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
436 const DataType local_re = (local_norm_v * local_h) / sd_nu;
437 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
// Per-cubature-point evaluation: values, Jacobian, inverse, gradients, weight.
444 cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
445 SpaceHelp::eval_ref_values(basis_data,
dom_point);
446 SpaceHelp::trans_values(basis_data);});
449 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac,
dom_point, &local_coeffs[0][0]);
451 cg::invoke_one(tg, [&](){det = loc_jac.det();
452 weight = det * cub_wg[cub_ind];});
454 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
456 cg::invoke_one(tg, [&](){
457 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
458 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
// Velocity value at this point (needed for convection/streamline terms).
461 if(need_convection || need_streamline)
463 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
466 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
468 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
// Velocity gradient (Frechet term) — guarded by elided need_frechet check, presumably.
475 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
477 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
479 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
// Streamline-diffusion coefficients, one per local dof, group-strided.
487 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
489 streamdiff_coeffs[i] =
Tiny::dot(loc_v, basis_data.phi[i].grad);
// Cooperative accumulation phase: each thread of the group handles a slice of
// the flattened (i,j) local-matrix index space and adds all active Burgers
// terms. Pairs with the preparation function above (shared_wrapper holds the
// precomputed basis data / weight). NOTE(review): fragment — function name,
// braces and the per-term accumulation statements are elided in this view.
499 template<
typename ThreadGroup_,
typename SpaceHelp_>
501 int loc_assemble_size,
int assemble_offset,
typename SpaceHelp_::DataType* loc_mat,
503 const bool need_streamline,
const bool need_convection,
const typename SpaceHelp_::DataType tol_eps)
505 typedef SpaceHelp_ SpaceHelp;
506 constexpr int dim = SpaceHelp::dim;
507 typedef typename SpaceHelp::SpaceType SpaceType;
508 typedef typename SpaceHelp::DataType DataType;
510 const int t_idx = tg.thread_rank();
511 const int g_size = tg.num_threads();
513 const DataType& nu{burgers_params.nu};
514 const DataType& theta{burgers_params.theta};
515 const DataType& beta{burgers_params.beta};
516 const DataType& frechet_beta{burgers_params.frechet_beta};
517 const DataType& sd_delta{burgers_params.sd_delta};
518 const DataType& sd_nu{burgers_params.sd_nu};
519 const DataType& sd_v_norm{burgers_params.sd_v_norm};
520 const bool& deformation{burgers_params.deformation};
538 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
// References into the block-shared evaluation data prepared earlier.
548 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
549 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
550 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
551 VecValueType& loc_v = shared_wrapper->loc_v;
552 VecValueType& mean_v = shared_wrapper->mean_v;
553 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
555 DataType& local_delta = shared_wrapper->local_delta;
556 DataType& det = shared_wrapper->det;
557 DataType& weight = shared_wrapper->weight;
558 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
559 bool& need_frechet = shared_wrapper->need_frechet;
// Group-strided loop over the assigned slice of the flattened local matrix;
// recover (row i, col j) from the flat index plus offset.
562 for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
564 const int i = (idx+assemble_offset) / num_loc_dofs;
565 const int j = (idx+assemble_offset) % num_loc_dofs;
// Diffusion value (deformation form branch, presumably — guard elided).
569 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
// Diffusion value (gradient form branch, presumably — guard elided).
577 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
// Convection: beta * phi_i * (grad.phi_j · v).
586 const DataType
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
// Frechet term coefficient.
596 const DataType
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
// Reaction (mass) term.
604 if(CudaMath::cuda_abs(theta) > DataType(0))
610 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
// Streamline-diffusion stabilization.
617 if((need_streamline) && (local_delta > tol_eps))
620 const DataType
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
// Fused cooperative variant: one function doing both the per-cubature-point
// preparation (loop over all num_cubs points) AND the flattened (i,j)
// accumulation — essentially the union of the two functions above.
// NOTE(review): fragment — function name, braces, group syncs and the
// per-term loc_mat update statements are elided in this view.
651 template<
typename ThreadGroup_,
typename SpaceHelp_>
654 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
656 const bool need_streamline,
const bool need_convection,
const typename SpaceHelp_::DataType tol_eps)
658 typedef SpaceHelp_ SpaceHelp;
659 constexpr int dim = SpaceHelp::dim;
660 typedef typename SpaceHelp::SpaceType SpaceType;
661 typedef typename SpaceHelp::DataType DataType;
663 const int t_idx = tg.thread_rank();
664 const int g_size = tg.num_threads();
666 const DataType& nu{burgers_params.nu};
667 const DataType& theta{burgers_params.theta};
668 const DataType& beta{burgers_params.beta};
669 const DataType& frechet_beta{burgers_params.frechet_beta};
670 const DataType& sd_delta{burgers_params.sd_delta};
671 const DataType& sd_nu{burgers_params.sd_nu};
672 const DataType& sd_v_norm{burgers_params.sd_v_norm};
673 const bool& deformation{burgers_params.deformation};
691 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
// Shared-memory evaluation scratch, aliased by all threads of the group.
700 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
701 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
702 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
703 VecValueType& loc_v = shared_wrapper->loc_v;
704 VecValueType& mean_v = shared_wrapper->mean_v;
705 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
707 DataType& local_delta = shared_wrapper->local_delta;
708 DataType& det = shared_wrapper->det;
709 DataType& weight = shared_wrapper->weight;
710 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
711 bool& need_frechet = shared_wrapper->need_frechet;
// Cooperative zero-fill of the shared wrapper.
713 VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper,
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
716 cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
722 cg::invoke_one(tg, [&]() {mean_v = DataType(0);});
// Mean velocity at the reference barycenter (streamline-diffusion prep).
726 cg::invoke_one(tg, [&](){
const VecValueType bc(0);
727 SpaceHelp::eval_ref_values(basis_data, bc);
728 SpaceHelp::trans_values(basis_data);});
732 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
734 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &mean_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
739 DataType local_norm_v = mean_v.norm_euclid();
741 if(local_norm_v > tol_eps)
743 cg::invoke_one(tg, [&](){
744 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
745 const DataType local_re = (local_norm_v * local_h) / sd_nu;
746 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
// Loop over all cubature points; each iteration prepares shared data then
// accumulates the assigned matrix entries (accumulation statements elided).
754 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
756 cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
757 SpaceHelp::eval_ref_values(basis_data,
dom_point);
758 SpaceHelp::trans_values(basis_data);});
761 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac,
dom_point, &local_coeffs[0][0]);
763 cg::invoke_one(tg, [&](){det = loc_jac.det();
764 weight = det * cub_wg[cub_ind];});
766 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
768 cg::invoke_one(tg, [&](){
769 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
770 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
773 if(need_convection || need_streamline)
775 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
778 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
780 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
787 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
789 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
791 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
799 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
801 streamdiff_coeffs[i] =
Tiny::dot(loc_v, basis_data.phi[i].grad);
// Flattened (i,j) accumulation over the assigned assembly slice.
808 for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
811 const int i = (idx+assemble_offset) / num_loc_dofs;
812 const int j = (idx+assemble_offset) % num_loc_dofs;
// Diffusion (deformation form, presumably — guard elided).
817 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
// Diffusion (gradient form, presumably — guard elided).
825 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
// Convection term.
834 const DataType
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
// Frechet term.
844 const DataType
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
// Reaction term.
852 if(CudaMath::cuda_abs(theta) > DataType(0))
858 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
// Streamline-diffusion stabilization.
865 if((need_streamline) && (local_delta > tol_eps))
868 const DataType
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
// Single-threaded assembly of the local Burgers DEFECT vector: applies the
// Burgers operator (diffusion, convection, reaction) to the primal dofs and
// accumulates into loc_vec. Mirrors the matrix assembler above but without
// Frechet/streamline terms. NOTE(review): fragment — function name, several
// parameters and braces are elided in this view.
900 template<
typename SpaceHelp_,
typename LocVecType_,
int dim_,
int num_verts_>
902 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
904 const bool need_convection)
906 typedef SpaceHelp_ SpaceHelp;
907 constexpr int dim = SpaceHelp::dim;
908 typedef typename SpaceHelp::SpaceType SpaceType;
909 typedef typename SpaceHelp::DataType DataType;
911 const DataType& nu{burgers_params.nu};
912 const DataType& theta{burgers_params.theta};
913 const DataType& beta{burgers_params.beta};
918 const bool& deformation{burgers_params.deformation};
936 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
940 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
941 typename SpaceHelp::EvalData basis_data;
947 VecValueType loc_v(DataType(0));
// Cubature loop: evaluate basis data and accumulate operator-times-primal.
953 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
955 const typename SpaceHelp::DomainPointType&
dom_point = cub_pt[cub_ind];
956 SpaceHelp::eval_ref_values(basis_data,
dom_point);
957 SpaceHelp::trans_values(basis_data);
959 SpaceHelp::calc_jac_mat(loc_jac,
dom_point, local_coeffs);
960 loc_jac_inv.set_inverse(loc_jac);
962 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
963 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
965 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
// Convection velocity at this cubature point (guard elided, presumably
// need_convection).
969 for(
int i = 0; i < num_loc_dofs; ++i)
971 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
// Diffusion, deformation form: both the scalar part (value1) and the
// transposed-gradient part (value2) applied to the primal dofs.
977 for(
int i(0); i < num_loc_dofs; ++i)
980 for(
int j(0); j < num_loc_dofs; ++j)
983 const DataType value1 = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
984 const DataType value2 = nu * weight *
Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
986 loc_vec[i].axpy(value1, local_prim_dofs[j]);
987 loc_vec[i].axpy(value2, basis_data.phi[j].grad);
// Diffusion, gradient form: scalar part only.
993 for(
int i(0); i < num_loc_dofs; ++i)
996 for(
int j(0); j < num_loc_dofs; ++j)
999 const DataType
value = nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
1002 loc_vec[i].axpy(
value, local_prim_dofs[j]);
// Convection term applied to the primal dofs.
1010 for(
int i(0); i < num_loc_dofs; ++i)
1013 for(
int j(0); j < num_loc_dofs; ++j)
1016 const DataType
value = beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad);
1019 loc_vec[i].axpy(
value, local_prim_dofs[j]);
// Reaction (mass) term applied to the primal dofs.
1026 if(CudaMath::cuda_abs(theta) > DataType(0))
1032 for(
int i(0); i < num_loc_dofs; ++i)
1035 for(
int j(0); j < num_loc_dofs; ++j)
1038 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
1041 loc_vec[i].axpy(
value, local_prim_dofs[j]);
// Cooperative defect-vector assembly: the thread group prepares shared basis
// data per cubature point, then 4-thread tiles each reduce one output
// component over the j-dofs and commit it with an asynchronous atomic
// reduction. NOTE(review): fragment — function name, braces, syncs and the
// branch guards between the term loops are elided in this view.
1050 template<
typename ThreadGroup_,
typename SpaceHelp_>
1052 typename SpaceHelp_::DataType* loc_vec,
const typename SpaceHelp_::DataType* local_prim_dofs,
const typename SpaceHelp_::DataType* local_conv_dofs,
1054 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
1056 const bool need_convection)
1058 typedef SpaceHelp_ SpaceHelp;
1059 constexpr int dim = SpaceHelp::dim;
1060 typedef typename SpaceHelp::SpaceType SpaceType;
1061 typedef typename SpaceHelp::DataType DataType;
1062 const int t_idx = tg.thread_rank();
1063 const int g_size = tg.num_threads();
1065 const DataType& nu{burgers_params.nu};
1066 const DataType& theta{burgers_params.theta};
1067 const DataType& beta{burgers_params.beta};
1072 const bool& deformation{burgers_params.deformation};
1090 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
// Block-shared evaluation scratch.
1094 typedef BurgersSharedDataKernelWrapper<SpaceHelp> BSDKWrapper;
1095 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
1096 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
1098 DataType& det = shared_wrapper->det;
1099 DataType& weight = shared_wrapper->weight;
1100 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
1102 typedef Tiny::Vector<DataType, dim> VecValueType;
1105 VecValueType& loc_v = shared_wrapper->loc_v;
1107 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
// Cooperative zero-fill of the shared wrapper.
1109 VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper,
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
// Partition the group into 4-thread tiles: each tile owns one output entry,
// its 4 lanes stride over the j-dofs and are reduced at the end.
1112 cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
1114 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
1117 cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
1118 SpaceHelp::eval_ref_values(basis_data,
dom_point);
1119 SpaceHelp::trans_values(basis_data);});
1122 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac,
dom_point, &local_coeffs[0][0]);
1124 cg::invoke_one(tg, [&](){det = loc_jac.det();
1125 weight = det * cub_wg[cub_ind];});
1127 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
1129 cg::invoke_one(tg, [&](){
1130 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
1131 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
// Convection velocity at this cubature point.
1136 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
1139 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
1141 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
// Tile-strided loop over the scalar output entries: i = local dof row,
// ii = vector component within the dim-block.
1147 for(
int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
1150 DataType val = DataType(0);
1152 const int i = (idx/dim+assemble_offset);
1153 const int ii = idx%dim;
// Diffusion, deformation form (branch guard elided, presumably `deformation`).
1156 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1159 val += nu * weight * (
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
1160 +
Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
// Diffusion, gradient form.
1165 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1168 val += nu * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
// Convection term.
1174 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1177 val += beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
// Reaction term.
1183 if(CudaMath::cuda_abs(theta) > DataType(0))
1188 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1191 val += theta * weight * basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
// Tile-wide sum of the 4 partial values committed via a block-scoped atomic
// (cuda::atomic_ref requires SM60+ for double — presumably guaranteed here).
1196 cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
1197 cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
// Host-side wrapper declarations (CUDA-only section): templated entry points
// that launch the assembly kernels over the coloring maps. NOTE(review):
// fragment — the function names and remaining parameters are elided; only the
// template heads and some parameter lines are visible.
1207 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
// Matrix-assembly launcher declaration (takes convection data, coloring maps
// and launch configuration).
1231 template<
typename Space_,
typename DT_,
typename IT_>
1234 const DT_* conv_data,
1237 const std::vector<int*>& coloring_maps,
1238 const std::vector<Index>& coloring_map_sizes,
1240 int shared_mem,
int blocksize,
int gridsize,
bool print_occupancy);
// Vector(defect)-assembly launcher declaration (additionally takes the primal
// data the operator is applied to).
1266 template<
typename Space_,
typename DT_,
typename IT_>
1269 const DT_* conv_data,
1270 const DT_* primal_data,
1273 const std::vector<int*>& coloring_maps,
1274 const std::vector<Index>& coloring_map_sizes,
1276 int shared_mem,
int blocksize,
int gridsize,
bool print_occupancy);
// Further declarations (presumably the sd_v_norm reductions) — bodies elided.
1291 template<
typename DT_,
typename IT_,
int dim_>
1306 template<
typename DT_,
typename IT_,
int dim_>
// Non-CUDA (generic) counterparts of the same launchers, by the look of the
// parameter lists — names elided.
1329 template<
typename Space_,
typename DT_,
typename IT_>
1332 const DT_* conv_data,
1335 const std::vector<int*>& coloring_maps,
1336 const std::vector<Index>& coloring_map_sizes,
1359 template<
typename Space_,
typename DT_,
typename IT_>
1362 const DT_* conv_data,
1363 const DT_* primal_data,
1366 const std::vector<int*>& coloring_maps,
1367 const std::vector<Index>& coloring_map_sizes,
1383 template<
typename DT_,
typename IT_,
int dim_>
1399 template<
typename DT_,
typename IT_,
int dim_>
// Assembler class head (name elided) and constructor: stores default Burgers
// parameters, configures the CUDA per-thread stack/cache limit and derives
// initial kernel launch parameters. NOTE(review): fragment — class name and
// several members are elided in this view.
1418 template<
int dim_,
typename DT_,
typename IT_>
1426 typedef typename SpaceHelp::ShapeType ShapeType;
1427 typedef typename SpaceHelp::DataType DataType;
1428 typedef typename SpaceHelp::IndexType IndexType;
1431 static constexpr int dim = SpaceHelp::dim;
1433 typedef typename SpaceHelp::DomainPointType DomainPointType;
1434 typedef typename SpaceHelp::ImagePointType ImagePointType;
1435 typedef typename SpaceHelp::ValueType ValueType;
1436 typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
// Constructor: zero-initializes all Burgers parameters; deformation tensor
// formulation is enabled by default; blocksize default 32 (one warp).
1493 template<
typename ColoringType_>
1495 mesh_data(space, coloring, hint),
1496 nu(DataType(0)), theta(DataType(0)), beta(DataType(0)), frechet_beta(DataType(0)), sd_delta(DataType(0)),
1497 sd_nu(DataType(0)), sd_v_norm(DataType(0)), shared_mem(0), gridsize(1), blocksize(32), print_occupancy(false), deformation(true)
1499 #ifdef FEAT_HAVE_CUDA
// Grow the CUDA per-thread local-memory/cache limit if it is below the target
// (magic sizes 8096/1012 appear tuned per dimension — rationale not visible here).
1501 const std::size_t stack_limit = Util::cuda_get_max_cache_thread();
1502 const std::size_t stack_limit_target =
sizeof(DataType) * (dim == 3 ? 8096u : 1012u);
1503 if(stack_limit < stack_limit_target)
1504 Util::cuda_set_max_cache_thread(stack_limit_target);
// Target number of local matrix entries held in shared memory per block;
// 3D uses roughly half the full dof_count^2 to bound shared-memory use.
1507 int target_elements = SpaceType::DofMappingType::dof_count * (dim == 3 ? (SpaceType::DofMappingType::dof_count/2+1) : SpaceType::DofMappingType::dof_count);
1508 set_kernel_launch_params(target_elements, blocksize);
// Public host entry points: build the cubature rule, then dispatch to the
// generic or CUDA backend via the BACKEND_SKELETON_VOID macro
// (cuda-impl, generic-impl, fallback-impl, args...).
1545 template<
typename CubatureFactory_>
// assemble_matrix1: assembles the Burgers matrix for the given convection
// field, scaled by alpha.
1547 const SpaceType& space,
const CubatureFactory_& cubature_factory, DataType alpha = DataType(1))
const
1555 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1558 int num_cubs = cubature.get_num_points();
1559 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1560 DataType* cub_wg = cubature.get_weights();
1562 BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
// assemble_vector: applies the Burgers operator to `primal` and accumulates
// into `vector`.
1577 template<
typename CubatureFactory_>
1586 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1589 int num_cubs = cubature.get_num_points();
1590 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1591 DataType* cub_wg = cubature.get_weights();
1593 BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
// set_sd_v_norm dispatchers: local and Global::Vector overloads share the
// same backend pair.
1603 BACKEND_SKELETON_VOID(set_sd_v_norm_cuda, set_sd_v_norm_generic, set_sd_v_norm_generic, convect)
1613 BACKEND_SKELETON_VOID(set_sd_v_norm_cuda, set_sd_v_norm_generic, set_sd_v_norm_generic, convect)
// Generic (CPU) backend implementations. NOTE(review): fragment — bodies are
// largely elided; only the data-extraction prologues are visible.
// assemble_matrix1_generic prologue (signature elided): pull the raw POD view
// of the convection vector and the assembly mapping.
1620 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1623 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = mesh_data.get_assembly_field();
1624 auto burgers_params = wrap_burgers_params();
// assemble_vector_generic: CPU defect assembly from raw POD views of the
// output, convection and primal vectors.
1631 void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1632 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
1634 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1635 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1636 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
// Host-side cubature data: points kept as an opaque void* in the POD struct.
1638 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(
void*)cub_pt, cub_wg, num_cubs};
1639 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = mesh_data.get_assembly_field();
1640 auto burgers_params = wrap_burgers_params();
// Generic sd_v_norm setters for local and global vectors (bodies elided).
1648 void set_sd_v_norm_generic(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect)
1654 void set_sd_v_norm_generic(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>& convect)
1659 #ifdef FEAT_HAVE_CUDA
// CUDA backend for the matrix assembly: copies the cubature rule to device
// memory, launches the templated assembly wrapper over the coloring maps,
// then frees the temporary device buffers.
// NOTE(review): per-call cudaMalloc/copy/free of the (small) cubature buffers
// — acceptable for a per-assembly cost, but could be cached if this is hot.
// Error checking is presumably inside the Util::cuda_* wrappers — confirm.
1660 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
1661 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
// POD views of the CSR matrix and the convection vector.
1663 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1664 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1666 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
// Upload cubature points and weights to the device.
1668 void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(CubPointType));
1669 Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt,
Index(num_cubs) *
sizeof(CubPointType));
1671 void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(DataType));
1672 Util::cuda_copy_host_to_device(cub_wg_device, (
const void*)cub_wg,
Index(num_cubs) *
sizeof(DataType));
1674 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1675 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = mesh_data.get_assembly_field();
1676 auto burgers_params = wrap_burgers_params();
// Launch (call head elided): pass coloring maps and the member launch config.
1679 d_mapping_data, mesh_data.get_coloring_maps(),
1680 mesh_data.get_color_map_sizes(), alpha, burgers_params,
1681 shared_mem, blocksize, gridsize, print_occupancy);
// Release the temporary device-side cubature buffers.
1682 Util::cuda_free(cub_wg_device);
1683 Util::cuda_free(cub_pt_device);
// CUDA backend for the defect-vector assembly: same device-upload pattern as
// assemble_matrix1_cuda, additionally passing the primal vector.
1687 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1688 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
1690 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1691 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1692 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1694 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
// Upload cubature points and weights to the device.
1696 void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(CubPointType));
1697 Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt,
Index(num_cubs) *
sizeof(CubPointType));
1699 void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(DataType));
1700 Util::cuda_copy_host_to_device(cub_wg_device, (
const void*)cub_wg,
Index(num_cubs) *
sizeof(DataType));
1702 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1703 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = mesh_data.get_assembly_field();
1704 auto burgers_params = wrap_burgers_params();
// Launch (call head elided). NOTE(review): blocksize is hard-coded to 32 here
// while the matrix path passes the member `blocksize` — possibly intentional
// (the vector kernel uses tile4 partitions of one warp) — confirm.
1707 mesh_data.get_coloring_maps(), mesh_data.get_color_map_sizes(), alpha, burgers_params,
1708 shared_mem, 32, gridsize, print_occupancy);
1709 Util::cuda_free(cub_wg_device);
1710 Util::cuda_free(cub_pt_device);
// Computes and stores the maximum streamline-diffusion velocity norm of the
// given (process-local) convection field on the device.
// NOTE(review): function body elided by extraction.
1713 void set_sd_v_norm_cuda(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect)
// Overload for a Global::Vector: computes the maximum streamline-diffusion
// velocity norm of the globally distributed convection field on the device.
// NOTE(review): function body elided by extraction.
1719 void set_sd_v_norm_cuda(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>& convect)
// Derives the kernel launch configuration (shared_mem, blocksize, gridsize)
// from hardware limits and the targeted number of elements per block.
//   target_elements : number of local element matrices to stage in shared memory
//   blocksize_      : desired CUDA thread block size
1725 void set_kernel_launch_params(
int target_elements,
int blocksize_)
// Query per-SM hardware limits from the CUDA utility layer.
1727 const int max_shared_mem = int(Util::cuda_get_shared_mem_per_sm());
1728 const int max_blocks_per_sm = int(Util::cuda_get_max_blocks_per_sm());
1729 const int max_sm_per_device = int(Util::cuda_get_sm_count());
// Shared-memory footprint of one local element matrix (dim x dim entries).
1736 constexpr int element_size = dim*dim*int(
sizeof(DataType));
// Fixed shared-memory overhead of the two per-block data wrappers.
1738 constexpr int shared_size_nec =
sizeof(VoxelAssembly::BurgersSharedDataGlobalWrapper<VoxelAssembly::SpaceHelper<SpaceType, DataType, IndexType>>) +
sizeof(VoxelAssembly::BurgersSharedDataKernelWrapper<VoxelAssembly::SpaceHelper<SpaceType, DataType, IndexType>>);
// Never shrink a previously configured shared_mem; grow it to fit the target.
1741 shared_mem =
Math::max(shared_mem, shared_size_nec + target_elements * element_size);
// NOTE(review): strict '<' rejects a request exactly equal to the hardware
// limit -- confirm whether '<=' was intended.
1743 XASSERTM(shared_mem < max_shared_mem,
"Targeted shared memory is " +
stringify(shared_mem) +
"B but Hardware only supports " +
stringify(max_shared_mem) +
" shared memory.");
1744 blocksize = blocksize_;
1745 const int max_color_sizes = int(mesh_data.get_max_color_size());
// Grid size: capped by the largest color-group size and by twice the number
// of resident blocks the device can hold. NOTE(review): the divisor
// 'blocksize/32' assumes blocksize >= 32 (integer division) -- a smaller
// blocksize would divide by zero; confirm callers guarantee this.
1749 gridsize = std::min(
int(max_color_sizes), 2*max_sm_per_device*(max_blocks_per_sm/(
int(blocksize)/32)));
// Non-CUDA fallback stub: this overload exists only so the class compiles
// without the CUDA backend; calling it is a programming error and aborts.
1754 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix),
const SpaceType& DOXY(space),
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt),
const DataType* DOXY(cub_wg),
int DOXY(num_cubs), DataType DOXY(alpha))
const
1756 XABORTM(
"What in the nine hells are you doing here?");
// Non-CUDA fallback stub for the vector assembly; calling it is a
// programming error and aborts.
1759 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
1760 const SpaceType&,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*,
const DataType*,
int, DataType)
const
1762 XABORTM(
"This call was logged and your local FBI agent was informed about your transgressions");
// Non-CUDA fallback stub for the local sd_v_norm computation; aborts if called.
1765 void set_sd_v_norm_cuda(
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&)
1767 XABORTM(
"Should not have called that....");
// Non-CUDA fallback stub for the global sd_v_norm computation; aborts if called.
1771 void set_sd_v_norm_cuda(
const Global::Vector<LAFEM::DenseVectorBlocked<DataType, IndexType, dim>, LAFEM::VectorMirror<DataType, IndexType>>&)
1773 XABORTM(
"Should not have called that....");
#define XABORTM(msg)
Macro that aborts program execution with a custom error message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Cubature Rule class template.
Global vector wrapper class template.
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Handles vector prolongation, restriction and serialization.
Standard Lagrange-2 Finite-Element space class template.
Index get_num_dofs() const
Returns the number of dofs.
Tiny Matrix class template.
Tiny Vector class template.
Data handler for Lagrange based FE spaces.
VoxelAssembly::Q2StandardHyperCube< dim_ > SpaceType
typedefs
void set_sd_v_norm(const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect)
Sets the internal maximum streamline diffusion velocity norm.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the Burgers operator for a finite element space with identical test and trial spaces.
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
DataType sd_v_norm
The current maximum velocity norm.
DataType sd_delta
Streamline diffusion parameter.
DataHandler mesh_data
The meshdata handler.
DataType sd_nu
Streamline diffusion viscosity. Generally equal to nu.
bool print_occupancy
print out occupancy
int blocksize
the block size to be used
VoxelBurgersAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers assembler.
int gridsize
the grid size to be used
int shared_mem
how much shared memory should the kernel use
DataType theta
Reaction scaling.
void set_sd_v_norm(const Global::Vector< LAFEM::DenseVectorBlocked< DataType, IndexType, dim >, LAFEM::VectorMirror< DataType, IndexType > > &convect)
Sets the internal maximum streamline diffusion velocity norm.
bool deformation
Should we use full deformation formulation?
VoxelAssembly::AssemblyBurgersData< DataType > wrap_burgers_params() const
Wraps the set burgers parameter into convenient struct.
DataType beta
Convection scaling.
DataType frechet_beta
Scaling parameter for full jacobian convection part.
DataType nu
The viscosity.
Burgers Voxel Assembly template.
T_ abs(T_ x)
Returns the absolute value.
T_ max(T_ a, T_ b)
Returns the maximum of two values.
std::size_t element_size(const Pack::Type type)
Returns the size of a Pack::Type element in bytes.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
void assemble_burgers_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params)
Host kernel wrapper for the full matrix burgers assembler.
DT_ get_sd_v_norm_host(const LAFEM::DenseVectorBlocked< DT_, IT_, dim_ > &convect)
Host kernel wrapper for the local sd_v_norm calculation.
void assemble_burgers_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
DT_ get_sd_v_norm(const LAFEM::DenseVectorBlocked< DT_, IT_, dim_ > &convect)
Device kernel wrapper for the local sd_v_norm calculation.
void assemble_burgers_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
CUDA_DEVICE __forceinline__ void grouped_burgers_mat_assembly_kernel(const ThreadGroup_ &tg, BurgersSharedDataKernelWrapper< SpaceHelp_ > *shared_wrapper, int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType *loc_mat, const typename SpaceHelp_::DataType *local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, SpaceHelp_::dim, SpaceHelp_::num_verts > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
Burgers Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection)
Burgers Vector assembly kernel.
CUDA_DEVICE __forceinline__ void grouped_burgers_mat_alt_assembly_kernel(const ThreadGroup_ &tg, BurgersSharedDataKernelWrapper< SpaceHelp_ > *shared_wrapper, int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType *loc_mat, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
Appears not to perform better than the standard kernel; presumably the compiler already optimizes the standard version sufficiently.
CUDA_HOST_DEVICE void burgers_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_streamline, const bool need_convection, const typename SpaceHelp_::DataType tol_eps)
Burgers Matrix assembly kernel.
String stringify(const T_ &item)
Converts an item into a String.
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
Data for burgers assembler.
A data field for a cubature rule.
A data field for all necessary values that define the dof mapping for assembly.
Helper struct wrapping the data required as shared data.