2#ifndef FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP
3#define FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP 1
6#include <kernel/backend.hpp>
8#include <kernel/voxel_assembly/voxel_assembly_common.hpp>
9#include <kernel/voxel_assembly/helper/data_handler.hpp>
10#include <kernel/voxel_assembly/helper/space_helper.hpp>
11#include <kernel/cubature/rule.hpp>
12#include <kernel/lafem/sparse_matrix_bcsr.hpp>
13#include <kernel/lafem/dense_vector_blocked.hpp>
14#include <kernel/trafo/standard/mapping.hpp>
15#include <kernel/geometry/conformal_mesh.hpp>
16#include <kernel/util/tiny_algebra.hpp>
17#include <kernel/global/vector.hpp>
18#include <kernel/lafem/vector_mirror.hpp>
19#include <kernel/voxel_assembly/burgers_assembler.hpp>
21#include <kernel/util/cuda_util.hpp>
26 namespace VoxelAssembly
29 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
31 template<
typename SpaceHelp_,
bool need_streamdiff_ = true>
34 typedef SpaceHelp_ SpaceHelp;
35 static constexpr int dim = SpaceHelp::dim;
36 typedef typename SpaceHelp::SpaceType SpaceType;
37 typedef typename SpaceHelp::DataType DataType;
39 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
44 typename SpaceHelp::EvalData basis_data;
45 typename SpaceHelp::JacobianMatrixType loc_jac;
46 typename SpaceHelp::JacobianMatrixType loc_jac_inv;
50 typename SpaceHelp::DomainPointType dom_point;
60 template<
typename SpaceHelp_>
63 typedef SpaceHelp_ SpaceHelp;
64 static constexpr int dim = SpaceHelp::dim;
65 typedef typename SpaceHelp::SpaceType SpaceType;
66 typedef typename SpaceHelp::DataType DataType;
68 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
73 typename SpaceHelp::EvalData basis_data;
74 typename SpaceHelp::JacobianMatrixType loc_jac;
75 typename SpaceHelp::JacobianMatrixType loc_jac_inv;
78 typename SpaceHelp::DomainPointType dom_point;
97 template<
typename Space_,
typename DT_,
typename IT_>
130 template<
typename SpaceHelp_,
typename LocMatType_,
typename LocVecType_,
int dim_,
int num_verts_,
typename ViscFunc_,
typename ViscDerFunc_,
bool need_stream_diff_>
132 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
135 const bool need_convection,
const typename SpaceHelp_::DataType tol_eps,
136 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
140#pragma nv_diag_suppress = 177
141#pragma nv_diag_suppress = 550
144 typedef SpaceHelp_ SpaceHelp;
145 constexpr int dim = SpaceHelp::dim;
146 typedef typename SpaceHelp::SpaceType SpaceType;
147 typedef typename SpaceHelp::DataType DataType;
149 constexpr bool need_streamline = need_stream_diff_;
153 const DataType& theta{burgers_params.theta};
154 const DataType& beta{burgers_params.beta};
155 const DataType& frechet_beta{burgers_params.frechet_beta};
156 const DataType& sd_delta{burgers_params.sd_delta};
157 const DataType& sd_nu{burgers_params.sd_nu};
158 const DataType& sd_v_norm{burgers_params.sd_v_norm};
163 const DataType& frechet_material{material_params.frechet_material};
164 const DataType& reg_eps{material_params.reg_eps};
165 const bool& need_frechet_material{material_params.need_frechet_material};
183 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
187 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
188 typename SpaceHelp::EvalData basis_data;
193 VecValueType loc_v(DataType(0));
194 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
195 DataType local_delta(DataType(0));
196 DataType nu_loc(DataType(0));
197 DataType gamma_dot(DataType(0));
201#pragma nv_diag_default = 550
202#pragma nv_diag_default = 177
207 if constexpr(need_streamline)
209 VecValueType mean_v(DataType(0));
211 const VecValueType barycenter(DataType(0));
214 SpaceHelp::eval_ref_values(basis_data, barycenter);
215 SpaceHelp::trans_values(basis_data);
216 for(
int i(0); i < num_loc_dofs; ++i)
218 mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
220 const DataType local_norm_v = mean_v.norm_euclid();
222 if(local_norm_v > tol_eps)
228 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
229 const DataType local_re = (local_norm_v * local_h) / sd_nu;
230 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
234 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
236 const typename SpaceHelp::DomainPointType&
dom_point = cub_pt[cub_ind];
237 SpaceHelp::eval_ref_values(basis_data,
dom_point);
238 SpaceHelp::trans_values(basis_data);
240 SpaceHelp::calc_jac_mat(loc_jac,
dom_point, local_coeffs);
241 loc_jac_inv.set_inverse(loc_jac);
243 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
244 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
246 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
247 if(need_convection || need_streamline)
250 for(
int i = 0; i < num_loc_dofs; ++i)
252 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
258 for(
int i = 0; i < num_loc_dofs; ++i)
260 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
264 strain_rate_tensor_2.set_transpose(loc_grad_v);
265 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
267 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
269 gamma_dot =
Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
273 nu_loc = visco_func(gamma_dot);
277 for(
int i(0); i < num_loc_dofs; ++i)
280 for(
int j(0); j < num_loc_dofs; ++j)
283 const DataType
value = nu_loc * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
286 loc_mat[i][j].add_scalar_main_diag(
value);
287 loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
291 if(need_frechet_material)
293 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
296 for(
int i(0); i < num_loc_dofs; ++i)
302 for(
int j(0); j < num_loc_dofs; ++j)
307 loc_mat[i][j].add_outer_product(du_grad_phi, du_grad_psi, fac * weight);
315 for(
int i(0); i < num_loc_dofs; ++i)
318 for(
int j(0); j < num_loc_dofs; ++j)
321 const DataType
value = beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad);
324 loc_mat[i][j].add_scalar_main_diag(
value);
331 if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
333 if(
Math::abs(frechet_beta) > DataType(0))
337 for(
int i(0); i < num_loc_dofs; ++i)
340 for(
int j(0); j < num_loc_dofs; ++j)
343 const DataType
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
346 loc_mat[i][j].axpy(
value, loc_grad_v);
353 if(CudaMath::cuda_abs(theta) > DataType(0))
359 for(
int i(0); i < num_loc_dofs; ++i)
362 for(
int j(0); j < num_loc_dofs; ++j)
365 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
368 loc_mat[i][j].add_scalar_main_diag(
value);
373 if constexpr(need_streamline)
376 for(
int i = 0; i < num_loc_dofs; ++i)
378 streamdiff_coeffs[i] =
Tiny::dot(loc_v, basis_data.phi[i].grad);
383 if((local_delta > tol_eps))
386 for(
int i(0); i < num_loc_dofs; ++i)
389 for(
int j(0); j < num_loc_dofs; ++j)
392 const DataType
value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
395 loc_mat[i][j].add_scalar_main_diag(
value);
404 #if defined(__CUDACC__)
429 template<
typename ThreadGroup_,
typename SpaceHelp_,
typename ViscFunc_,
typename ViscDerFunc_,
bool need_stream_diff_>
431 int loc_assemble_size,
int assemble_offset,
typename SpaceHelp_::DataType* loc_mat,
const typename SpaceHelp_::DataType* local_conv_dofs,
433 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
436 const bool need_convection,
const typename SpaceHelp_::DataType tol_eps,
437 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
440#pragma nv_diag_suppress = 177
441#pragma nv_diag_suppress = 550
443 typedef SpaceHelp_ SpaceHelp;
444 constexpr int dim = SpaceHelp::dim;
445 typedef typename SpaceHelp::SpaceType SpaceType;
446 typedef typename SpaceHelp::DataType DataType;
447 constexpr bool need_streamline = need_stream_diff_;
449 const int t_idx = tg.thread_rank();
450 const int g_size = tg.num_threads();
453 const DataType& theta{burgers_params.theta};
454 const DataType& beta{burgers_params.beta};
455 const DataType& frechet_beta{burgers_params.frechet_beta};
456 const DataType& sd_delta{burgers_params.sd_delta};
457 const DataType& sd_nu{burgers_params.sd_nu};
458 const DataType& sd_v_norm{burgers_params.sd_v_norm};
460 const DataType& frechet_material{material_params.frechet_material};
461 const DataType& reg_eps{material_params.reg_eps};
462 const bool& need_frechet_material{material_params.need_frechet_material};
480 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
482 typedef Tiny::Vector<DataType, dim> VecValueType;
483 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
488 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, need_stream_diff_> BSDKWrapper;
489 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
490 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
491 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
492 VecValueType& loc_v = shared_wrapper->loc_v;
493 VecValueType* mean_v_p =
nullptr;
494 if constexpr(need_streamline) mean_v_p = &(shared_wrapper->mean_v);
495 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
496 Tiny::Vector<DataType, num_loc_dofs>* streamdiff_coeffs_p =
nullptr;
497 if constexpr(need_streamline) streamdiff_coeffs_p = &(shared_wrapper->streamdiff_coeffs);
498 DataType& local_delta = shared_wrapper->local_delta;
499 DataType& nu_loc = shared_wrapper->nu_loc;
500 DataType& gamma_dot = shared_wrapper->gamma_dot;
501 DataType& det = shared_wrapper->det;
502 DataType& weight = shared_wrapper->weight;
503 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
504 bool& need_frechet = shared_wrapper->need_frechet;
507 MatValueType strain_rate_tensor_2(DataType(0));
509 VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper,
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
512 cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
516#pragma nv_diag_default = 550
517#pragma nv_diag_default = 177
519 if constexpr(need_streamline)
521 cg::invoke_one(tg, [&]() {*mean_v_p = DataType(0);});
525 cg::invoke_one(tg, [&](){
const VecValueType bc(0);
526 SpaceHelp::eval_ref_values(basis_data, bc);
527 SpaceHelp::trans_values(basis_data);});
531 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
533 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &(*mean_v_p)[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
538 DataType local_norm_v = mean_v_p->norm_euclid();
540 if(local_norm_v > tol_eps)
542 cg::invoke_one(tg, [&](){
543 const DataType local_h = SpaceHelp::width_directed(*mean_v_p, local_coeffs) * local_norm_v;
544 const DataType local_re = (local_norm_v * local_h) / sd_nu;
545 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
553 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
555 cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
556 SpaceHelp::eval_ref_values(basis_data,
dom_point);
557 SpaceHelp::trans_values(basis_data);});
560 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac,
dom_point, &local_coeffs[0][0]);
562 cg::invoke_one(tg, [&](){det = loc_jac.det();
563 weight = det * cub_wg[cub_ind];});
565 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
567 cg::invoke_one(tg, [&](){
568 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
569 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
572 if(need_convection || need_streamline)
574 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
577 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
579 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
585 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
587 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
589 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
596 strain_rate_tensor_2.set_transpose(loc_grad_v);
597 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
598 cg::invoke_one(tg, [&](){
601 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
602 nu_loc = visco_func(gamma_dot);
607 if constexpr(need_streamline)
610 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
612 (*streamdiff_coeffs_p)[i] =
Tiny::dot(loc_v, basis_data.phi[i].grad);
619 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
621 for(
int idx = t_idx; idx < loc_assemble_size; idx += g_size)
624 const int i = (idx+assemble_offset) / num_loc_dofs;
625 const int j = (idx+assemble_offset) % num_loc_dofs;
629 const DataType
value = nu_loc * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
631 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
632 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
635 if(need_frechet_material)
637 Tiny::Vector<DataType, dim> du_grad_phi;
638 du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
639 Tiny::Vector<DataType, dim> du_grad_psi;
640 du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
642 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(du_grad_phi, du_grad_psi, fac*weight);
649 const DataType
value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
652 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
659 const DataType
value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
662 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->
axpy(
value, loc_grad_v);
667 if(CudaMath::cuda_abs(theta) > DataType(0))
673 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
676 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
680 if constexpr(need_streamline)
682 if(local_delta > tol_eps)
685 const DataType
value = local_delta * weight * (*streamdiff_coeffs_p)[i] * (*streamdiff_coeffs_p)[j];
688 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(
value);
720 template<
typename SpaceHelp_,
typename LocVecType_,
int dim_,
int num_verts_,
typename ViscFunc_>
722 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
724 const bool need_convection, ViscFunc_ visco_func)
726 typedef SpaceHelp_ SpaceHelp;
727 constexpr int dim = SpaceHelp::dim;
728 typedef typename SpaceHelp::SpaceType SpaceType;
729 typedef typename SpaceHelp::DataType DataType;
732 const DataType& theta{burgers_params.theta};
733 const DataType& beta{burgers_params.beta};
756 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
760 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
761 typename SpaceHelp::EvalData basis_data;
767 VecValueType loc_v(DataType(0));
768 DataType nu_loc(DataType(0));
774 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
776 const typename SpaceHelp::DomainPointType&
dom_point = cub_pt[cub_ind];
777 SpaceHelp::eval_ref_values(basis_data,
dom_point);
778 SpaceHelp::trans_values(basis_data);
780 SpaceHelp::calc_jac_mat(loc_jac,
dom_point, local_coeffs);
781 loc_jac_inv.set_inverse(loc_jac);
783 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
784 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
786 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
790 for(
int i = 0; i < num_loc_dofs; ++i)
792 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
798 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
800 for(
int i = 0; i < num_loc_dofs; ++i)
802 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
805 strain_rate_tensor_2.set_transpose(loc_grad_v);
806 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
808 nu_loc = visco_func(CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
810 nu_loc = visco_func(
Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
816 for(
int i(0); i < num_loc_dofs; ++i)
819 for(
int j(0); j < num_loc_dofs; ++j)
822 const DataType value1 = nu_loc * weight *
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
823 const DataType value2 = nu_loc * weight *
Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
825 loc_vec[i].axpy(value1, local_prim_dofs[j]);
826 loc_vec[i].axpy(value2, basis_data.phi[j].grad);
833 for(
int i(0); i < num_loc_dofs; ++i)
836 for(
int j(0); j < num_loc_dofs; ++j)
839 const DataType
value = beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad);
842 loc_vec[i].axpy(
value, local_prim_dofs[j]);
849 if(CudaMath::cuda_abs(theta) > DataType(0))
855 for(
int i(0); i < num_loc_dofs; ++i)
858 for(
int j(0); j < num_loc_dofs; ++j)
861 const DataType
value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
864 loc_vec[i].axpy(
value, local_prim_dofs[j]);
883 template<
typename ThreadGroup_,
typename SpaceHelp_,
typename ViscFunc_>
885 typename SpaceHelp_::DataType* loc_vec,
const typename SpaceHelp_::DataType* local_prim_dofs,
const typename SpaceHelp_::DataType* local_conv_dofs,
887 const typename SpaceHelp_::DomainPointType* cub_pt,
const typename SpaceHelp_::DataType* cub_wg,
const int num_cubs,
889 const bool need_convection, ViscFunc_ visco_func)
891 typedef SpaceHelp_ SpaceHelp;
892 constexpr int dim = SpaceHelp::dim;
893 typedef typename SpaceHelp::SpaceType SpaceType;
894 typedef typename SpaceHelp::DataType DataType;
895 const int t_idx = tg.thread_rank();
896 const int g_size = tg.num_threads();
899 const DataType& theta{burgers_params.theta};
900 const DataType& beta{burgers_params.beta};
923 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
927 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, false> BSDKWrapper;
928 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
929 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
931 DataType& det = shared_wrapper->det;
932 DataType& weight = shared_wrapper->weight;
933 DataType& nu_loc = shared_wrapper->nu_loc;
934 DataType& gamma_dot = shared_wrapper->gamma_dot;
935 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
937 typedef Tiny::Vector<DataType, dim> VecValueType;
938 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
940 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
941 VecValueType& loc_v = shared_wrapper->loc_v;
943 typename SpaceHelp::DomainPointType&
dom_point = shared_wrapper->dom_point;
945 VoxelAssembly::coalesced_format(tg, (
unsigned int*) shared_wrapper,
sizeof(BSDKWrapper)/(
sizeof(
unsigned int)));
948 cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
950 for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
953 cg::invoke_one(tg, [&](){
dom_point = cub_pt[cub_ind];
954 SpaceHelp::eval_ref_values(basis_data,
dom_point);
955 SpaceHelp::trans_values(basis_data);});
958 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac,
dom_point, &local_coeffs[0][0]);
960 cg::invoke_one(tg, [&](){det = loc_jac.det();
961 weight = det * cub_wg[cub_ind];});
963 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
965 cg::invoke_one(tg, [&](){
966 SpaceHelp::eval_ref_gradients(basis_data,
dom_point);
967 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
972 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
975 for(
int idx = t_idx; idx < num_loc_dofs; idx += g_size)
977 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
983 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
985 for(
int i = t_idx; i < num_loc_dofs; i += g_size)
987 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
990 cg::invoke_one(tg, [&](){
991 MatValueType strain_rate_tensor_2(DataType(0));
992 strain_rate_tensor_2.set_transpose(loc_grad_v);
993 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
994 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
995 nu_loc = visco_func(gamma_dot);
1001 for(
int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
1004 DataType val = DataType(0);
1006 const int i = (idx/dim+assemble_offset);
1007 const int ii = idx%dim;
1009 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1012 val += nu_loc * weight * (
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
1013 +
Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
1019 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1022 val += beta * weight * basis_data.phi[i].value *
Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
1028 if(CudaMath::cuda_abs(theta) > DataType(0))
1033 for(
int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1036 val += theta * weight * basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
1041 cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
1042 cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
1052 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
1074 template<
typename Space_,
typename DT_,
typename IT_>
1077 const DT_* conv_data,
1080 const std::vector<int*>& coloring_maps,
1081 const std::vector<Index>& coloring_map_sizes,
1084 MaterialType mat_type,
int shared_mem,
int blocksize,
int gridsize,
bool print_occupancy);
1108 template<
typename Space_,
typename DT_,
typename IT_>
1111 const DT_* conv_data,
1112 const DT_* primal_data,
1115 const std::vector<int*>& coloring_maps,
1116 const std::vector<Index>& coloring_map_sizes,
1120 int shared_mem,
int blocksize,
int gridsize,
bool print_occupancy);
1145 template<
typename Space_,
typename DT_,
typename IT_>
1148 const DT_* conv_data,
1151 const std::vector<int*>& coloring_maps,
1152 const std::vector<Index>& coloring_map_sizes,
1179 template<
typename Space_,
typename DT_,
typename IT_>
1182 const DT_* conv_data,
1183 const DT_* primal_data,
1186 const std::vector<int*>& coloring_maps,
1187 const std::vector<Index>& coloring_map_sizes,
1209 template<
int dim_,
typename DT_,
typename IT_>
1219 typedef typename SpaceHelp::ShapeType ShapeType;
1220 typedef typename SpaceHelp::DataType DataType;
1221 typedef typename SpaceHelp::IndexType IndexType;
1224 static constexpr int dim = SpaceHelp::dim;
1226 typedef typename SpaceHelp::DomainPointType DomainPointType;
1227 typedef typename SpaceHelp::ImagePointType ImagePointType;
1228 typedef typename SpaceHelp::ValueType ValueType;
1229 typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
1265 template<
typename ColoringType_>
1268 frechet_material(DataType(0)), reg_eps(DataType(1E-100)), material_type(
MaterialType::carreau)
1304 template<
typename CubatureFactory_>
1306 const SpaceType& space,
const CubatureFactory_& cubature_factory, DataType alpha = DataType(1))
const
1314 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1317 int num_cubs = cubature.get_num_points();
1318 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1319 DataType* cub_wg = cubature.get_weights();
1321 BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
1336 template<
typename CubatureFactory_>
1345 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1348 int num_cubs = cubature.get_num_points();
1349 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1350 DataType* cub_wg = cubature.get_weights();
1352 BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
1359 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1362 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1363 auto burgers_params = this->wrap_burgers_params();
1364 auto material_params = this->wrap_material_params();
1368 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1369 burgers_params, material_params, material_type);
1372 void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1373 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
1375 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1376 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1377 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1379 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(
void*)cub_pt, cub_wg, num_cubs};
1380 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1381 auto burgers_params = this->wrap_burgers_params();
1382 auto material_params = wrap_material_params();
1386 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1387 burgers_params, material_params, material_type);
1391 #ifdef FEAT_HAVE_CUDA
1392 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
1393 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
1395 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1396 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1398 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1400 void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(CubPointType));
1401 Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt,
Index(num_cubs) *
sizeof(CubPointType));
1403 void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(DataType));
1404 Util::cuda_copy_host_to_device(cub_wg_device, (
const void*)cub_wg,
Index(num_cubs) *
sizeof(DataType));
1406 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1407 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1408 auto burgers_params = this->wrap_burgers_params();
1409 auto material_params = wrap_material_params();
1412 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1413 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1414 Util::cuda_free(cub_wg_device);
1415 Util::cuda_free(cub_pt_device);
1419 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1420 const SpaceType& space,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt,
const DataType* cub_wg,
int num_cubs, DataType alpha)
const
1422 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1423 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1424 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1426 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1428 void* cub_pt_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(CubPointType));
1429 Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt,
Index(num_cubs) *
sizeof(CubPointType));
1431 void* cub_wg_device = Util::cuda_malloc(
Index(num_cubs) *
sizeof(DataType));
1432 Util::cuda_copy_host_to_device(cub_wg_device, (
const void*)cub_wg,
Index(num_cubs) *
sizeof(DataType));
1434 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1435 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1436 auto burgers_params = this->wrap_burgers_params();
1437 auto material_params = wrap_material_params();
1440 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1441 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1442 Util::cuda_free(cub_wg_device);
1443 Util::cuda_free(cub_pt_device);
1447 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix),
const SpaceType& DOXY(space),
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt),
const DataType* DOXY(cub_wg),
int DOXY(num_cubs), DataType DOXY(alpha))
const
1449 XABORTM(
"What in the nine hells are you doing here?");
1452 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
1453 const SpaceType&,
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*,
const DataType*,
int, DataType)
const
1455 XABORTM(
"This call was logged and your local FBI agent was informed about your transgression");
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Cubature Rule class template.
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Standard Lagrange-2 Finite-Element space class template.
Index get_num_dofs() const
Returns the number of dofs.
Tiny Matrix class template.
Tiny Vector class template.
CUDA_HOST_DEVICE Vector & set_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x)
Sets this vector to the result of a matrix-vector product.
Q2Lagrange thread parallel assembly class for the burgers operator.
Burgers Voxel Assembly template.
MaterialType material_type
material type
DataType frechet_material
Scaling factor full material jacobian.
VoxelBurgersVeloMaterialAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers velo material assembler.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the burgers velo material operator for finitie element space with same test and trial space...
DataType mu_0
Material Parameter.
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
VoxelAssembly::AssemblyMaterialData< DataType > wrap_material_params() const
Wraps the set burgers parameter into convenient struct.
BaseClass::SpaceType SpaceType
typedefs
DataType reg_eps
regularity parameter
Burgers Velocity Material Voxel Assembly template.
T_ sqrt(T_ x)
Returns the square-root of a value.
T_ abs(T_ x)
Returns the absolute value.
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
void assemble_burgers_velo_material_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_velo_material_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const VoxelAssembly::AssemblyMaterialData< typename SpaceHelp_::DataType > &material_params, const bool need_convection, const typename SpaceHelp_::DataType tol_eps, ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
Burgers Velo Material Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection, ViscFunc_ visco_func)
Burgers Vector assembly kernel.
MaterialType
Enum for different material types.
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
Data for burgers assembler.
A data field for a cubature rule.
A data field for all necessary values that define the dof mapping for assembly.
Data for material burgers assembler.
Helper struct wrapping the data required as shared data.