FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
burgers_velo_material_assembler.hpp
1#pragma once
2#ifndef FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP
3#define FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP 1
4
6#include <kernel/backend.hpp>
8#include <kernel/voxel_assembly/voxel_assembly_common.hpp>
9#include <kernel/voxel_assembly/helper/data_handler.hpp>
10#include <kernel/voxel_assembly/helper/space_helper.hpp>
11#include <kernel/cubature/rule.hpp>
12#include <kernel/lafem/sparse_matrix_bcsr.hpp>
13#include <kernel/lafem/dense_vector_blocked.hpp>
14#include <kernel/trafo/standard/mapping.hpp>
15#include <kernel/geometry/conformal_mesh.hpp>
16#include <kernel/util/tiny_algebra.hpp>
17#include <kernel/global/vector.hpp>
18#include <kernel/lafem/vector_mirror.hpp>
19#include <kernel/voxel_assembly/burgers_assembler.hpp>
20#ifdef FEAT_HAVE_CUDA
21#include <kernel/util/cuda_util.hpp>
22#endif
23
24namespace FEAT
25{
26 namespace VoxelAssembly
27 {
28
29 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
// Shared per-thread-group scratch data for the grouped (cooperative-groups)
// Burgers velocity-material assembly kernels below: one instance lives in
// CUDA shared memory per thread group and holds the per-element state that
// all threads of the group read and write cooperatively.
// Primary template: the need_streamdiff_ == true variant additionally carries
// mean_v (and, judging from its use in the grouped matrix kernel, presumably
// a streamdiff_coeffs member on original line 51, which is missing from this
// extraction — TODO confirm against the original header).
// NOTE(review): the struct declaration line itself (original line 32) and the
// VecValueType/MatValueType typedefs (original lines 41-42) are missing from
// this extraction of the file.
31 template<typename SpaceHelp_, bool need_streamdiff_ = true>
33 {
34 typedef SpaceHelp_ SpaceHelp;
35 static constexpr int dim = SpaceHelp::dim;
36 typedef typename SpaceHelp::SpaceType SpaceType;
37 typedef typename SpaceHelp::DataType DataType;
38 //define local sizes
39 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
40 // local vector and matrix defines
43
44 typename SpaceHelp::EvalData basis_data;   // basis values/gradients at the current evaluation point
45 typename SpaceHelp::JacobianMatrixType loc_jac;      // transformation jacobian
46 typename SpaceHelp::JacobianMatrixType loc_jac_inv;  // inverse jacobian (used to transform gradients)
47 MatValueType loc_grad_v;   // gradient of the convection velocity field
48 VecValueType loc_v;        // convection velocity at the current point
49 VecValueType mean_v;       // mean velocity at the barycenter (streamline diffusion)
50 typename SpaceHelp::DomainPointType dom_point;  // current cubature point
52 DataType local_delta;      // streamline-diffusion delta for this element
53 DataType nu_loc;           // local (shear-rate dependent) viscosity
54 DataType gamma_dot;        // shear rate
55 DataType det;              // jacobian determinant
56 DataType weight;           // jacobian determinant times cubature weight
57 bool need_frechet;         // whether the convection Frechet term is assembled
58 };
59
// Specialization without streamline-diffusion support (need_streamdiff_ ==
// false): same members as the primary template minus mean_v. Used by the
// grouped defect assembly kernel further below.
// NOTE(review): the VecValueType/MatValueType typedefs (original lines 70-71)
// are missing from this extraction of the file.
60 template<typename SpaceHelp_>
61 struct BurgersMatSharedDataKernelWrapper<SpaceHelp_, false>
62 {
63 typedef SpaceHelp_ SpaceHelp;
64 static constexpr int dim = SpaceHelp::dim;
65 typedef typename SpaceHelp::SpaceType SpaceType;
66 typedef typename SpaceHelp::DataType DataType;
67 //define local sizes
68 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
69 // local vector and matrix defines
72
73 typename SpaceHelp::EvalData basis_data;   // basis values/gradients at the current evaluation point
74 typename SpaceHelp::JacobianMatrixType loc_jac;      // transformation jacobian
75 typename SpaceHelp::JacobianMatrixType loc_jac_inv;  // inverse jacobian (used to transform gradients)
76 MatValueType loc_grad_v;   // gradient of the convection velocity field
77 VecValueType loc_v;        // convection velocity at the current point
78 typename SpaceHelp::DomainPointType dom_point;  // current cubature point
79 DataType local_delta;      // streamline-diffusion delta
80 DataType nu_loc;           // local (shear-rate dependent) viscosity
81 DataType gamma_dot;        // shear rate
82 DataType det;              // jacobian determinant
83 DataType weight;           // jacobian determinant times cubature weight
84 bool need_frechet;         // whether the convection Frechet term is assembled
85 };
86
87 #endif
97 template<typename Space_, typename DT_, typename IT_>
99
100 namespace Kernel
101 {
102
/**
 * \brief Assembles the local element matrix of a Burgers-type velocity
 *        operator with a material (shear-rate dependent) viscosity.
 *
 * Per cubature point the kernel evaluates the trafo jacobian and basis
 * gradients, builds grad(v) from the convection dofs, computes the shear
 * rate gamma_dot = sqrt(1/2) * |grad(v) + grad(v)^T|_F and the local
 * viscosity nu_loc = visco_func(gamma_dot), and then accumulates into
 * loc_mat:
 *  - the deformation-formulation diffusion term (always),
 *  - the material Frechet term weighted by visco_d_func (if
 *    material_params.need_frechet_material),
 *  - convection (if need_convection), convection Frechet (if
 *    frechet_beta != 0), reaction (if theta != 0),
 *  - streamline diffusion (compile-time flag need_stream_diff_).
 *
 * \param[out] loc_mat          Local matrix of dim x dim blocks; formatted
 *                              first, then accumulated over all cub points.
 * \param[in] local_conv_dofs   Local dofs of the convection velocity field.
 * \param[in] local_coeffs      Trafo coefficients — presumably the element
 *                              vertex coordinates; TODO confirm.
 * \param[in] cub_pt, cub_wg, num_cubs  Cubature points, weights and count.
 * \param[in] need_convection   Runtime switch for the convection term.
 * \param[in] tol_eps           Tolerance used by the streamline-diffusion
 *                              branch (norm/delta thresholds).
 * \param[in] visco_func        Viscosity as a function of gamma_dot.
 * \param[in] visco_d_func      Derivative of the viscosity w.r.t. gamma_dot.
 *
 * NOTE(review): the burgers_params and material_params parameters referenced
 * throughout the body (original signature lines 133-134) are missing from
 * this extraction of the file.
 */
130 template<typename SpaceHelp_, typename LocMatType_, typename LocVecType_, int dim_, int num_verts_, typename ViscFunc_, typename ViscDerFunc_, bool need_stream_diff_>
131 CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_& loc_mat, const LocVecType_& local_conv_dofs, const Tiny::Matrix<typename SpaceHelp_::DataType, dim_, num_verts_>& local_coeffs,
132 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
135 const bool need_convection, const typename SpaceHelp_::DataType tol_eps,
136 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
137 {
138 // disable unused variable warnings
139#ifdef __CUDACC__
140#pragma nv_diag_suppress = 177
141#pragma nv_diag_suppress = 550
142#endif
143
144 typedef SpaceHelp_ SpaceHelp;
145 constexpr int dim = SpaceHelp::dim;
146 typedef typename SpaceHelp::SpaceType SpaceType;
147 typedef typename SpaceHelp::DataType DataType;
148
149 constexpr bool need_streamline = need_stream_diff_;
150
151 // burgers params
152 // const DataType& nu{burgers_params.nu};
153 const DataType& theta{burgers_params.theta};
154 const DataType& beta{burgers_params.beta};
155 const DataType& frechet_beta{burgers_params.frechet_beta};
156 const DataType& sd_delta{burgers_params.sd_delta};
157 const DataType& sd_nu{burgers_params.sd_nu};
158 const DataType& sd_v_norm{burgers_params.sd_v_norm};
159 // formulation only makes sense with deformation formulation
160 // const bool& deformation{burgers_params.deformation};
161
162 // additional material parameters
163 const DataType& frechet_material{material_params.frechet_material};
164 const DataType& reg_eps{material_params.reg_eps};
165 const bool& need_frechet_material{material_params.need_frechet_material};
166
167 // #ifdef __CUDACC__
168 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
169 // #else
170 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
171 // #endif
172
173 // #ifdef __CUDACC__
174 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
175 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
176 // #else
177 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
178 // const bool need_convection = Math::abs(beta) > DataType(0);
179 // #endif
180
181
182 //define local sizes
183 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
184 //get number of nodes per element
185 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
186 // define local arrays
187 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
188 typename SpaceHelp::EvalData basis_data;
189
190 // local vector and matrix defines
191 typedef Tiny::Vector<DataType, dim> VecValueType;
192 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
193 VecValueType loc_v(DataType(0));
194 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
195 DataType local_delta(DataType(0));
196 DataType nu_loc(DataType(0));
197 DataType gamma_dot(DataType(0));
198
199 // reenable unused variable warnings
200#ifdef __CUDACC__
201#pragma nv_diag_default = 550
202#pragma nv_diag_default = 177
203#endif
204
205 loc_mat.format();
206
207 if constexpr(need_streamline) //need streamdiff? constexpr since we need this relatively rarely
208 {
// Pre-pass: evaluate the mean velocity at the element barycenter and derive
// the element-local streamline-diffusion parameter local_delta from it.
209 VecValueType mean_v(DataType(0));
210 //set domain point to barycenter, which is zero for Hypercubes
211 const VecValueType barycenter(DataType(0));
212 // NewSpaceHelp::_map_point(img_point, barycenter, local_coeffs);
213 //only reserve memory for reference values
214 SpaceHelp::eval_ref_values(basis_data, barycenter);
215 SpaceHelp::trans_values(basis_data);
216 for(int i(0); i < num_loc_dofs; ++i)
217 {
218 mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
219 }
220 const DataType local_norm_v = mean_v.norm_euclid();
221
222 if(local_norm_v > tol_eps)
223 {
224 //we need this trafo data -> for what??
225 // SpaceHelp::calc_jac_mat(loc_jac, barycenter, local_coeffs);
226 // loc_jac_inv.set_inverse(loc_jac);
227
228 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
229 const DataType local_re = (local_norm_v * local_h) / sd_nu;
230 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
231 }
232 }
233
234 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
235 {
236 const typename SpaceHelp::DomainPointType& dom_point = cub_pt[cub_ind];
237 SpaceHelp::eval_ref_values(basis_data, dom_point);
238 SpaceHelp::trans_values(basis_data);
239 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
240 SpaceHelp::calc_jac_mat(loc_jac, dom_point, local_coeffs);
241 loc_jac_inv.set_inverse(loc_jac);
242
243 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
244 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
245
246 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
247 if(need_convection || need_streamline) // need streamdiff or convection?
248 {
249 loc_v.format();
250 for(int i = 0; i < num_loc_dofs; ++i)
251 {
252 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
253 }
254 }
255 // assemble loc_grad_v, always required for this assembler
256 {
257 loc_grad_v.format();
258 for(int i = 0; i < num_loc_dofs; ++i)
259 {
260 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
261 }
262 }
263 // required terms for carreau assembly
// strain_rate_tensor_2 = grad(v) + grad(v)^T  (twice the symmetric strain rate)
264 strain_rate_tensor_2.set_transpose(loc_grad_v);
265 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
266 #ifdef __CUDACC__
267 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
268 #else
269 gamma_dot = Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
270 #endif
271
272 //default to nu ?
273 nu_loc = visco_func(gamma_dot);
274
275
276 // deformation assembly, always required
277 for(int i(0); i < num_loc_dofs; ++i)
278 {
279 // trial function loop
280 for(int j(0); j < num_loc_dofs; ++j)
281 {
282 // compute scalar value
283 const DataType value = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
284
285 // update local matrix
286 loc_mat[i][j].add_scalar_main_diag(value);
287 loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
288 }
289 }
290 // frechet carreau term "only" makes sense in defo formulation
291 if(need_frechet_material)
292 {
// reg_eps regularizes the division for gamma_dot near zero
293 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
294 //std::cout << fac ;
295 // test function loop
296 for(int i(0); i < num_loc_dofs; ++i)
297 {
298 Tiny::Vector<DataType, dim> du_grad_phi;
299 du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
300
301 // trial function loop
302 for(int j(0); j < num_loc_dofs; ++j)
303 {
304 Tiny::Vector<DataType, dim> du_grad_psi;
305 du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
306 // add outer product of grad(phi) and grad(psi)
307 loc_mat[i][j].add_outer_product(du_grad_phi, du_grad_psi, fac * weight);
308 }
309 }
310 }
311
312 if(need_convection) // assemble convection?
313 {
314 // test function loop
315 for(int i(0); i < num_loc_dofs; ++i)
316 {
317 // trial function loop
318 for(int j(0); j < num_loc_dofs; ++j)
319 {
320 // compute scalar value
321 const DataType value = beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad);
322
323 // update local matrix
324 loc_mat[i][j].add_scalar_main_diag(value);
325 }
326 }
327 }
328
329 // assemble convection Frechet?
330 #ifdef __CUDACC__
331 if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
332 #else
333 if(Math::abs(frechet_beta) > DataType(0))
334 #endif
335 {
336 // test function loop
337 for(int i(0); i < num_loc_dofs; ++i)
338 {
339 // trial function loop
340 for(int j(0); j < num_loc_dofs; ++j)
341 {
342 // compute scalar value
343 const DataType value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
344
345 // update local matrix
346 loc_mat[i][j].axpy(value, loc_grad_v);
347 }
348 }
349 }
350
351 // assemble reaction?
352 #ifdef __CUDACC__
353 if(CudaMath::cuda_abs(theta) > DataType(0))
354 #else
355 if(Math::abs(theta) > DataType(0))
356 #endif
357 {
358 // test function loop
359 for(int i(0); i < num_loc_dofs; ++i)
360 {
361 // trial function loop
362 for(int j(0); j < num_loc_dofs; ++j)
363 {
364 // compute scalar value
365 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
366
367 // update local matrix
368 loc_mat[i][j].add_scalar_main_diag(value);
369 }
370 }
371 }
372
373 if constexpr(need_streamline) // need streamdiff?
374 {
375 Tiny::Vector<DataType, num_loc_dofs> streamdiff_coeffs(DataType(0));
376 for(int i = 0; i < num_loc_dofs; ++i)
377 {
378 streamdiff_coeffs[i] = Tiny::dot(loc_v, basis_data.phi[i].grad);
379 }
380
381
382 // assemble streamline diffusion?
383 if((local_delta > tol_eps))
384 {
385 // test function loop
386 for(int i(0); i < num_loc_dofs; ++i)
387 {
388 // trial function loop
389 for(int j(0); j < num_loc_dofs; ++j)
390 {
391 // compute scalar value
392 const DataType value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
393
394 // update local matrix
395 loc_mat[i][j].add_scalar_main_diag(value);
396 }
397 }
398 }
399 }
400 // next cubature point
401 }
402 }
403
404 #if defined(__CUDACC__)
/**
 * \brief Cooperative-groups (CUDA device) variant of the Burgers
 *        velocity-material local matrix assembly.
 *
 * All threads of the group tg assemble one element together: the shared
 * per-element state lives in *shared_wrapper (shared memory), single-thread
 * work is funneled through cg::invoke_one, and the (i,j) dof-pair loop is
 * flattened so that each thread handles indices
 *   idx in [0, loc_assemble_size), mapped via
 *   i = (idx+assemble_offset) / num_loc_dofs, j = (idx+assemble_offset) % num_loc_dofs.
 * loc_mat is a raw DataType array holding one dim x dim block per local
 * (i,j) pair, addressed as loc_mat + dim*dim*idx.
 *
 * \param[in] tg                Cooperating thread group.
 * \param[in,out] shared_wrapper  Shared-memory scratch data; zero-formatted
 *                              cooperatively before use.
 * \param[in] loc_assemble_size, assemble_offset  This group's slice of the
 *                              flattened dof-pair index range.
 * \param[out] loc_mat          Raw output array of dim x dim blocks.
 * \param[in] local_conv_dofs   Convection dofs, flat array of dim entries
 *                              per local dof.
 * \param[in] cub_pt, cub_wg, num_cubs  Cubature points, weights and count.
 * \param[in] need_convection   Runtime switch for the convection term.
 * \param[in] tol_eps           Streamline-diffusion tolerance.
 * \param[in] visco_func, visco_d_func  Viscosity and its derivative as
 *                              functions of the shear rate.
 *
 * NOTE(review): the local_coeffs, burgers_params and material_params
 * parameters referenced in the body (original signature lines 432, 434-435)
 * are missing from this extraction of the file.
 */
429 template<typename ThreadGroup_, typename SpaceHelp_, typename ViscFunc_, typename ViscDerFunc_, bool need_stream_diff_>
430 CUDA_DEVICE __forceinline__ void grouped_burgers_velo_material_mat_assembly_kernel(const ThreadGroup_& tg, BurgersMatSharedDataKernelWrapper<SpaceHelp_, need_stream_diff_>* shared_wrapper,
431 int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType* loc_mat, const typename SpaceHelp_::DataType* local_conv_dofs,
433 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
436 const bool need_convection, const typename SpaceHelp_::DataType tol_eps,
437 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
438 {
439 // disable unused variable warnings
440#pragma nv_diag_suppress = 177
441#pragma nv_diag_suppress = 550
442
443 typedef SpaceHelp_ SpaceHelp;
444 constexpr int dim = SpaceHelp::dim;
445 typedef typename SpaceHelp::SpaceType SpaceType;
446 typedef typename SpaceHelp::DataType DataType;
447 constexpr bool need_streamline = need_stream_diff_;
448 // constexpr int num_loc_verts = SpaceHelp::num_verts;
449 const int t_idx = tg.thread_rank();
450 const int g_size = tg.num_threads();
451
452 // const DataType& nu{burgers_params.nu};
453 const DataType& theta{burgers_params.theta};
454 const DataType& beta{burgers_params.beta};
455 const DataType& frechet_beta{burgers_params.frechet_beta};
456 const DataType& sd_delta{burgers_params.sd_delta};
457 const DataType& sd_nu{burgers_params.sd_nu};
458 const DataType& sd_v_norm{burgers_params.sd_v_norm};
459
460 const DataType& frechet_material{material_params.frechet_material};
461 const DataType& reg_eps{material_params.reg_eps};
462 const bool& need_frechet_material{material_params.need_frechet_material};
463
464 // #ifdef __CUDACC__
465 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
466 // #else
467 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
468 // #endif
469
470 // #ifdef __CUDACC__
471 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
472 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
473 // #else
474 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
475 // const bool need_convection = Math::abs(beta) > DataType(0);
476 // #endif
477
478
479 //define local sizes
480 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
481 // local vector and matrix defines
482 typedef Tiny::Vector<DataType, dim> VecValueType;
483 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
484 //get number of nodes per element
485 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
486 // define local arrays
487 // use extern shared for this?!
// Aliases into the shared-memory wrapper; streamline-only members are taken
// by pointer and only bound when the compile-time flag requests them.
488 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, need_stream_diff_> BSDKWrapper;
489 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
490 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
491 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
492 VecValueType& loc_v = shared_wrapper->loc_v;
493 VecValueType* mean_v_p = nullptr;
494 if constexpr(need_streamline) mean_v_p = &(shared_wrapper->mean_v);
495 typename SpaceHelp::DomainPointType& dom_point = shared_wrapper->dom_point;
496 Tiny::Vector<DataType, num_loc_dofs>* streamdiff_coeffs_p = nullptr;
497 if constexpr(need_streamline) streamdiff_coeffs_p = &(shared_wrapper->streamdiff_coeffs);
498 DataType& local_delta = shared_wrapper->local_delta;
499 DataType& nu_loc = shared_wrapper->nu_loc;
500 DataType& gamma_dot = shared_wrapper->gamma_dot;
501 DataType& det = shared_wrapper->det;
502 DataType& weight = shared_wrapper->weight;
503 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
504 bool& need_frechet = shared_wrapper->need_frechet;
505
506 // local tmp strainrate value
507 MatValueType strain_rate_tensor_2(DataType(0));
508
509 VoxelAssembly::coalesced_format(tg, (unsigned int*) shared_wrapper, sizeof(BSDKWrapper)/(sizeof(unsigned int)));
510 tg.sync();
511
512 cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
513 tg.sync();
514
515 // reenable unused variable warnings
516#pragma nv_diag_default = 550
517#pragma nv_diag_default = 177
518
519 if constexpr(need_streamline) //need streamdiff?
520 {
// Pre-pass: mean velocity at the barycenter and the element-local
// streamline-diffusion delta, computed cooperatively.
521 cg::invoke_one(tg, [&]() {*mean_v_p = DataType(0);});
522 //set domain point to barycenter, which is zero for Hypercubes
523 // NewSpaceHelp::_map_point(img_point, barycenter, local_coeffs);
524 //only reserve memory for reference values
525 cg::invoke_one(tg, [&](){ const VecValueType bc(0);
526 SpaceHelp::eval_ref_values(basis_data, bc);
527 SpaceHelp::trans_values(basis_data);});
528
529 tg.sync();
530
531 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
532 {
533 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &(*mean_v_p)[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
534 }
535
536 tg.sync();
537
538 DataType local_norm_v = mean_v_p->norm_euclid();
539
540 if(local_norm_v > tol_eps)
541 {
542 cg::invoke_one(tg, [&](){
543 const DataType local_h = SpaceHelp::width_directed(*mean_v_p, local_coeffs) * local_norm_v;
544 const DataType local_re = (local_norm_v * local_h) / sd_nu;
545 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
546 });
547 }
548 }
549
550 tg.sync();
551
552
553 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
554 {
555 cg::invoke_one(tg, [&](){dom_point = cub_pt[cub_ind];
556 SpaceHelp::eval_ref_values(basis_data, dom_point);
557 SpaceHelp::trans_values(basis_data);});
558 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
559 tg.sync();
560 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, dom_point, &local_coeffs[0][0]);
561 tg.sync();
562 cg::invoke_one(tg, [&](){det = loc_jac.det();
563 weight = det * cub_wg[cub_ind];});
564 tg.sync();
565 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
566 tg.sync();
567 cg::invoke_one(tg, [&](){
568 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
569 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
570
571 tg.sync();
572 if(need_convection || need_streamline) // need streamdiff or convection?
573 {
574 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
575 tg.sync();
576
577 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
578 {
579 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
580 }
581 // tg.sync();
582 }
583
584 {
585 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
586 tg.sync();
587 for(int i = t_idx; i < num_loc_dofs; i += g_size)
588 {
589 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
590 }
591 // tg.sync();
592 }
593 tg.sync();
594 // assemble necessary values for material assembler
595 {
// strain_rate_tensor_2 = grad(v) + grad(v)^T; each thread builds its own
// copy while one thread publishes gamma_dot and nu_loc to shared memory.
596 strain_rate_tensor_2.set_transpose(loc_grad_v);
597 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
598 cg::invoke_one(tg, [&](){
599 // strain_rate_tensor_2.set_transpose(loc_grad_v);
600 // strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
601 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
602 nu_loc = visco_func(gamma_dot);
603 });
604 }
605
606
607 if constexpr(need_streamline) // need streamdiff?
608 {
609 tg.sync();
610 for(int i = t_idx; i < num_loc_dofs; i += g_size)
611 {
612 (*streamdiff_coeffs_p)[i] = Tiny::dot(loc_v, basis_data.phi[i].grad);
613 }
614 // tg.sync();
615 }
616
617 tg.sync();
618
619 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
620
621 for(int idx = t_idx; idx < loc_assemble_size; idx += g_size)
622 {
623 // the thread local test and trial function indices
624 const int i = (idx+assemble_offset) / num_loc_dofs;
625 const int j = (idx+assemble_offset) % num_loc_dofs;
626
627 {
628 // compute scalar value
629 const DataType value = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
630
631 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
632 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
633 }
634
635 if(need_frechet_material)
636 {
637 Tiny::Vector<DataType, dim> du_grad_phi;
638 du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
639 Tiny::Vector<DataType, dim> du_grad_psi;
640 du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
641 // printf("from thread %i du grad psi %f, %f \n", idx, du_grad_psi[0], du_grad_psi[1]);
642 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(du_grad_phi, du_grad_psi, fac*weight);
643
644 }
645
646 if(need_convection) // assemble convection?
647 {
648 // compute scalar value
649 const DataType value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
650
651 // update local matrix
652 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
653 }
654
655 // assemble convection Frechet?
656 if(need_frechet)
657 {
658 // compute scalar value
659 const DataType value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
660
661 // update local matrix
662 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->axpy(value, loc_grad_v);
663 }
664
665 // assemble reaction?
666 #ifdef __CUDACC__
667 if(CudaMath::cuda_abs(theta) > DataType(0))
668 #else
669 if(Math::abs(theta) > DataType(0))
670 #endif
671 {
672 // compute scalar value
673 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
674
675 // update local matrix
676 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
677 }
678
679 // assemble streamline diffusion?
680 if constexpr(need_streamline)
681 {
682 if(local_delta > tol_eps)
683 {
684 // compute scalar value
685 const DataType value = local_delta * weight * (*streamdiff_coeffs_p)[i] * (*streamdiff_coeffs_p)[j];
686
687 // update local matrix
688 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
689 }
690 }
691 }
692 // next cubature point
693 }
694 }
695 #endif
696
/**
 * \brief Assembles the local defect-vector contribution of the Burgers
 *        velocity-material operator, i.e. applies the operator to the
 *        primal dofs without forming the matrix.
 *
 * Per cubature point: evaluates trafo and basis data, builds grad(v) from
 * the convection dofs, computes nu_loc = visco_func(sqrt(1/2) *
 * |grad(v)+grad(v)^T|_F), and accumulates into loc_vec the deformation
 * term (always), convection (if need_convection) and reaction (if
 * theta != 0), each applied to local_prim_dofs. No Frechet or streamline
 * diffusion terms appear here.
 *
 * \param[out] loc_vec          Local defect vector; formatted first, then
 *                              accumulated over all cubature points.
 * \param[in] local_prim_dofs   Local dofs of the primal (multiplied) field.
 * \param[in] local_conv_dofs   Local dofs of the convection velocity field.
 * \param[in] local_coeffs      Trafo coefficients — presumably the element
 *                              vertex coordinates; TODO confirm.
 * \param[in] cub_pt, cub_wg, num_cubs  Cubature points, weights and count.
 * \param[in] need_convection   Runtime switch for the convection term.
 * \param[in] visco_func        Viscosity as a function of the shear rate.
 *
 * NOTE(review): the burgers_params parameter referenced in the body
 * (original signature line 723) is missing from this extraction.
 */
720 template<typename SpaceHelp_, typename LocVecType_, int dim_, int num_verts_, typename ViscFunc_>
721 CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_& loc_vec, const LocVecType_& local_prim_dofs, const LocVecType_& local_conv_dofs, const Tiny::Matrix<typename SpaceHelp_::DataType, dim_, num_verts_>& local_coeffs,
722 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
724 const bool need_convection, ViscFunc_ visco_func)
725 {
726 typedef SpaceHelp_ SpaceHelp;
727 constexpr int dim = SpaceHelp::dim;
728 typedef typename SpaceHelp::SpaceType SpaceType;
729 typedef typename SpaceHelp::DataType DataType;
730
731 // const DataType& nu{burgers_params.nu};
732 const DataType& theta{burgers_params.theta};
733 const DataType& beta{burgers_params.beta};
734 // const DataType& frechet_beta{burgers_params.frechet_beta};
735 // const DataType& sd_delta{burgers_params.sd_delta};
736 // const DataType& sd_nu{burgers_params.sd_nu};
737 // const DataType& sd_v_norm{burgers_params.sd_v_norm};
738 // const bool& deformation{burgers_params.deformation};
739
740 // #ifdef __CUDACC__
741 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
742 // #else
743 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
744 // #endif
745
746 // #ifdef __CUDACC__
747 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
748 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
749 // #else
750 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
751 // const bool need_convection = Math::abs(beta) > DataType(0);
752 // #endif
753
754
755 //define local sizes
756 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
757 //get number of nodes per element
758 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
759 // define local arrays
760 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
761 typename SpaceHelp::EvalData basis_data;
762
763 // local vector and matrix defines
764 typedef Tiny::Vector<DataType, dim> VecValueType;
765 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
766
767 VecValueType loc_v(DataType(0));
768 DataType nu_loc(DataType(0));
769
770 loc_vec.format();
771
772 Tiny::Vector<DataType, num_loc_dofs> streamdiff_coeffs(DataType(0));
773
774 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
775 {
776 const typename SpaceHelp::DomainPointType& dom_point = cub_pt[cub_ind];
777 SpaceHelp::eval_ref_values(basis_data, dom_point);
778 SpaceHelp::trans_values(basis_data);
779 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
780 SpaceHelp::calc_jac_mat(loc_jac, dom_point, local_coeffs);
781 loc_jac_inv.set_inverse(loc_jac);
782
783 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
784 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
785
786 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
787 if(need_convection) // need streamdiff or convection?
788 {
789 loc_v.format();
790 for(int i = 0; i < num_loc_dofs; ++i)
791 {
792 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
793 }
794 }
795
796 // assemble loc_grad_v, always required for this assembler
797 {
// Scoped: grad(v) and the doubled strain rate are only needed to obtain
// nu_loc at this cubature point.
798 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
799 loc_grad_v.format();
800 for(int i = 0; i < num_loc_dofs; ++i)
801 {
802 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
803 }
804 // required terms for carreau assembly
805 strain_rate_tensor_2.set_transpose(loc_grad_v);
806 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
807 #ifdef __CUDACC__
808 nu_loc = visco_func(CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
809 #else
810 nu_loc = visco_func(Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
811 #endif
812
813 }
814
815 // always deformation
816 for(int i(0); i < num_loc_dofs; ++i)
817 {
818 // trial function loop
819 for(int j(0); j < num_loc_dofs; ++j)
820 {
821 // compute scalar values
822 const DataType value1 = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
823 const DataType value2 = nu_loc * weight * Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
824 // update local vector
825 loc_vec[i].axpy(value1, local_prim_dofs[j]);
826 loc_vec[i].axpy(value2, basis_data.phi[j].grad);
827 }
828 }
829
830 if(need_convection) // assemble convection?
831 {
832 // test function loop
833 for(int i(0); i < num_loc_dofs; ++i)
834 {
835 // trial function loop
836 for(int j(0); j < num_loc_dofs; ++j)
837 {
838 // compute scalar value
839 const DataType value = beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad);
840
841 // update local matrix
842 loc_vec[i].axpy(value, local_prim_dofs[j]);
843 }
844 }
845 }
846
847 // assemble reaction?
848 #ifdef __CUDACC__
849 if(CudaMath::cuda_abs(theta) > DataType(0))
850 #else
851 if(Math::abs(theta) > DataType(0))
852 #endif
853 {
854 // test function loop
855 for(int i(0); i < num_loc_dofs; ++i)
856 {
857 // trial function loop
858 for(int j(0); j < num_loc_dofs; ++j)
859 {
860 // compute scalar value
861 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
862
863 // update local matrix
864 loc_vec[i].axpy(value, local_prim_dofs[j]);
865 }
866 }
867 }
868 // next cubature point
869 }
870 }
871
872 #ifdef __CUDACC__
883 template<typename ThreadGroup_, typename SpaceHelp_, typename ViscFunc_>
884 CUDA_DEVICE void grouped_burgers_velo_material_defect_assembly_kernel(const ThreadGroup_& tg, BurgersMatSharedDataKernelWrapper<SpaceHelp_, false>* shared_wrapper, int loc_assemble_size, int assemble_offset,
885 typename SpaceHelp_::DataType* loc_vec, const typename SpaceHelp_::DataType* local_prim_dofs, const typename SpaceHelp_::DataType* local_conv_dofs,
887 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
889 const bool need_convection, ViscFunc_ visco_func)
890 {
891 typedef SpaceHelp_ SpaceHelp;
892 constexpr int dim = SpaceHelp::dim;
893 typedef typename SpaceHelp::SpaceType SpaceType;
894 typedef typename SpaceHelp::DataType DataType;
895 const int t_idx = tg.thread_rank();
896 const int g_size = tg.num_threads();
897
898 //const DataType& nu{burgers_params.nu};
899 const DataType& theta{burgers_params.theta};
900 const DataType& beta{burgers_params.beta};
901 // const DataType& frechet_beta{burgers_params.frechet_beta};
902 // const DataType& sd_delta{burgers_params.sd_delta};
903 // const DataType& sd_nu{burgers_params.sd_nu};
904 // const DataType& sd_v_norm{burgers_params.sd_v_norm};
905 // const bool& deformation{burgers_params.deformation};
906
907 // #ifdef __CUDACC__
908 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
909 // #else
910 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
911 // #endif
912
913 // #ifdef __CUDACC__
914 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
915 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
916 // #else
917 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
918 // const bool need_convection = Math::abs(beta) > DataType(0);
919 // #endif
920
921
922 //define local sizes
923 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
924 //get number of nodes per element
925 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
926 // define local arrays
927 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, false> BSDKWrapper;
928 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
929 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
930
931 DataType& det = shared_wrapper->det;
932 DataType& weight = shared_wrapper->weight;
933 DataType& nu_loc = shared_wrapper->nu_loc;
934 DataType& gamma_dot = shared_wrapper->gamma_dot;
935 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
936 // local vector and matrix defines
937 typedef Tiny::Vector<DataType, dim> VecValueType;
938 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
939
940 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
941 VecValueType& loc_v = shared_wrapper->loc_v;
942
943 typename SpaceHelp::DomainPointType& dom_point = shared_wrapper->dom_point;
944
945 VoxelAssembly::coalesced_format(tg, (unsigned int*) shared_wrapper, sizeof(BSDKWrapper)/(sizeof(unsigned int)));
946 tg.sync();
947 // 4 threads should work on one entry, requires block size to be mutliple of 4
948 cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
949
950 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
951 {
952 tg.sync();
953 cg::invoke_one(tg, [&](){dom_point = cub_pt[cub_ind];
954 SpaceHelp::eval_ref_values(basis_data, dom_point);
955 SpaceHelp::trans_values(basis_data);});
956 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
957 tg.sync();
958 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, dom_point, &local_coeffs[0][0]);
959 tg.sync();
960 cg::invoke_one(tg, [&](){det = loc_jac.det();
961 weight = det * cub_wg[cub_ind];});
962 tg.sync();
963 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
964 tg.sync();
965 cg::invoke_one(tg, [&](){
966 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
967 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
968
969 tg.sync();
970 if(need_convection) // need streamdiff or convection?
971 {
972 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
973 tg.sync();
974
975 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
976 {
977 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
978 }
979 }
980 tg.sync();
981 // assemble necessary values for material assembler
982 {
983 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
984 tg.sync();
985 for(int i = t_idx; i < num_loc_dofs; i += g_size)
986 {
987 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
988 }
989 tg.sync();
990 cg::invoke_one(tg, [&](){
991 MatValueType strain_rate_tensor_2(DataType(0));
992 strain_rate_tensor_2.set_transpose(loc_grad_v);
993 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
994 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
995 nu_loc = visco_func(gamma_dot);
996 });
997 }
998
999 tg.sync();
1000
1001 for(int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
1002 {
1003 // compute scalar value
1004 DataType val = DataType(0);
1005 // the thread local test function index
1006 const int i = (idx/dim+assemble_offset);
1007 const int ii = idx%dim;
1008 {
1009 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1010 {
1011 // compute scalar values
1012 val += nu_loc * weight * (Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
1013 + Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
1014 }
1015 }
1016
1017 if(need_convection) // assemble convection?
1018 {
1019 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1020 {
1021 // compute scalar values
1022 val += beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
1023 }
1024 }
1025
1026 // assemble reaction?
1027 #ifdef __CUDACC__
1028 if(CudaMath::cuda_abs(theta) > DataType(0))
1029 #else
1030 if(Math::abs(theta) > DataType(0))
1031 #endif
1032 {
1033 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1034 {
1035 // compute scalar values
1036 val += theta * weight * basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
1037 }
1038 }
1039 // reduce result and store value with async call
1040 // tile4.sync();
1041 cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
1042 cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
1043 }
1044 // next cubature point
1045 }
1046 }
1047 #endif
1048 } // namespace Kernel
1049
1050 namespace Arch
1051 {
1052 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
      /**
       * \brief Device (CUDA) kernel wrapper for the full-matrix burgers velo material assembler.
       *
       * \param[in] space The finite element space the operator is assembled on.
       * \param[in] matrix_data Wrapped raw data of the target BCSR matrix (values, row pointers, column indices, dimensions).
       * \param[in] conv_data Raw pointer to the convection vector data (pod layout).
       * \param[in] cubature Cubature points and weights for the numerical integration.
       * \param[in] dof_mapping The dof mapping data of the underlying mesh.
       * \param[in] coloring_maps Per-color element index arrays; presumably used for race-free parallel assembly — confirm against kernel.
       * \param[in] coloring_map_sizes The sizes of the coloring maps.
       * \param[in] alpha Scaling factor for the assembled operator.
       * \param[in] burgers_params The wrapped burgers parameters.
       * \param[in] material_params The wrapped material (viscosity model) parameters.
       * \param[in] mat_type Selects the material model to be used.
       * \param[in] shared_mem Shared memory size for the kernel launch.
       * \param[in] blocksize CUDA block size.
       * \param[in] gridsize CUDA grid size.
       * \param[in] print_occupancy If true, prints kernel occupancy information.
       */
      template<typename Space_, typename DT_, typename IT_>
      void assemble_burgers_velo_material_csr(const Space_& space,
                const CSRMatrixData<DT_, IT_>& matrix_data,
                const DT_* conv_data,
                const AssemblyCubatureData<DT_>& cubature,
                const AssemblyMappingData<DT_, IT_>& dof_mapping,
                const std::vector<int*>& coloring_maps,
                const std::vector<Index>& coloring_map_sizes,
                DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
                const AssemblyMaterialData<DT_>& material_params,
                MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy);
1085
1108 template<typename Space_, typename DT_, typename IT_>
1110 DT_* vector_data,
1111 const DT_* conv_data,
1112 const DT_* primal_data,
1113 const AssemblyCubatureData<DT_>& cubature,
1114 const AssemblyMappingData<DT_, IT_>& dof_mapping,
1115 const std::vector<int*>& coloring_maps,
1116 const std::vector<Index>& coloring_map_sizes,
1117 DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
1118 const AssemblyMaterialData<DT_>& material_params,
1119 MaterialType material_type,
1120 int shared_mem, int blocksize, int gridsize, bool print_occupancy);
1121
1122 #endif
1123
      /**
       * \brief Host kernel wrapper for the full-matrix burgers velo material assembler.
       *
       * Host-side counterpart of assemble_burgers_velo_material_csr; takes no CUDA
       * launch configuration.
       *
       * \param[in] space The finite element space the operator is assembled on.
       * \param[in] matrix_data Wrapped raw data of the target BCSR matrix.
       * \param[in] conv_data Raw pointer to the convection vector data (pod layout).
       * \param[in] cubature Cubature points and weights for the numerical integration.
       * \param[in] dof_mapping The dof mapping data of the underlying mesh.
       * \param[in] coloring_maps Per-color element index arrays.
       * \param[in] coloring_map_sizes The sizes of the coloring maps.
       * \param[in] alpha Scaling factor for the assembled operator.
       * \param[in] burgers_params The wrapped burgers parameters.
       * \param[in] material_params The wrapped material (viscosity model) parameters.
       * \param[in] material_type Selects the material model to be used.
       */
      template<typename Space_, typename DT_, typename IT_>
      void assemble_burgers_velo_material_csr_host(const Space_& space,
                const CSRMatrixData<DT_, IT_>& matrix_data,
                const DT_* conv_data,
                const AssemblyCubatureData<DT_>& cubature,
                const AssemblyMappingData<DT_, IT_>& dof_mapping,
                const std::vector<int*>& coloring_maps,
                const std::vector<Index>& coloring_map_sizes,
                DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
                const AssemblyMaterialData<DT_>& material_params,
                MaterialType material_type);
1156
      /**
       * \brief Host kernel wrapper for the defect (vector) burgers velo material assembler.
       *
       * Assembles the application of the operator to a primal vector into a defect vector.
       *
       * \param[in,out] vector_data Raw pointer to the target defect vector data (pod layout).
       * \param[in] space The finite element space the operator is assembled on.
       * \param[in] conv_data Raw pointer to the convection vector data (pod layout).
       * \param[in] primal_data Raw pointer to the primal vector the operator is applied to.
       * \param[in] cubature Cubature points and weights for the numerical integration.
       * \param[in] dof_mapping The dof mapping data of the underlying mesh.
       * \param[in] coloring_maps Per-color element index arrays.
       * \param[in] coloring_map_sizes The sizes of the coloring maps.
       * \param[in] alpha Scaling factor for the assembled contribution.
       * \param[in] burgers_params The wrapped burgers parameters.
       * \param[in] material_params The wrapped material (viscosity model) parameters.
       * \param[in] material_type Selects the material model to be used.
       */
      template<typename Space_, typename DT_, typename IT_>
      void assemble_burgers_velo_material_defect_host(const Space_& space,
                DT_* vector_data,
                const DT_* conv_data,
                const DT_* primal_data,
                const AssemblyCubatureData<DT_>& cubature,
                const AssemblyMappingData<DT_, IT_>& dof_mapping,
                const std::vector<int*>& coloring_maps,
                const std::vector<Index>& coloring_map_sizes,
                DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
                const AssemblyMaterialData<DT_>& material_params,
                MaterialType material_type);
1191
1192
1193 }
1194
1195
1196#ifndef __CUDACC__
1209 template<int dim_, typename DT_, typename IT_>
1211 public VoxelBurgersAssembler<Q2StandardHyperCube<dim_>, DT_, IT_>
1212 {
1213 public:
1217 typedef typename BaseClass::DataHandler DataHandler;
1218 typedef typename BaseClass::SpaceHelp SpaceHelp;
1219 typedef typename SpaceHelp::ShapeType ShapeType;
1220 typedef typename SpaceHelp::DataType DataType;
1221 typedef typename SpaceHelp::IndexType IndexType;
1222
1224 static constexpr int dim = SpaceHelp::dim;
1225
1226 typedef typename SpaceHelp::DomainPointType DomainPointType;
1227 typedef typename SpaceHelp::ImagePointType ImagePointType;
1228 typedef typename SpaceHelp::ValueType ValueType;
1229 typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
1230
1233
1235 DataType reg_eps;
1236
1238 DataType mu_0;
1239 DataType exp;
1240 DataType lambda;
1241 DataType a_T;
1242 DataType yasuda_a;
1243 DataType mu_inf;
1244
1247
1248
1249 public:
      /// Default constructor. NOTE(review): the scalar material parameters (mu_0, exp, lambda,
      /// a_T, yasuda_a, mu_inf, ...) appear to have no in-class initializers, so they remain
      /// uninitialized here — set them before calling any assembly routine; TODO confirm.
      explicit VoxelBurgersVeloMaterialAssembler() = default;
1251
1265 template<typename ColoringType_>
1266 explicit VoxelBurgersVeloMaterialAssembler(const SpaceType& space, const ColoringType_& coloring, int hint = -1) :
1267 BaseClass(space, coloring, hint),
1268 frechet_material(DataType(0)), reg_eps(DataType(1E-100)), material_type(MaterialType::carreau)
1269 {
1270 }
1271
1272 // rule of 5
1274
1276
1278
1280
1282
1289 {
1290 return VoxelAssembly::AssemblyMaterialData<DataType>{frechet_material, reg_eps, mu_0, exp, lambda, a_T, yasuda_a, mu_inf, frechet_material>Math::pow(Math::eps<DataType>(), DataType(0.9))};
1291 }
1292
1304 template<typename CubatureFactory_>
1306 const SpaceType& space, const CubatureFactory_& cubature_factory, DataType alpha = DataType(1)) const
1307 {
1308 XASSERTM(matrix.rows() == space.get_num_dofs(), "invalid matrix dimensions");
1309 XASSERTM(matrix.columns() == space.get_num_dofs(), "invalid matrix dimensions");
1310 XASSERTM(convect.size() == space.get_num_dofs(), "invalid vector size");
1311
1312 //define cubature
1313 typedef Cubature::Rule<ShapeType, DataType, DataType> CubatureRuleType;
1314 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1315
1316 //get cubature points and weights
1317 int num_cubs = cubature.get_num_points();
1318 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1319 DataType* cub_wg = cubature.get_weights();
1320
1321 BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
1322 }
1323
1336 template<typename CubatureFactory_>
1338 const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal, const SpaceType& space, const CubatureFactory_& cubature_factory, DataType alpha = DataType(1)) const
1339 {
1340 XASSERTM(vector.size() == space.get_num_dofs(), "invalid vector size");
1341 XASSERTM(convect.size() == space.get_num_dofs(), "invalid vector size");
1342
1343 //define cubature
1344 typedef Cubature::Rule<ShapeType, DataType, DataType> CubatureRuleType;
1345 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1346
1347 //get cubature points and weights
1348 int num_cubs = cubature.get_num_points();
1349 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1350 DataType* cub_wg = cubature.get_weights();
1351
1352 BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
1353 }
1354
1356 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1357 {
1358 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1359 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1360
1361 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(void*)cub_pt, cub_wg, num_cubs};
1362 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1363 auto burgers_params = this->wrap_burgers_params();
1364 auto material_params = this->wrap_material_params();
1365
1366
1367 VoxelAssembly::Arch::assemble_burgers_velo_material_csr_host(space, mat_data, vec_data, cub_data, mapping_data,
1368 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1369 burgers_params, material_params, material_type);
1370 }
1371
1372 void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1373 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1374 {
1375 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1376 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1377 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1378
1379 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(void*)cub_pt, cub_wg, num_cubs};
1380 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1381 auto burgers_params = this->wrap_burgers_params();
1382 auto material_params = wrap_material_params();
1383
1384
1385 VoxelAssembly::Arch::assemble_burgers_velo_material_defect_host(space, vec_data, conv_data, primal_data, cub_data, mapping_data,
1386 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1387 burgers_params, material_params, material_type);
1388 //free resources
1389 }
1390
1391 #ifdef FEAT_HAVE_CUDA
1392 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
1393 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1394 {
1395 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1396 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1397
1398 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1399 //initialize all necessary pointer arrays and values //maybe more sense to specify cubature rule and set this to a const mem location?
1400 void* cub_pt_device = Util::cuda_malloc(Index(num_cubs) * sizeof(CubPointType));
1401 Util::cuda_copy_host_to_device(cub_pt_device, (void*)cub_pt, Index(num_cubs) * sizeof(CubPointType));
1402
1403 void* cub_wg_device = Util::cuda_malloc(Index(num_cubs) * sizeof(DataType));
1404 Util::cuda_copy_host_to_device(cub_wg_device, (const void*)cub_wg, Index(num_cubs) * sizeof(DataType));
1405
1406 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1407 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1408 auto burgers_params = this->wrap_burgers_params();
1409 auto material_params = wrap_material_params();
1410
1411 VoxelAssembly::Arch::assemble_burgers_velo_material_csr(space, mat_data, vec_data, d_cub_data, d_mapping_data,
1412 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1413 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1414 Util::cuda_free(cub_wg_device);
1415 Util::cuda_free(cub_pt_device);
1416 }
1417
1418
1419 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1420 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1421 {
1422 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1423 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1424 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1425
1426 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1427 //initialize all necessary pointer arrays and values //maybe more sense to specify cubature rule and set this to a const mem location?
1428 void* cub_pt_device = Util::cuda_malloc(Index(num_cubs) * sizeof(CubPointType));
1429 Util::cuda_copy_host_to_device(cub_pt_device, (void*)cub_pt, Index(num_cubs) * sizeof(CubPointType));
1430
1431 void* cub_wg_device = Util::cuda_malloc(Index(num_cubs) * sizeof(DataType));
1432 Util::cuda_copy_host_to_device(cub_wg_device, (const void*)cub_wg, Index(num_cubs) * sizeof(DataType));
1433
1434 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1435 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1436 auto burgers_params = this->wrap_burgers_params();
1437 auto material_params = wrap_material_params();
1438
1439 VoxelAssembly::Arch::assemble_burgers_velo_material_defect(space, vec_data, conv_data, primal_data, d_cub_data, d_mapping_data,
1440 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1441 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1442 Util::cuda_free(cub_wg_device);
1443 Util::cuda_free(cub_pt_device);
1444 }
1445
1446 #else //FEAT_HAVE_CUDA
1447 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix), const SpaceType& DOXY(space), typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt), const DataType* DOXY(cub_wg), int DOXY(num_cubs), DataType DOXY(alpha)) const
1448 {
1449 XABORTM("What in the nine hells are you doing here?");
1450 }
1451
      /// Non-CUDA build stub: aborts if the CUDA backend is selected without CUDA support.
      void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
        const SpaceType&, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*, const DataType*, int, DataType) const
      {
        XABORTM("This call was logged and your local FBI agent was informed about your transgression");
      }
1457
1458 #endif
1459
1460 }; // class VoxelBurgersAssembler<Q2StandardCube>
1461#endif
1462 }
1463}
1464
1465#endif
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
Cubature Rule class template.
Definition: rule.hpp:38
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Standard Lagrange-2 Finite-Element space class template.
Definition: element.hpp:39
Index get_num_dofs() const
Returns the number of dofs.
Definition: element.hpp:121
Tiny Matrix class template.
Tiny Vector class template.
CUDA_HOST_DEVICE Vector & set_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x)
Sets this vector to the result of a matrix-vector product.
Q2Lagrange thread parallel assembly class for the burgers operator.
Burgers Voxel Assembly template.
VoxelBurgersVeloMaterialAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers velo material assembler.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the burgers velo material operator for a finite element space with same test and trial space...
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
VoxelAssembly::AssemblyMaterialData< DataType > wrap_material_params() const
Wraps the set burgers parameter into convenient struct.
Burgers Velocity Material Voxel Assembly template.
T_ sqrt(T_ x)
Returns the square-root of a value.
Definition: math.hpp:300
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
Definition: math.hpp:643
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
void assemble_burgers_velo_material_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_velo_material_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const VoxelAssembly::AssemblyMaterialData< typename SpaceHelp_::DataType > &material_params, const bool need_convection, const typename SpaceHelp_::DataType tol_eps, ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
Burgers Velo Material Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection, ViscFunc_ visco_func)
Burgers Vector assembly kernel.
MaterialType
Enum for different material types.
FEAT namespace.
Definition: adjactor.hpp:12
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
A data field for all necessary values that define the dof mapping for assembly.
Data for material burgers assembler.
Helper struct wrapping the data required as shared data.