FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
burgers_velo_material_assembler.hpp
1#pragma once
2#ifndef FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP
3#define FEAT_KERNEL_VOXEL_ASSEMBLY_BURGERS_VELO_MATERIAL_HPP 1
4
6#include <kernel/backend.hpp>
8#include <kernel/voxel_assembly/voxel_assembly_common.hpp>
9#include <kernel/voxel_assembly/helper/data_handler.hpp>
10#include <kernel/voxel_assembly/helper/space_helper.hpp>
11#include <kernel/cubature/rule.hpp>
12#include <kernel/lafem/sparse_matrix_bcsr.hpp>
13#include <kernel/lafem/dense_vector_blocked.hpp>
14#include <kernel/trafo/standard/mapping.hpp>
15#include <kernel/geometry/conformal_mesh.hpp>
16#include <kernel/util/tiny_algebra.hpp>
17#include <kernel/global/vector.hpp>
18#include <kernel/lafem/vector_mirror.hpp>
19#include <kernel/voxel_assembly/burgers_assembler.hpp>
20#ifdef FEAT_HAVE_CUDA
21#include <kernel/util/cuda_util.hpp>
22#endif
23
24namespace FEAT
25{
26 namespace VoxelAssembly
27 {
28
29 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
31 template<typename SpaceHelp_, bool need_streamdiff_ = true>
33 {
34 typedef SpaceHelp_ SpaceHelp;
35 static constexpr int dim = SpaceHelp::dim;
36 typedef typename SpaceHelp::SpaceType SpaceType;
37 typedef typename SpaceHelp::DataType DataType;
38 //define local sizes
39 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
40 // local vector and matrix defines
43
44 typename SpaceHelp::EvalData basis_data;
45 typename SpaceHelp::JacobianMatrixType loc_jac;
46 typename SpaceHelp::JacobianMatrixType loc_jac_inv;
47 MatValueType loc_grad_v;
48 VecValueType loc_v;
49 VecValueType mean_v;
50 typename SpaceHelp::DomainPointType dom_point;
52 DataType local_delta;
53 DataType nu_loc;
54 DataType gamma_dot;
55 DataType det;
56 DataType weight;
57 bool need_frechet;
58 };
59
60 template<typename SpaceHelp_>
61 struct BurgersMatSharedDataKernelWrapper<SpaceHelp_, false>
62 {
63 typedef SpaceHelp_ SpaceHelp;
64 static constexpr int dim = SpaceHelp::dim;
65 typedef typename SpaceHelp::SpaceType SpaceType;
66 typedef typename SpaceHelp::DataType DataType;
67 //define local sizes
68 static constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
69 // local vector and matrix defines
72
73 typename SpaceHelp::EvalData basis_data;
74 typename SpaceHelp::JacobianMatrixType loc_jac;
75 typename SpaceHelp::JacobianMatrixType loc_jac_inv;
76 MatValueType loc_grad_v;
77 VecValueType loc_v;
78 typename SpaceHelp::DomainPointType dom_point;
79 DataType local_delta;
80 DataType nu_loc;
81 DataType gamma_dot;
82 DataType det;
83 DataType weight;
84 bool need_frechet;
85 };
86
87 #endif
97 template<typename Space_, typename DT_, typename IT_>
99
100 namespace Kernel
101 {
102
130 template<typename SpaceHelp_, typename LocMatType_, typename LocVecType_, int dim_, int num_verts_, typename ViscFunc_, typename ViscDerFunc_, bool need_stream_diff_>
131 CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_& loc_mat, const LocVecType_& local_conv_dofs, const Tiny::Matrix<typename SpaceHelp_::DataType, dim_, num_verts_>& local_coeffs,
132 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
135 const bool need_convection, const typename SpaceHelp_::DataType tol_eps,
136 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
137 {
138 typedef SpaceHelp_ SpaceHelp;
139 constexpr int dim = SpaceHelp::dim;
140 typedef typename SpaceHelp::SpaceType SpaceType;
141 typedef typename SpaceHelp::DataType DataType;
142
143 constexpr bool need_streamline = need_stream_diff_;
144
145 // burgers params
146 // const DataType& nu{burgers_params.nu};
147 const DataType& theta{burgers_params.theta};
148 const DataType& beta{burgers_params.beta};
149 const DataType& frechet_beta{burgers_params.frechet_beta};
150 const DataType& sd_delta{burgers_params.sd_delta};
151 const DataType& sd_nu{burgers_params.sd_nu};
152 const DataType& sd_v_norm{burgers_params.sd_v_norm};
153 // formulation only makes sense with deformation formulation
154 // const bool& deformation{burgers_params.deformation};
155
156 // additional material parameters
157 const DataType& frechet_material{material_params.frechet_material};
158 const DataType& reg_eps{material_params.reg_eps};
159 const bool& need_frechet_material{material_params.need_frechet_material};
160
161 // #ifdef __CUDACC__
162 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
163 // #else
164 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
165 // #endif
166
167 // #ifdef __CUDACC__
168 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
169 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
170 // #else
171 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
172 // const bool need_convection = Math::abs(beta) > DataType(0);
173 // #endif
174
175
176 //define local sizes
177 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
178 //get number of nodes per element
179 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
180 // define local arrays
181 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
182 typename SpaceHelp::EvalData basis_data;
183
184 // local vector and matrix defines
185 typedef Tiny::Vector<DataType, dim> VecValueType;
186 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
187
188 VecValueType loc_v(DataType(0));
189 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
190 DataType local_delta(DataType(0));
191 DataType nu_loc(DataType(0));
192 DataType gamma_dot(DataType(0));
193
194 loc_mat.format();
195
196
197 if constexpr(need_streamline) //need streamdiff? constexpr since we need this relatively rarely
198 {
199 VecValueType mean_v(DataType(0));
200 //set domain point to barycenter, which is zero for Hypercubes
201 const VecValueType barycenter(DataType(0));
202 // NewSpaceHelp::_map_point(img_point, barycenter, local_coeffs);
203 //only reserve memory for reference values
204 SpaceHelp::eval_ref_values(basis_data, barycenter);
205 SpaceHelp::trans_values(basis_data);
206 for(int i(0); i < num_loc_dofs; ++i)
207 {
208 mean_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
209 }
210 const DataType local_norm_v = mean_v.norm_euclid();
211
212 if(local_norm_v > tol_eps)
213 {
214 //we need this trafo data -> for what??
215 // SpaceHelp::calc_jac_mat(loc_jac, barycenter, local_coeffs);
216 // loc_jac_inv.set_inverse(loc_jac);
217
218 const DataType local_h = SpaceHelp::width_directed(mean_v, local_coeffs) * local_norm_v;
219 const DataType local_re = (local_norm_v * local_h) / sd_nu;
220 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
221 }
222 }
223
224 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
225 {
226 const typename SpaceHelp::DomainPointType& dom_point = cub_pt[cub_ind];
227 SpaceHelp::eval_ref_values(basis_data, dom_point);
228 SpaceHelp::trans_values(basis_data);
229 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
230 SpaceHelp::calc_jac_mat(loc_jac, dom_point, local_coeffs);
231 loc_jac_inv.set_inverse(loc_jac);
232
233 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
234 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
235
236 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
237 if(need_convection || need_streamline) // need streamdiff or convection?
238 {
239 loc_v.format();
240 for(int i = 0; i < num_loc_dofs; ++i)
241 {
242 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
243 }
244 }
245 // assemble loc_grad_v, always required for this assembler
246 {
247 loc_grad_v.format();
248 for(int i = 0; i < num_loc_dofs; ++i)
249 {
250 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
251 }
252 }
253 // required terms for carreau assembly
254 strain_rate_tensor_2.set_transpose(loc_grad_v);
255 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
256 #ifdef __CUDACC__
257 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
258 #else
259 gamma_dot = Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
260 #endif
261
262 //default to nu ?
263 nu_loc = visco_func(gamma_dot);
264
265
266 // deformation assembly, always required
267 for(int i(0); i < num_loc_dofs; ++i)
268 {
269 // trial function loop
270 for(int j(0); j < num_loc_dofs; ++j)
271 {
272 // compute scalar value
273 const DataType value = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
274
275 // update local matrix
276 loc_mat[i][j].add_scalar_main_diag(value);
277 loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
278 }
279 }
280 // frechet carreau term "only" makes sense in defo formulation
281 if(need_frechet_material)
282 {
283 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
284 //std::cout << fac ;
285 // test function loop
286 for(int i(0); i < num_loc_dofs; ++i)
287 {
288 Tiny::Vector<DataType, dim> du_grad_phi;
289 du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
290
291 // trial function loop
292 for(int j(0); j < num_loc_dofs; ++j)
293 {
294 Tiny::Vector<DataType, dim> du_grad_psi;
295 du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
296 // add outer product of grad(phi) and grad(psi)
297 loc_mat[i][j].add_outer_product(du_grad_phi, du_grad_psi, fac * weight);
298 }
299 }
300 }
301
302 if(need_convection) // assemble convection?
303 {
304 // test function loop
305 for(int i(0); i < num_loc_dofs; ++i)
306 {
307 // trial function loop
308 for(int j(0); j < num_loc_dofs; ++j)
309 {
310 // compute scalar value
311 const DataType value = beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad);
312
313 // update local matrix
314 loc_mat[i][j].add_scalar_main_diag(value);
315 }
316 }
317 }
318
319 // assemble convection Frechet?
320 #ifdef __CUDACC__
321 if(CudaMath::cuda_abs(frechet_beta) > DataType(0))
322 #else
323 if(Math::abs(frechet_beta) > DataType(0))
324 #endif
325 {
326 // test function loop
327 for(int i(0); i < num_loc_dofs; ++i)
328 {
329 // trial function loop
330 for(int j(0); j < num_loc_dofs; ++j)
331 {
332 // compute scalar value
333 const DataType value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
334
335 // update local matrix
336 loc_mat[i][j].axpy(value, loc_grad_v);
337 }
338 }
339 }
340
341 // assemble reaction?
342 #ifdef __CUDACC__
343 if(CudaMath::cuda_abs(theta) > DataType(0))
344 #else
345 if(Math::abs(theta) > DataType(0))
346 #endif
347 {
348 // test function loop
349 for(int i(0); i < num_loc_dofs; ++i)
350 {
351 // trial function loop
352 for(int j(0); j < num_loc_dofs; ++j)
353 {
354 // compute scalar value
355 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
356
357 // update local matrix
358 loc_mat[i][j].add_scalar_main_diag(value);
359 }
360 }
361 }
362
363 if constexpr(need_streamline) // need streamdiff?
364 {
365 Tiny::Vector<DataType, num_loc_dofs> streamdiff_coeffs(DataType(0));
366 for(int i = 0; i < num_loc_dofs; ++i)
367 {
368 streamdiff_coeffs[i] = Tiny::dot(loc_v, basis_data.phi[i].grad);
369 }
370
371
372 // assemble streamline diffusion?
373 if((local_delta > tol_eps))
374 {
375 // test function loop
376 for(int i(0); i < num_loc_dofs; ++i)
377 {
378 // trial function loop
379 for(int j(0); j < num_loc_dofs; ++j)
380 {
381 // compute scalar value
382 const DataType value = local_delta * weight * streamdiff_coeffs[i] * streamdiff_coeffs[j];
383
384 // update local matrix
385 loc_mat[i][j].add_scalar_main_diag(value);
386 }
387 }
388 }
389 }
390 // next cubature point
391 }
392 }
393
394 #if defined(__CUDACC__)
419 template<typename ThreadGroup_, typename SpaceHelp_, typename ViscFunc_, typename ViscDerFunc_, bool need_stream_diff_>
420 CUDA_DEVICE __forceinline__ void grouped_burgers_velo_material_mat_assembly_kernel(const ThreadGroup_& tg, BurgersMatSharedDataKernelWrapper<SpaceHelp_, need_stream_diff_>* shared_wrapper,
421 int loc_assemble_size, int assemble_offset, typename SpaceHelp_::DataType* loc_mat, const typename SpaceHelp_::DataType* local_conv_dofs,
423 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
426 const bool need_convection, const typename SpaceHelp_::DataType tol_eps,
427 ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
428 {
429 typedef SpaceHelp_ SpaceHelp;
430 constexpr int dim = SpaceHelp::dim;
431 typedef typename SpaceHelp::SpaceType SpaceType;
432 typedef typename SpaceHelp::DataType DataType;
433 constexpr bool need_streamline = need_stream_diff_;
434 // constexpr int num_loc_verts = SpaceHelp::num_verts;
435 const int t_idx = tg.thread_rank();
436 const int g_size = tg.num_threads();
437
438 // const DataType& nu{burgers_params.nu};
439 const DataType& theta{burgers_params.theta};
440 const DataType& beta{burgers_params.beta};
441 const DataType& frechet_beta{burgers_params.frechet_beta};
442 const DataType& sd_delta{burgers_params.sd_delta};
443 const DataType& sd_nu{burgers_params.sd_nu};
444 const DataType& sd_v_norm{burgers_params.sd_v_norm};
445
446 const DataType& frechet_material{material_params.frechet_material};
447 const DataType& reg_eps{material_params.reg_eps};
448 const bool& need_frechet_material{material_params.need_frechet_material};
449
450 // #ifdef __CUDACC__
451 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
452 // #else
453 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
454 // #endif
455
456 // #ifdef __CUDACC__
457 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
458 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
459 // #else
460 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
461 // const bool need_convection = Math::abs(beta) > DataType(0);
462 // #endif
463
464
465 //define local sizes
466 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
467 // local vector and matrix defines
468 typedef Tiny::Vector<DataType, dim> VecValueType;
469 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
470 //get number of nodes per element
471 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
472 // define local arrays
473 // use extern shared for this?!
474 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, need_stream_diff_> BSDKWrapper;
475 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
476 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
477 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
478 VecValueType& loc_v = shared_wrapper->loc_v;
479 VecValueType* mean_v_p = nullptr;
480 if constexpr(need_streamline) mean_v_p = &(shared_wrapper->mean_v);
481 typename SpaceHelp::DomainPointType& dom_point = shared_wrapper->dom_point;
482 Tiny::Vector<DataType, num_loc_dofs>* streamdiff_coeffs_p = nullptr;
483 if constexpr(need_streamline) streamdiff_coeffs_p = &(shared_wrapper->streamdiff_coeffs);
484 DataType& local_delta = shared_wrapper->local_delta;
485 DataType& nu_loc = shared_wrapper->nu_loc;
486 DataType& gamma_dot = shared_wrapper->gamma_dot;
487 DataType& det = shared_wrapper->det;
488 DataType& weight = shared_wrapper->weight;
489 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
490 bool& need_frechet = shared_wrapper->need_frechet;
491
492 // local tmp strainrate value
493 MatValueType strain_rate_tensor_2(DataType(0));
494
495 VoxelAssembly::coalesced_format(tg, (unsigned int*) shared_wrapper, sizeof(BSDKWrapper)/(sizeof(unsigned int)));
496 tg.sync();
497
498 cg::invoke_one(tg, [&](){need_frechet = CudaMath::cuda_abs(frechet_beta) > DataType(0);});
499 tg.sync();
500
501
502 if constexpr(need_streamline) //need streamdiff?
503 {
504 cg::invoke_one(tg, [&]() {*mean_v_p = DataType(0);});
505 //set domain point to barycenter, which is zero for Hypercubes
506 // NewSpaceHelp::_map_point(img_point, barycenter, local_coeffs);
507 //only reserve memory for reference values
508 cg::invoke_one(tg, [&](){ const VecValueType bc(0);
509 SpaceHelp::eval_ref_values(basis_data, bc);
510 SpaceHelp::trans_values(basis_data);});
511
512 tg.sync();
513
514 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
515 {
516 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &(*mean_v_p)[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
517 }
518
519 tg.sync();
520
521 DataType local_norm_v = mean_v_p->norm_euclid();
522
523 if(local_norm_v > tol_eps)
524 {
525 cg::invoke_one(tg, [&](){
526 const DataType local_h = SpaceHelp::width_directed(*mean_v_p, local_coeffs) * local_norm_v;
527 const DataType local_re = (local_norm_v * local_h) / sd_nu;
528 local_delta = sd_delta * (local_h / sd_v_norm) * (DataType(2) * local_re) / (DataType(1) + local_re);
529 });
530 }
531 }
532
533 tg.sync();
534
535
536 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
537 {
538 cg::invoke_one(tg, [&](){dom_point = cub_pt[cub_ind];
539 SpaceHelp::eval_ref_values(basis_data, dom_point);
540 SpaceHelp::trans_values(basis_data);});
541 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
542 tg.sync();
543 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, dom_point, &local_coeffs[0][0]);
544 tg.sync();
545 cg::invoke_one(tg, [&](){det = loc_jac.det();
546 weight = det * cub_wg[cub_ind];});
547 tg.sync();
548 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
549 tg.sync();
550 cg::invoke_one(tg, [&](){
551 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
552 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
553
554 tg.sync();
555 if(need_convection || need_streamline) // need streamdiff or convection?
556 {
557 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
558 tg.sync();
559
560 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
561 {
562 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
563 }
564 // tg.sync();
565 }
566
567 {
568 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
569 tg.sync();
570 for(int i = t_idx; i < num_loc_dofs; i += g_size)
571 {
572 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
573 }
574 // tg.sync();
575 }
576 tg.sync();
577 // assemble necessary values for material assembler
578 {
579 strain_rate_tensor_2.set_transpose(loc_grad_v);
580 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
581 cg::invoke_one(tg, [&](){
582 // strain_rate_tensor_2.set_transpose(loc_grad_v);
583 // strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
584 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
585 nu_loc = visco_func(gamma_dot);
586 });
587 }
588
589
590 if constexpr(need_streamline) // need streamdiff?
591 {
592 tg.sync();
593 for(int i = t_idx; i < num_loc_dofs; i += g_size)
594 {
595 (*streamdiff_coeffs_p)[i] = Tiny::dot(loc_v, basis_data.phi[i].grad);
596 }
597 // tg.sync();
598 }
599
600 tg.sync();
601
602 const DataType fac = frechet_material * visco_d_func(gamma_dot)/(gamma_dot + reg_eps);
603
604 for(int idx = t_idx; idx < loc_assemble_size; idx += g_size)
605 {
606 // the thread local test and trial function indices
607 const int i = (idx+assemble_offset) / num_loc_dofs;
608 const int j = (idx+assemble_offset) % num_loc_dofs;
609
610 {
611 // compute scalar value
612 const DataType value = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
613
614 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
615 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu_loc * weight);
616 }
617
618 if(need_frechet_material)
619 {
620 Tiny::Vector<DataType, dim> du_grad_phi;
621 du_grad_phi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[i].grad);
622 Tiny::Vector<DataType, dim> du_grad_psi;
623 du_grad_psi.set_mat_vec_mult(strain_rate_tensor_2, basis_data.phi[j].grad);
624 // printf("from thread %i du grad psi %f, %f \n", idx, du_grad_psi[0], du_grad_psi[1]);
625 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_outer_product(du_grad_phi, du_grad_psi, fac*weight);
626
627 }
628
629 if(need_convection) // assemble convection?
630 {
631 // compute scalar value
632 const DataType value = beta * weight * basis_data.phi[i].value * VoxelAssembly::dot(basis_data.phi[j].grad, &loc_v[0]);
633
634 // update local matrix
635 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
636 }
637
638 // assemble convection Frechet?
639 if(need_frechet)
640 {
641 // compute scalar value
642 const DataType value = frechet_beta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
643
644 // update local matrix
645 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->axpy(value, loc_grad_v);
646 }
647
648 // assemble reaction?
649 #ifdef __CUDACC__
650 if(CudaMath::cuda_abs(theta) > DataType(0))
651 #else
652 if(Math::abs(theta) > DataType(0))
653 #endif
654 {
655 // compute scalar value
656 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
657
658 // update local matrix
659 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
660 }
661
662 // assemble streamline diffusion?
663 if constexpr(need_streamline)
664 {
665 if(local_delta > tol_eps)
666 {
667 // compute scalar value
668 const DataType value = local_delta * weight * (*streamdiff_coeffs_p)[i] * (*streamdiff_coeffs_p)[j];
669
670 // update local matrix
671 ((Tiny::Matrix<DataType, dim, dim>*)(loc_mat + dim*dim*idx))->add_scalar_main_diag(value);
672 }
673 }
674 }
675 // next cubature point
676 }
677 }
678 #endif
679
703 template<typename SpaceHelp_, typename LocVecType_, int dim_, int num_verts_, typename ViscFunc_>
704 CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_& loc_vec, const LocVecType_& local_prim_dofs, const LocVecType_& local_conv_dofs, const Tiny::Matrix<typename SpaceHelp_::DataType, dim_, num_verts_>& local_coeffs,
705 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
707 const bool need_convection, ViscFunc_ visco_func)
708 {
709 typedef SpaceHelp_ SpaceHelp;
710 constexpr int dim = SpaceHelp::dim;
711 typedef typename SpaceHelp::SpaceType SpaceType;
712 typedef typename SpaceHelp::DataType DataType;
713
714 // const DataType& nu{burgers_params.nu};
715 const DataType& theta{burgers_params.theta};
716 const DataType& beta{burgers_params.beta};
717 // const DataType& frechet_beta{burgers_params.frechet_beta};
718 // const DataType& sd_delta{burgers_params.sd_delta};
719 // const DataType& sd_nu{burgers_params.sd_nu};
720 // const DataType& sd_v_norm{burgers_params.sd_v_norm};
721 // const bool& deformation{burgers_params.deformation};
722
723 // #ifdef __CUDACC__
724 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
725 // #else
726 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
727 // #endif
728
729 // #ifdef __CUDACC__
730 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
731 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
732 // #else
733 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
734 // const bool need_convection = Math::abs(beta) > DataType(0);
735 // #endif
736
737
738 //define local sizes
739 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
740 //get number of nodes per element
741 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
742 // define local arrays
743 typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
744 typename SpaceHelp::EvalData basis_data;
745
746 // local vector and matrix defines
747 typedef Tiny::Vector<DataType, dim> VecValueType;
748 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
749
750 VecValueType loc_v(DataType(0));
751 DataType nu_loc(DataType(0));
752
753 loc_vec.format();
754
755 Tiny::Vector<DataType, num_loc_dofs> streamdiff_coeffs(DataType(0));
756
757 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
758 {
759 const typename SpaceHelp::DomainPointType& dom_point = cub_pt[cub_ind];
760 SpaceHelp::eval_ref_values(basis_data, dom_point);
761 SpaceHelp::trans_values(basis_data);
762 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
763 SpaceHelp::calc_jac_mat(loc_jac, dom_point, local_coeffs);
764 loc_jac_inv.set_inverse(loc_jac);
765
766 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
767 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
768
769 const DataType weight = loc_jac.det() * cub_wg[cub_ind];
770 if(need_convection) // need streamdiff or convection?
771 {
772 loc_v.format();
773 for(int i = 0; i < num_loc_dofs; ++i)
774 {
775 loc_v.axpy(basis_data.phi[i].value, local_conv_dofs[i]);
776 }
777 }
778
779 // assemble loc_grad_v, always required for this assembler
780 {
781 MatValueType loc_grad_v(DataType(0)), strain_rate_tensor_2(DataType(0));
782 loc_grad_v.format();
783 for(int i = 0; i < num_loc_dofs; ++i)
784 {
785 loc_grad_v.add_outer_product(local_conv_dofs[i], basis_data.phi[i].grad);
786 }
787 // required terms for carreau assembly
788 strain_rate_tensor_2.set_transpose(loc_grad_v);
789 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
790 #ifdef __CUDACC__
791 nu_loc = visco_func(CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
792 #else
793 nu_loc = visco_func(Math::sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius());
794 #endif
795
796 }
797
798 // always deformation
799 for(int i(0); i < num_loc_dofs; ++i)
800 {
801 // trial function loop
802 for(int j(0); j < num_loc_dofs; ++j)
803 {
804 // compute scalar values
805 const DataType value1 = nu_loc * weight * Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad);
806 const DataType value2 = nu_loc * weight * Tiny::dot(local_prim_dofs[j], basis_data.phi[i].grad);
807 // update local vector
808 loc_vec[i].axpy(value1, local_prim_dofs[j]);
809 loc_vec[i].axpy(value2, basis_data.phi[j].grad);
810 }
811 }
812
813 if(need_convection) // assemble convection?
814 {
815 // test function loop
816 for(int i(0); i < num_loc_dofs; ++i)
817 {
818 // trial function loop
819 for(int j(0); j < num_loc_dofs; ++j)
820 {
821 // compute scalar value
822 const DataType value = beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad);
823
824 // update local matrix
825 loc_vec[i].axpy(value, local_prim_dofs[j]);
826 }
827 }
828 }
829
830 // assemble reaction?
831 #ifdef __CUDACC__
832 if(CudaMath::cuda_abs(theta) > DataType(0))
833 #else
834 if(Math::abs(theta) > DataType(0))
835 #endif
836 {
837 // test function loop
838 for(int i(0); i < num_loc_dofs; ++i)
839 {
840 // trial function loop
841 for(int j(0); j < num_loc_dofs; ++j)
842 {
843 // compute scalar value
844 const DataType value = theta * weight * basis_data.phi[i].value * basis_data.phi[j].value;
845
846 // update local matrix
847 loc_vec[i].axpy(value, local_prim_dofs[j]);
848 }
849 }
850 }
851 // next cubature point
852 }
853 }
854
855 #ifdef __CUDACC__
866 template<typename ThreadGroup_, typename SpaceHelp_, typename ViscFunc_>
867 CUDA_DEVICE void grouped_burgers_velo_material_defect_assembly_kernel(const ThreadGroup_& tg, BurgersMatSharedDataKernelWrapper<SpaceHelp_, false>* shared_wrapper, int loc_assemble_size, int assemble_offset,
868 typename SpaceHelp_::DataType* loc_vec, const typename SpaceHelp_::DataType* local_prim_dofs, const typename SpaceHelp_::DataType* local_conv_dofs,
870 const typename SpaceHelp_::DomainPointType* cub_pt, const typename SpaceHelp_::DataType* cub_wg, const int num_cubs,
872 const bool need_convection, ViscFunc_ visco_func)
873 {
874 typedef SpaceHelp_ SpaceHelp;
875 constexpr int dim = SpaceHelp::dim;
876 typedef typename SpaceHelp::SpaceType SpaceType;
877 typedef typename SpaceHelp::DataType DataType;
878 const int t_idx = tg.thread_rank();
879 const int g_size = tg.num_threads();
880
881 const DataType& nu{burgers_params.nu};
882 const DataType& theta{burgers_params.theta};
883 const DataType& beta{burgers_params.beta};
884 // const DataType& frechet_beta{burgers_params.frechet_beta};
885 // const DataType& sd_delta{burgers_params.sd_delta};
886 // const DataType& sd_nu{burgers_params.sd_nu};
887 // const DataType& sd_v_norm{burgers_params.sd_v_norm};
888 // const bool& deformation{burgers_params.deformation};
889
890 // #ifdef __CUDACC__
891 // const DataType tol_eps = CudaMath::cuda_get_sqrt_eps<DataType>();
892 // #else
893 // const DataType tol_eps = Math::sqrt(Math::eps<DataType>());
894 // #endif
895
896 // #ifdef __CUDACC__
897 // const bool need_streamline = (CudaMath::cuda_abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
898 // const bool need_convection = CudaMath::cuda_abs(beta) > DataType(0);
899 // #else
900 // const bool need_streamline = (Math::abs(sd_delta) > DataType(0)) && (sd_v_norm > tol_eps);
901 // const bool need_convection = Math::abs(beta) > DataType(0);
902 // #endif
903
904
905 //define local sizes
906 constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
907 //get number of nodes per element
908 // constexpr int num_loc_verts = SpaceType::MeshType::template IndexSet<dim, 0>::Type::num_indices;
909 // define local arrays
910 typedef BurgersMatSharedDataKernelWrapper<SpaceHelp, false> BSDKWrapper;
911 typename SpaceHelp::JacobianMatrixType& loc_jac = shared_wrapper->loc_jac;
912 typename SpaceHelp::JacobianMatrixType& loc_jac_inv = shared_wrapper->loc_jac_inv;
913
914 DataType& det = shared_wrapper->det;
915 DataType& weight = shared_wrapper->weight;
916 DataType& nu_loc = shared_wrapper->nu_loc;
917 DataType& gamma_dot = shared_wrapper->gamma_dot;
918 typename SpaceHelp::EvalData& basis_data = shared_wrapper->basis_data;
919 // local vector and matrix defines
920 typedef Tiny::Vector<DataType, dim> VecValueType;
921 typedef Tiny::Matrix<DataType, dim, dim> MatValueType;
922
923 MatValueType& loc_grad_v = shared_wrapper->loc_grad_v;
924 VecValueType& loc_v = shared_wrapper->loc_v;
925
926 typename SpaceHelp::DomainPointType& dom_point = shared_wrapper->dom_point;
927
928 VoxelAssembly::coalesced_format(tg, (unsigned int*) shared_wrapper, sizeof(BSDKWrapper)/(sizeof(unsigned int)));
929 tg.sync();
930 // 4 threads should work on one entry, requires block size to be mutliple of 4
931 cg::thread_block_tile<4> tile4 = cg::tiled_partition<4>(tg);
932
933 for(int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
934 {
935 tg.sync();
936 cg::invoke_one(tg, [&](){dom_point = cub_pt[cub_ind];
937 SpaceHelp::eval_ref_values(basis_data, dom_point);
938 SpaceHelp::trans_values(basis_data);});
939 // NewSpaceHelp::_map_point(img_point, dom_point, local_coeffs);//not needed?
940 tg.sync();
941 SpaceHelp::grouped_calc_jac_mat(tg, loc_jac, dom_point, &local_coeffs[0][0]);
942 tg.sync();
943 cg::invoke_one(tg, [&](){det = loc_jac.det();
944 weight = det * cub_wg[cub_ind];});
945 tg.sync();
946 loc_jac_inv.grouped_set_inverse(tg, loc_jac, det);
947 tg.sync();
948 cg::invoke_one(tg, [&](){
949 SpaceHelp::eval_ref_gradients(basis_data, dom_point);
950 SpaceHelp::trans_gradients(basis_data, loc_jac_inv);});
951
952 tg.sync();
953 if(need_convection) // need streamdiff or convection?
954 {
955 VoxelAssembly::coalesced_format(tg, &loc_v[0], dim);
956 tg.sync();
957
958 for(int idx = t_idx; idx < num_loc_dofs; idx += g_size)
959 {
960 VoxelAssembly::template grouped_axpy<DataType, cg::thread_group, dim>(tg, &loc_v[0], basis_data.phi[idx].value, &local_conv_dofs[idx*dim]);
961 }
962 }
963 tg.sync();
964 // assemble necessary values for material assembler
965 {
966 VoxelAssembly::coalesced_format(tg, &loc_grad_v[0][0], dim*dim);
967 tg.sync();
968 for(int i = t_idx; i < num_loc_dofs; i += g_size)
969 {
970 VoxelAssembly::grouped_add_outer_product(tg, &loc_grad_v[0][0], &local_conv_dofs[i*dim], basis_data.phi[i].grad);
971 }
972 tg.sync();
973 cg::invoke_one(tg, [&](){
974 MatValueType strain_rate_tensor_2(DataType(0));
975 strain_rate_tensor_2.set_transpose(loc_grad_v);
976 strain_rate_tensor_2.axpy(DataType(1.0), loc_grad_v);
977 gamma_dot = CudaMath::cuda_sqrt(DataType(0.5))*strain_rate_tensor_2.norm_frobenius();
978 nu_loc = visco_func(gamma_dot);
979 });
980 }
981
982 tg.sync();
983
984 for(int idx = tile4.meta_group_rank(); idx < loc_assemble_size*dim; idx += tile4.meta_group_size())
985 {
986 // compute scalar value
987 DataType val = DataType(0);
988 // the thread local test function index
989 const int i = (idx/dim+assemble_offset);
990 const int ii = idx%dim;
991 {
992 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
993 {
994 // compute scalar values
995 val += nu_loc * weight * (Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii]
996 + Tiny::dot(((Tiny::Vector<DataType, dim>*)local_prim_dofs)[j], basis_data.phi[i].grad) * basis_data.phi[j].grad[ii]);
997 }
998 }
999
1000 if(need_convection) // assemble convection?
1001 {
1002 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1003 {
1004 // compute scalar values
1005 val += beta * weight * basis_data.phi[i].value * Tiny::dot(loc_v, basis_data.phi[j].grad) * local_prim_dofs[j*dim + ii];
1006 }
1007 }
1008
1009 // assemble reaction?
1010 #ifdef __CUDACC__
1011 if(CudaMath::cuda_abs(theta) > DataType(0))
1012 #else
1013 if(Math::abs(theta) > DataType(0))
1014 #endif
1015 {
1016 for(int j = tile4.thread_rank(); j < num_loc_dofs; j += tile4.size())
1017 {
1018 // compute scalar values
1019 val += theta * weight * basis_data.phi[i].value * basis_data.phi[j].value * local_prim_dofs[j*dim + ii];
1020 }
1021 }
1022 // reduce result and store value with async call
1023 // tile4.sync();
1024 cuda::atomic_ref<DataType, cuda::thread_scope_block> a_ref(*(loc_vec+idx));
1025 cg::reduce_update_async(tile4, a_ref, val, cg::plus<DataType>());
1026 }
1027 // next cubature point
1028 }
1029 }
1030 #endif
1031 } // namespace Kernel
1032
1033 namespace Arch
1034 {
1035 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
1057 template<typename Space_, typename DT_, typename IT_>
1058 void assemble_burgers_velo_material_csr(const Space_& space,
1059 const CSRMatrixData<DT_, IT_>& matrix_data,
1060 const DT_* conv_data,
1061 const AssemblyCubatureData<DT_>& cubature,
1062 const AssemblyMappingData<DT_, IT_>& dof_mapping,
1063 const std::vector<int*>& coloring_maps,
1064 const std::vector<Index>& coloring_map_sizes,
1065 DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
1066 const AssemblyMaterialData<DT_>& material_params,
1067 MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy);
1068
1091 template<typename Space_, typename DT_, typename IT_>
1093 DT_* vector_data,
1094 const DT_* conv_data,
1095 const DT_* primal_data,
1096 const AssemblyCubatureData<DT_>& cubature,
1097 const AssemblyMappingData<DT_, IT_>& dof_mapping,
1098 const std::vector<int*>& coloring_maps,
1099 const std::vector<Index>& coloring_map_sizes,
1100 DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
1101 const AssemblyMaterialData<DT_>& material_params,
1102 MaterialType material_type,
1103 int shared_mem, int blocksize, int gridsize, bool print_occupancy);
1104
1105 #endif
1106
1128 template<typename Space_, typename DT_, typename IT_>
1129 void assemble_burgers_velo_material_csr_host(const Space_& space,
1130 const CSRMatrixData<DT_, IT_>& matrix_data,
1131 const DT_* conv_data,
1132 const AssemblyCubatureData<DT_>& cubature,
1133 const AssemblyMappingData<DT_, IT_>& dof_mapping,
1134 const std::vector<int*>& coloring_maps,
1135 const std::vector<Index>& coloring_map_sizes,
1136 DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
1137 const AssemblyMaterialData<DT_>& material_params,
1138 MaterialType material_type);
1139
1162 template<typename Space_, typename DT_, typename IT_>
1163 void assemble_burgers_velo_material_defect_host(const Space_& space,
1164 DT_* vector_data,
1165 const DT_* conv_data,
1166 const DT_* primal_data,
1167 const AssemblyCubatureData<DT_>& cubature,
1168 const AssemblyMappingData<DT_, IT_>& dof_mapping,
1169 const std::vector<int*>& coloring_maps,
1170 const std::vector<Index>& coloring_map_sizes,
1171 DT_ alpha, const AssemblyBurgersData<DT_>& burgers_params,
1172 const AssemblyMaterialData<DT_>& material_params,
1173 MaterialType material_type);
1174
1175
1176 }
1177
1178
1179#ifndef __CUDACC__
1192 template<int dim_, typename DT_, typename IT_>
1194 public VoxelBurgersAssembler<Q2StandardHyperCube<dim_>, DT_, IT_>
1195 {
1196 public:
1200 typedef typename BaseClass::DataHandler DataHandler;
1201 typedef typename BaseClass::SpaceHelp SpaceHelp;
1202 typedef typename SpaceHelp::ShapeType ShapeType;
1203 typedef typename SpaceHelp::DataType DataType;
1204 typedef typename SpaceHelp::IndexType IndexType;
1205
1207 static constexpr int dim = SpaceHelp::dim;
1208
1209 typedef typename SpaceHelp::DomainPointType DomainPointType;
1210 typedef typename SpaceHelp::ImagePointType ImagePointType;
1211 typedef typename SpaceHelp::ValueType ValueType;
1212 typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
1213
1216
1218 DataType reg_eps;
1219
1221 DataType mu_0;
1222 DataType exp;
1223 DataType lambda;
1224 DataType a_T;
1225 DataType yasuda_a;
1226 DataType mu_inf;
1227
1230
1231
1232 public:
1233 explicit VoxelBurgersVeloMaterialAssembler() = default;
1234
1248 template<typename ColoringType_>
1249 explicit VoxelBurgersVeloMaterialAssembler(const SpaceType& space, const ColoringType_& coloring, int hint = -1) :
1250 BaseClass(space, coloring, hint),
1251 frechet_material(DataType(0)), reg_eps(DataType(1E-100)), material_type(MaterialType::carreau)
1252 {
1253 }
1254
1255 // rule of 5
1257
1259
1261
1263
1265
1272 {
1273 return VoxelAssembly::AssemblyMaterialData<DataType>{frechet_material, reg_eps, mu_0, exp, lambda, a_T, yasuda_a, mu_inf, frechet_material>Math::pow(Math::eps<DataType>(), DataType(0.9))};
1274 }
1275
1287 template<typename CubatureFactory_>
1289 const SpaceType& space, const CubatureFactory_& cubature_factory, DataType alpha = DataType(1)) const
1290 {
1291 XASSERTM(matrix.rows() == space.get_num_dofs(), "invalid matrix dimensions");
1292 XASSERTM(matrix.columns() == space.get_num_dofs(), "invalid matrix dimensions");
1293 XASSERTM(convect.size() == space.get_num_dofs(), "invalid vector size");
1294
1295 //define cubature
1296 typedef Cubature::Rule<ShapeType, DataType, DataType> CubatureRuleType;
1297 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1298
1299 //get cubature points and weights
1300 int num_cubs = cubature.get_num_points();
1301 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1302 DataType* cub_wg = cubature.get_weights();
1303
1304 BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, convect, space, cub_pt, cub_wg, num_cubs, alpha)
1305 }
1306
1319 template<typename CubatureFactory_>
1321 const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal, const SpaceType& space, const CubatureFactory_& cubature_factory, DataType alpha = DataType(1)) const
1322 {
1323 XASSERTM(vector.size() == space.get_num_dofs(), "invalid vector size");
1324 XASSERTM(convect.size() == space.get_num_dofs(), "invalid vector size");
1325
1326 //define cubature
1327 typedef Cubature::Rule<ShapeType, DataType, DataType> CubatureRuleType;
1328 CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
1329
1330 //get cubature points and weights
1331 int num_cubs = cubature.get_num_points();
1332 typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
1333 DataType* cub_wg = cubature.get_weights();
1334
1335 BACKEND_SKELETON_VOID(assemble_vector_cuda, assemble_vector_generic, assemble_vector_generic, vector, convect, primal, space, cub_pt, cub_wg, num_cubs, alpha)
1336 }
1337
1339 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1340 {
1341 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1342 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1343
1344 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(void*)cub_pt, cub_wg, num_cubs};
1345 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1346 auto burgers_params = this->wrap_burgers_params();
1347 auto material_params = this->wrap_material_params();
1348
1349
1350 VoxelAssembly::Arch::assemble_burgers_velo_material_csr_host(space, mat_data, vec_data, cub_data, mapping_data,
1351 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1352 burgers_params, material_params, material_type);
1353 }
1354
1355 void assemble_vector_generic(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1356 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1357 {
1358 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1359 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1360 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1361
1362 VoxelAssembly::AssemblyCubatureData<DataType> cub_data = {(void*)cub_pt, cub_wg, num_cubs};
1363 VoxelAssembly::AssemblyMappingData<DataType, IndexType> mapping_data = this->mesh_data.get_assembly_field();
1364 auto burgers_params = this->wrap_burgers_params();
1365 auto material_params = wrap_material_params();
1366
1367
1368 VoxelAssembly::Arch::assemble_burgers_velo_material_defect_host(space, vec_data, conv_data, primal_data, cub_data, mapping_data,
1369 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha,
1370 burgers_params, material_params, material_type);
1371 //free resources
1372 }
1373
1374 #ifdef FEAT_HAVE_CUDA
1375 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect,
1376 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1377 {
1378 VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
1379 const DataType* vec_data = convect.template elements<LAFEM::Perspective::pod>();
1380
1381 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1382 //initialize all necessary pointer arrays and values //maybe more sense to specify cubature rule and set this to a const mem location?
1383 void* cub_pt_device = Util::cuda_malloc(Index(num_cubs) * sizeof(CubPointType));
1384 Util::cuda_copy_host_to_device(cub_pt_device, (void*)cub_pt, Index(num_cubs) * sizeof(CubPointType));
1385
1386 void* cub_wg_device = Util::cuda_malloc(Index(num_cubs) * sizeof(DataType));
1387 Util::cuda_copy_host_to_device(cub_wg_device, (void*)cub_wg, Index(num_cubs) * sizeof(DataType));
1388
1389 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1390 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1391 auto burgers_params = this->wrap_burgers_params();
1392 auto material_params = wrap_material_params();
1393
1394 VoxelAssembly::Arch::assemble_burgers_velo_material_csr(space, mat_data, vec_data, d_cub_data, d_mapping_data,
1395 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1396 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1397 Util::cuda_free(cub_wg_device);
1398 Util::cuda_free(cub_pt_device);
1399 }
1400
1401
1402 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& vector, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& convect, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>& primal,
1403 const SpaceType& space, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, const DataType* cub_wg, int num_cubs, DataType alpha) const
1404 {
1405 DataType* vec_data = vector.template elements<LAFEM::Perspective::pod>();
1406 const DataType* conv_data = convect.template elements<LAFEM::Perspective::pod>();
1407 const DataType* primal_data = primal.template elements<LAFEM::Perspective::pod>();
1408
1409 typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
1410 //initialize all necessary pointer arrays and values //maybe more sense to specify cubature rule and set this to a const mem location?
1411 void* cub_pt_device = Util::cuda_malloc(Index(num_cubs) * sizeof(CubPointType));
1412 Util::cuda_copy_host_to_device(cub_pt_device, (void*)cub_pt, Index(num_cubs) * sizeof(CubPointType));
1413
1414 void* cub_wg_device = Util::cuda_malloc(Index(num_cubs) * sizeof(DataType));
1415 Util::cuda_copy_host_to_device(cub_wg_device, (void*)cub_wg, Index(num_cubs) * sizeof(DataType));
1416
1417 VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
1418 VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = this->mesh_data.get_assembly_field();
1419 auto burgers_params = this->wrap_burgers_params();
1420 auto material_params = wrap_material_params();
1421
1422 VoxelAssembly::Arch::assemble_burgers_velo_material_defect(space, vec_data, conv_data, primal_data, d_cub_data, d_mapping_data,
1423 this->mesh_data.get_coloring_maps(), this->mesh_data.get_color_map_sizes(), alpha, burgers_params, material_params, material_type,
1424 this->shared_mem, this->blocksize, this->gridsize, this->print_occupancy);
1425 Util::cuda_free(cub_wg_device);
1426 Util::cuda_free(cub_pt_device);
1427 }
1428
1429 #else //FEAT_HAVE_CUDA
1430 void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix), const SpaceType& DOXY(space), typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt), const DataType* DOXY(cub_wg), int DOXY(num_cubs), DataType DOXY(alpha)) const
1431 {
1432 XABORTM("What in the nine hells are you doing here?");
1433 }
1434
1435 void assemble_vector_cuda(LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&, const LAFEM::DenseVectorBlocked<DataType, IndexType, dim>&,
1436 const SpaceType&, typename Cubature::Rule<ShapeType, DataType, DataType>::PointType*, const DataType*, int, DataType) const
1437 {
1438 XABORTM("This call was logged and your local FBI agent was informed about your transgression");
1439 }
1440
1441 #endif
1442
1443 }; // class VoxelBurgersAssembler<Q2StandardCube>
1444#endif
1445 }
1446}
1447
1448#endif
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
Cubature Rule class template.
Definition: rule.hpp:38
Blocked Dense data vector class template.
Index size() const
The number of elements.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Standard Lagrange-2 Finite-Element space class template.
Definition: element.hpp:39
Index get_num_dofs() const
Returns the number of dofs.
Definition: element.hpp:121
Tiny Matrix class template.
Tiny Vector class template.
CUDA_HOST_DEVICE Vector & set_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x)
Sets this vector to the result of a matrix-vector product.
Q2Lagrange thread parallel assembly class for the burgers operator.
Burgers Voxel Assembly template.
VoxelBurgersVeloMaterialAssembler(const SpaceType &space, const ColoringType_ &coloring, int hint=-1)
Constructor for burgers velo material assembler.
void assemble_matrix1(LAFEM::SparseMatrixBCSR< DataType, IndexType, dim, dim > &matrix, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the burgers velo material operator for finitie element space with same test and trial space...
void assemble_vector(LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &vector, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &convect, const LAFEM::DenseVectorBlocked< DataType, IndexType, dim > &primal, const SpaceType &space, const CubatureFactory_ &cubature_factory, DataType alpha=DataType(1)) const
Assembles the application of the burgers operator to a given primal vector.
VoxelAssembly::AssemblyMaterialData< DataType > wrap_material_params() const
Wraps the set burgers parameter into convenient struct.
Burgers Velocity Material Voxel Assembly template.
T_ sqrt(T_ x)
Returns the square-root of a value.
Definition: math.hpp:300
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
Definition: math.hpp:643
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
void assemble_burgers_velo_material_defect_host(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the defect burgers assembler.
void assemble_burgers_velo_material_csr(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType mat_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_csr_host(const Space_ &space, const CSRMatrixData< DT_, IT_ > &matrix_data, const DT_ *conv_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type)
Host kernel wrapper for the full matrix burgers assembler.
void assemble_burgers_velo_material_defect(const Space_ &space, DT_ *vector_data, const DT_ *conv_data, const DT_ *primal_data, const AssemblyCubatureData< DT_ > &cubature, const AssemblyMappingData< DT_, IT_ > &dof_mapping, const std::vector< int * > &coloring_maps, const std::vector< Index > &coloring_map_sizes, DT_ alpha, const AssemblyBurgersData< DT_ > &burgers_params, const AssemblyMaterialData< DT_ > &material_params, MaterialType material_type, int shared_mem, int blocksize, int gridsize, bool print_occupancy)
Device kernel wrapper for the defect burgers assembler.
CUDA_HOST_DEVICE void burgers_velo_material_mat_assembly_kernel(LocMatType_ &loc_mat, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const VoxelAssembly::AssemblyMaterialData< typename SpaceHelp_::DataType > &material_params, const bool need_convection, const typename SpaceHelp_::DataType tol_eps, ViscFunc_ visco_func, ViscDerFunc_ visco_d_func)
Burgers Velo Material Matrix assembly kernel.
CUDA_HOST_DEVICE void burgers_velo_material_defect_assembly_kernel(LocVecType_ &loc_vec, const LocVecType_ &local_prim_dofs, const LocVecType_ &local_conv_dofs, const Tiny::Matrix< typename SpaceHelp_::DataType, dim_, num_verts_ > &local_coeffs, const typename SpaceHelp_::DomainPointType *cub_pt, const typename SpaceHelp_::DataType *cub_wg, const int num_cubs, const VoxelAssembly::AssemblyBurgersData< typename SpaceHelp_::DataType > &burgers_params, const bool need_convection, ViscFunc_ visco_func)
Burgers Vector assembly kernel.
MaterialType
Enum for different material types.
FEAT namespace.
Definition: adjactor.hpp:12
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.
@ dom_point
specifies whether the trafo should supply domain point coordinates
A data field for all necessary values that define the dof mapping for assembly.
Data for material burgers assembler.
Helper struct wrapping the data required as shared data.