9#include <kernel/backend.hpp> 
   10#include <kernel/voxel_assembly/voxel_assembly_common.hpp> 
   11#include <kernel/voxel_assembly/helper/data_handler.hpp> 
   12#include <kernel/voxel_assembly/helper/space_helper.hpp> 
   13#include <kernel/cubature/rule.hpp> 
   14#include <kernel/lafem/sparse_matrix_bcsr.hpp> 
   15#include <kernel/trafo/standard/mapping.hpp> 
   16#include <kernel/geometry/conformal_mesh.hpp> 
   17#include <kernel/util/tiny_algebra.hpp> 
   20#include <kernel/util/cuda_util.hpp> 
   25  namespace VoxelAssembly
 
   36    template<
typename Space_, 
typename DT_, 
typename IT_>
 
   41      template<
typename SpaceHelp_, 
typename LocMatType_, 
int dim_, 
int num_verts_>
 
   43                                          const typename SpaceHelp_::DomainPointType* cub_pt, 
const typename SpaceHelp_::DataType* cub_wg, 
const int num_cubs,
 
   44                                          const typename SpaceHelp_::DataType nu)
 
   46        typedef SpaceHelp_ SpaceHelp;
 
   48        typedef typename SpaceHelp::SpaceType SpaceType;
 
   49        typedef typename SpaceHelp::DataType DataType;
 
   52        constexpr int num_loc_dofs = SpaceType::DofMappingType::dof_count;
 
   56        typename SpaceHelp::JacobianMatrixType loc_jac, loc_jac_inv;
 
   57        typename SpaceHelp::DomainPointType 
dom_point;
 
   58        typename SpaceHelp::EvalData basis_data;
 
   61        for(
int cub_ind = 0; cub_ind < num_cubs; ++cub_ind)
 
   65          SpaceHelp::calc_jac_mat(loc_jac, 
dom_point, local_coeffs);
 
   66          loc_jac_inv.set_inverse(loc_jac);
 
   68          SpaceHelp::eval_ref_gradients(basis_data, 
dom_point);
 
   69          SpaceHelp::trans_gradients(basis_data, loc_jac_inv);
 
   71          const DataType weight = loc_jac.det() * cub_wg[cub_ind];
 
   72          for(
int i = 0; i < num_loc_dofs; ++i)
 
   74            for(
int j = 0; j < num_loc_dofs; ++j)
 
   77              loc_mat[i][j].add_scalar_main_diag(nu * weight * 
Tiny::dot(basis_data.phi[i].grad, basis_data.phi[j].grad));
 
   78              loc_mat[i][j].add_outer_product(basis_data.phi[j].grad, basis_data.phi[i].grad, nu * weight);
 
   90      template<
typename Space_, 
typename DT_, 
typename IT_>
 
   91      void assemble_defo_csr(
const Space_& space,
 
   92              const CSRMatrixData<DT_, IT_>& matrix_data,
 
   93              const AssemblyCubatureData<DT_>& cubature,
 
   94              const AssemblyMappingData<DT_, IT_>& dof_mapping,
 
   95              const std::vector<int*>& coloring_maps,
 
   96              const std::vector<Index>& coloring_map_sizes,
 
  101      template<
typename Space_, 
typename DT_, 
typename IT_>
 
  102      void assemble_defo_csr_host(
const Space_& space,
 
  103              const CSRMatrixData<DT_, IT_>& matrix_data,
 
  104              const AssemblyCubatureData<DT_>& cubature,
 
  105              const AssemblyMappingData<DT_, IT_>& dof_mapping,
 
  106              const std::vector<int*>& coloring_maps,
 
  107              const std::vector<Index>& coloring_map_sizes,
 
  114    template<
int dim_, 
typename DT_, 
typename IT_>
 
  122      typedef typename SpaceHelp::ShapeType ShapeType;
 
  123      typedef typename SpaceHelp::DataType DataType;
 
  124      typedef typename SpaceHelp::IndexType IndexType;
 
  127      static constexpr int dim = SpaceHelp::dim;
 
  129      typedef typename SpaceHelp::DomainPointType DomainPointType;
 
  130      typedef typename SpaceHelp::ImagePointType ImagePointType;
 
  131      typedef typename SpaceHelp::ValueType ValueType;
 
  132      typedef typename SpaceHelp::JacobianMatrixType JacobianMatrixType;
 
  144      template<
typename ColoringType_>
 
  146      mesh_data(space, coloring, hint), nu(DataType(0.))
 
  148        #ifdef FEAT_HAVE_CUDA 
  150        const std::size_t stack_limit = Util::cuda_get_max_cache_thread();
 
  151        const std::size_t stack_limit_target = 
sizeof(DataType) * (dim == 3 ? 8096u : 1012u);
 
  152        if(stack_limit < stack_limit_target)
 
  153          Util::cuda_set_max_cache_thread(stack_limit_target);
 
  170      template<
typename CubatureFactory_>
 
  173        XASSERTM(matrix.
rows() == space.get_num_dofs(), 
"invalid matrix dimensions");
 
  174        XASSERTM(matrix.
columns() == space.get_num_dofs(), 
"invalid matrix dimensions");
 
  178        CubatureRuleType cubature(Cubature::ctor_factory, cubature_factory);
 
  181        int num_cubs = cubature.get_num_points();
 
  182        typename CubatureRuleType::PointType* cub_pt = cubature.get_points();
 
  183        DataType* cub_wg = cubature.get_weights();
 
  185        BACKEND_SKELETON_VOID(assemble_matrix1_cuda, assemble_matrix1_generic, assemble_matrix1_generic, matrix, space, cub_pt, cub_wg, num_cubs, alpha)
 
  189      void assemble_matrix1_generic(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, 
const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
  191        CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
 
  193        AssemblyCubatureData<DataType> cub_data = {(
void*)cub_pt, cub_wg, num_cubs};
 
  194        AssemblyMappingData<DataType, IndexType> mapping_data = mesh_data.get_assembly_field();
 
  197        VoxelAssembly::Arch::assemble_defo_csr_host(space, mat_data, cub_data, mapping_data, mesh_data.get_coloring_maps(), mesh_data.get_color_map_sizes(), alpha, nu);
 
  200      #ifdef FEAT_HAVE_CUDA 
  201      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& matrix, 
const SpaceType& space, 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* cub_pt, 
const DataType* cub_wg, 
int num_cubs, DataType alpha)
 const 
  203        VoxelAssembly::CSRMatrixData<DataType, IndexType> mat_data = {matrix.template val<LAFEM::Perspective::pod>(), matrix.row_ptr(), matrix.col_ind(), matrix.rows(), matrix.columns()};
 
  205        typedef typename Cubature::Rule<ShapeType, DataType, DataType>::PointType CubPointType;
 
  207        void* cub_pt_device = Util::cuda_malloc(num_cubs * 
sizeof(CubPointType));
 
  208        Util::cuda_copy_host_to_device(cub_pt_device, (
void*)cub_pt, num_cubs * 
sizeof(CubPointType));
 
  210        void* cub_wg_device = Util::cuda_malloc(num_cubs * 
sizeof(DataType));
 
  211        Util::cuda_copy_host_to_device(cub_wg_device, (
void*)cub_wg, num_cubs * 
sizeof(DataType));
 
  213        VoxelAssembly::AssemblyCubatureData<DataType> d_cub_data = {cub_pt_device, (DataType*)cub_wg_device, num_cubs};
 
  214        VoxelAssembly::AssemblyMappingData<DataType, IndexType> d_mapping_data = mesh_data.get_assembly_field();
 
  217        VoxelAssembly::Arch::assemble_defo_csr(space, mat_data, d_cub_data, d_mapping_data,  mesh_data.get_coloring_maps(), mesh_data.get_color_map_sizes(), alpha, nu);
 
  219        Util::cuda_free(cub_wg_device);
 
  220        Util::cuda_free(cub_pt_device);
 
  224      void assemble_matrix1_cuda(LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, dim>& DOXY(matrix), 
const SpaceType& DOXY(space), 
typename Cubature::Rule<ShapeType, DataType, DataType>::PointType* DOXY(cub_pt), 
const DataType* DOXY(cub_wg), 
int DOXY(num_cubs), DataType DOXY(alpha))
 const 
  226        XABORTM(
"What in the nine hells are you doing here?");
 
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Cubature Rule class template.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
Index columns() const
Retrieve matrix column count.
Standard Lagrange-2 Finite-Element space class template.
Tiny Matrix class template.
Data handler for Lagrange based FE spaces.
DataType nu
Scaling parameter.
Q2StandardHyperCube< dim_ > SpaceType
typedefs
DataHandler mesh_data
The datahandler.
Deformation Voxel Assembly template.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
@ dom_point
specifies whether the trafo should supply domain point coordinates