FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
mirror.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/mirror.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
11
12/// \cond internal
13namespace FEAT
14{
15 namespace LAFEM
16 {
17 namespace Intern
18 {
19 template<typename DT_, typename IT_>
20 __global__ void cuda_mirror_gather_dv(const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
21 {
22 Index ii = threadIdx.x + blockDim.x * blockIdx.x;
23 if (ii >= nidx)
24 return;
25
26 buf[boff+ii] = vec[idx[ii]];
27 }
28
29 template<typename DT_, typename IT_>
30 __global__ void cuda_mirror_scatter_dv(const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
31 {
32 Index ii = threadIdx.x + blockDim.x * blockIdx.x;
33 if (ii >= nidx)
34 return;
35
36 vec[idx[ii]] += alpha*buf[boff+ii];
37 }
38
39 template<typename DT_, typename IT_>
40 __global__ void cuda_mirror_gather_dvb(const Index bs, const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
41 {
42 Index ii = threadIdx.x + blockDim.x * blockIdx.x;
43 if (ii >= nidx)
44 return;
45
46 for(Index k(0); k < bs; ++k)
47 {
48 buf[boff+ii*bs+k] = vec[idx[ii]*bs+k];
49 }
50 }
51
52 template<typename DT_, typename IT_>
53 __global__ void cuda_mirror_scatter_dvb(const Index bs, const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
54 {
55 Index ii = threadIdx.x + blockDim.x * blockIdx.x;
56 if (ii >= nidx)
57 return;
58
59 for(Index k(0); k < bs; ++k)
60 {
61 vec[idx[ii]*bs+k] += alpha*buf[boff+ii*bs+k];
62 }
63 }
64 } // namespace Intern
65
66 namespace Arch
67 {
68 template<typename DT_, typename IT_>
69 void Mirror::gather_dv_cuda(const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
70 {
71 Index blocksize = Util::cuda_blocksize_misc;
72 dim3 grid;
73 dim3 block;
74 block.x = (unsigned)blocksize;
75 grid.x = (unsigned)ceil((nidx)/(double)(block.x));
76
77 FEAT::LAFEM::Intern::cuda_mirror_gather_dv<<<grid, block>>>(boff, nidx, idx, buf, vec);
78
79 cudaDeviceSynchronize();
80#ifdef FEAT_DEBUG_MODE
81 cudaError_t last_error(cudaGetLastError());
82 if (cudaSuccess != last_error)
83 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
84#endif
85 }
86
87 template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint64_t*, float*, const float*);
88 template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint64_t*, double*, const double*);
89 template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint32_t*, float*, const float*);
90 template void Mirror::gather_dv_cuda(const Index, const Index, const std::uint32_t*, double*, const double*);
91
92 template<typename DT_, typename IT_>
93 void Mirror::scatter_dv_cuda(const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
94 {
95 Index blocksize = Util::cuda_blocksize_misc;
96 dim3 grid;
97 dim3 block;
98 block.x = (unsigned)blocksize;
99 grid.x = (unsigned)ceil((nidx)/(double)(block.x));
100
101 FEAT::LAFEM::Intern::cuda_mirror_scatter_dv<<<grid, block>>>(boff, nidx, idx, buf, vec, alpha);
102
103 cudaDeviceSynchronize();
104#ifdef FEAT_DEBUG_MODE
105 cudaError_t last_error(cudaGetLastError());
106 if (cudaSuccess != last_error)
107 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
108#endif
109 }
110
111 template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint64_t*, const float*, float*, float);
112 template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint64_t*, const double*, double*, double);
113 template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint32_t*, const float*, float*, float);
114 template void Mirror::scatter_dv_cuda(const Index, const Index, const std::uint32_t*, const double*, double*, double);
115
116 template<typename DT_, typename IT_>
117 void Mirror::gather_dvb_cuda(const Index bs, const Index boff, const Index nidx, const IT_* idx, DT_* buf, const DT_* vec)
118 {
119 Index blocksize = Util::cuda_blocksize_misc;
120 dim3 grid;
121 dim3 block;
122 block.x = (unsigned)blocksize;
123 grid.x = (unsigned)ceil((nidx)/(double)(block.x));
124
125 FEAT::LAFEM::Intern::cuda_mirror_gather_dvb<<<grid, block>>>(bs, boff, nidx, idx, buf, vec);
126
127 cudaDeviceSynchronize();
128#ifdef FEAT_DEBUG_MODE
129 cudaError_t last_error(cudaGetLastError());
130 if (cudaSuccess != last_error)
131 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
132#endif
133 }
134
135 template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, float*, const float*);
136 template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, double*, const double*);
137 template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, float*, const float*);
138 template void Mirror::gather_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, double*, const double*);
139
140 template<typename DT_, typename IT_>
141 void Mirror::scatter_dvb_cuda(const Index bs, const Index boff, const Index nidx, const IT_* idx, const DT_* buf, DT_* vec, const DT_ alpha)
142 {
143 Index blocksize = Util::cuda_blocksize_misc;
144 dim3 grid;
145 dim3 block;
146 block.x = (unsigned)blocksize;
147 grid.x = (unsigned)ceil((nidx)/(double)(block.x));
148
149 FEAT::LAFEM::Intern::cuda_mirror_scatter_dvb<<<grid, block>>>(bs, boff, nidx, idx, buf, vec, alpha);
150
151 cudaDeviceSynchronize();
152#ifdef FEAT_DEBUG_MODE
153 cudaError_t last_error(cudaGetLastError());
154 if (cudaSuccess != last_error)
155 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
156#endif
157 }
158
159 template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, const float*, float*, float);
160 template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint64_t*, const double*, double*, double);
161 template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, const float*, float*, float);
162 template void Mirror::scatter_dvb_cuda(const Index, const Index, const Index, const std::uint32_t*, const double*, double*, double);
163 }
164 } // namespace LAFEM
165} // namespace FEAT
166
167/// \endcond