FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
memory_pool.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
12#include <kernel/util/cuda_util.hpp>
13#include <kernel/util/random.hpp>
14#include <kernel/backend.hpp>
15
16#include <map>
17#include <cstring>
18#include <typeinfo>
19#include <cstdio>
20#include <cstddef>
21
22#ifdef FEAT_HAVE_MKL
23FEAT_DISABLE_WARNINGS
24#include <mkl.h>
25FEAT_RESTORE_WARNINGS
26#endif
27
28
29namespace FEAT
30{
31 namespace Util
32 {
34 namespace Intern
35 {
36 struct MemoryInfo
37 {
38 Index counter;
39 Index size;
40 };
41 }
43 } // namespace Util
44
53 {
54 private:
// Map of all memory chunks in use: the key is the chunk's address, the value
// holds its reference counter and its size in bytes.
56 static std::map<void*, Util::Intern::MemoryInfo> _pool;
57
58 public:
59
/// Sets up the memory pool. There is currently nothing to prepare, so this is a no-op.
static void initialize() {}
64
66 static void finalize()
67 {
68 if (_pool.size() > 0)
69 {
70 std::cout << stderr << " Error: MemoryPool still contains memory chunks on deconstructor call\n";
71 std::exit(1);
72 }
73
74#ifdef FEAT_HAVE_MKL
75 mkl_free_buffers();
76#endif
77 }
78
80 template <typename DT_>
81 static DT_ * allocate_memory(Index count)
82 {
83 DT_ * memory(nullptr);
84
85 if (count == 0)
86 return memory;
87
88 if (count%4 != 0)
89 count = count + (4ul - count%4);
90
91#ifdef FEAT_HAVE_CUDA
92 memory = (DT_*)Util::cuda_malloc_managed(count * sizeof(DT_));
93#else
94 memory = (DT_*)::malloc(count * sizeof(DT_));
95#endif
96 if (memory == nullptr)
97 XABORTM("MemoryPool allocation error!");
98
99 Util::Intern::MemoryInfo mi;
100 mi.counter = 1;
101 mi.size = count * sizeof(DT_);
102 _pool.insert(std::pair<void*, Util::Intern::MemoryInfo>(memory, mi));
103
104 return memory;
105 }
106
108 static void increase_memory(void * address)
109 {
110 XASSERT(address != nullptr);
111
112 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
113 if (it != _pool.end())
114 {
115 it->second.counter = it->second.counter + 1;
116 return;
117 }
118
119 XABORTM("MemoryPool::increase_memory: Memory address not found!");
120 }
121
123 static void release_memory(void * address)
124 {
125 if (address == nullptr)
126 return;
127
128 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
129 if (it != _pool.end())
130 {
131 if(it->second.counter == 1)
132 {
133#ifdef FEAT_HAVE_CUDA
134 Util::cuda_free(address);
135#else
136 ::free(address);
137#endif
138 _pool.erase(it);
139 }
140 else
141 {
142 it->second.counter = it->second.counter - 1;
143 }
144 return;
145 }
146
147 XABORTM("MemoryPool::release_memory: Memory address not found!");
148 }
149
151 template <typename DT_>
152 [[deprecated("no download necessary in unified memory environment.")]]
153 inline static void download(DT_ * dest, const DT_ * const src, const Index count)
154 {
155 if (dest == src)
156 return;
157
158 ::memcpy(dest, src, count * sizeof(DT_));
159 }
160
162 template <typename DT_>
163 [[deprecated("no upload necessary in unified memory environment.")]]
164 inline static void upload(DT_ * dest, const DT_ * const src, const Index count)
165 {
166 if (dest == src)
167 return;
168
169 ::memcpy(dest, src, count * sizeof(DT_));
170 }
171
173 template <typename DT_>
174 [[deprecated("no get_element necessary in unified memory environment.")]]
175 inline static const DT_ & get_element(const DT_ * data, const Index index)
176 {
177 return data[index];
178 }
179
// Sets `count` consecutive elements starting at `address` to the value `val`.
//
// address: pointer to the first element to overwrite
// val:     value written to every element
// count:   number of elements to set (defaults to 1)
181 template <typename DT_>
182 static void set_memory(DT_ * address, const DT_ val, const Index count = 1)
183 {
// NOTE(review): this listing skips its own lines 184, 187 and 193; the braces and the
// `default:` label below only make sense inside a switch statement, so the switch
// header and the case labels appear to have been lost — confirm against the real header.
185 {
186#ifdef FEAT_HAVE_CUDA
// CUDA builds only: delegate the fill to the CUDA utility function.
188 {
189 FEAT::Util::cuda_set_memory(address, val, count);
190 return;
191 }
192#endif
194 default:
195 {
// Fallback: plain element-wise fill; FEAT_PRAGMA_OMP presumably expands to an
// OpenMP pragma that parallelizes the loop — TODO confirm.
196 FEAT_PRAGMA_OMP(parallel for)
197 for (Index i = 0 ; i < count ; ++i)
198 {
199 address[i] = val;
200 }
201 return;
202 }
203 }
204 }
205
// Fills `count` consecutive elements starting at `address` with values produced
// by calling rng(min, max) once per element.
//
// rng:      random number generator supplying the values
// min, max: arguments forwarded to every rng call
// address:  pointer to the first element to overwrite
// count:    number of elements to fill (defaults to 1)
207 template <typename DT_>
208 static void set_memory(Random& rng, DT_ min, DT_ max, DT_ * address, const Index count = 1)
209 {
// NOTE(review): this listing skips its own lines 210, 213 and 221; the braces and the
// `default:` label below only make sense inside a switch statement, so the switch
// header and the case labels appear to have been lost — confirm against the real header.
211 {
212#ifdef FEAT_HAVE_CUDA
// CUDA builds only: generate the values into a temporary std::vector and
// transfer them to `address` with cuda_copy.
// NOTE(review): no <vector> or <algorithm> include is visible in this listing;
// presumably they arrive transitively — verify.
214 {
215 std::vector<DT_> tmp(count);
216 std::generate(tmp.begin(), tmp.end(), [&](){return rng(min,max);});
217 FEAT::Util::cuda_copy(address, tmp.data(), count*sizeof(DT_));
218 return;
219 }
220#endif
222 default:
223 {
224 // we don't use OpenMP here, because this would result in a non-deterministic vector
225 for (Index i(0) ; i < count ; ++i)
226 {
227 address[i] = rng(min, max);
228 }
229 return;
230 }
231 }
232 }
233
// Copies `count` elements from `src` to `dest`.
//
// dest:  destination buffer
// src:   source buffer
// count: number of elements to copy
235 template <typename DT_>
236 static void copy(DT_ * dest, const DT_ * src, const Index count)
237 {
// Nothing to do when source and destination are the same buffer.
238 if (dest == src)
239 return;
240
// NOTE(review): this listing skips its own lines 241, 244 and 250; the braces and the
// `default:` label below only make sense inside a switch statement, so the switch
// header and the case labels appear to have been lost — confirm against the real header.
242 {
243#ifdef FEAT_HAVE_CUDA
// CUDA builds only: delegate to cuda_copy, which takes the size in bytes.
245 {
246 FEAT::Util::cuda_copy(dest, src, count * sizeof(DT_));
247 return;
248 }
249#endif
251 default:
252 {
// Fallback: with FEAT_HAVE_OMP an element-wise loop is used (presumably
// parallelized by FEAT_PRAGMA_OMP — TODO confirm), otherwise a single memcpy.
253#ifdef FEAT_HAVE_OMP
254 FEAT_PRAGMA_OMP(parallel for)
255 for (Index i = 0 ; i < count ; ++i)
256 {
257 dest[i] = src[i];
258 }
259#else
260 ::memcpy(dest, src, count * sizeof(DT_));
261#endif
262 return;
263 }
264 }
265 }
266
268 template <typename DT_>
269 static void convert(DT_ * dest, const DT_ * src, const Index count)
270 {
271 if (dest == src)
272 return;
273
274#ifdef FEAT_HAVE_OMP
275 FEAT_PRAGMA_OMP(parallel for)
276 for (Index i = 0 ; i < count ; ++i)
277 {
278 dest[i] = src[i];
279 }
280#else
281 ::memcpy(dest, src, count * sizeof(DT_));
282#endif
283 }
284
// Converts `count` elements of type DT2_ read from `src` into type DT1_ and
// stores them in `dest`.
//
// dest:  destination buffer (element type DT1_)
// src:   source buffer (element type DT2_)
// count: number of elements to convert
286 template <typename DT1_, typename DT2_>
287 static void convert(DT1_ * dest, const DT2_ * src, const Index count)
288 {
289
// NOTE(review): this listing skips its own lines 290, 293 and 299; the braces and the
// `default:` label below only make sense inside a switch statement, so the switch
// header and the case labels appear to have been lost — confirm against the real header.
291 {
292#ifdef FEAT_HAVE_CUDA
// CUDA builds only: delegate the conversion to the CUDA utility function.
294 {
295 FEAT::Util::cuda_convert(dest, src, count);
296 return;
297 }
298#endif
300 default:
301 {
// Fallback: element-wise explicit conversion DT1_(src[i]); FEAT_PRAGMA_OMP
// presumably parallelizes the loop — TODO confirm.
302 FEAT_PRAGMA_OMP(parallel for)
303 for (Index i = 0 ; i < count ; ++i)
304 {
305 dest[i] = DT1_(src[i]);
306 }
307 return;
308 }
309 }
310 }
311
// Calls FEAT::Util::cuda_synchronize() in builds with FEAT_HAVE_CUDA; without
// CUDA support the function body is empty.
312 NOINLINE static void synchronize()
313 {
314#ifdef FEAT_HAVE_CUDA
315 //avoid calling expensive synchronize if cuda is not actively used
// NOTE(review): this listing skips its own line 316; given the comment above, a
// condition guarding the call below was presumably lost — confirm against the real header.
317 FEAT::Util::cuda_synchronize();
318#endif
319 }
320
321 static Index allocated_memory()
322 {
323 Index bytes(0);
324 for (auto& i : _pool)
325 {
326 bytes += i.second.size;
327 }
328 return bytes;
329 }
330
331 static Index allocated_size(void * address)
332 {
333 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
334 if (it != _pool.end())
335 {
336 return it->second.size;
337 }
338 else
339 XABORTM("MemoryPool::allocated_size: Memory address not found!");
340 }
341 };
342
343} // namespace FEAT
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
FEAT Kernel base header.
static PreferredBackend get_preferred_backend()
get current preferred backend
Definition: backend.cpp:27
Memory management.
Definition: memory_pool.hpp:53
static void copy(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static void convert(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static const DT_ & get_element(const DT_ *data, const Index index)
retrieve the element at the given index
static void initialize()
Setup memory pools.
Definition: memory_pool.hpp:61
static std::map< void *, Util::Intern::MemoryInfo > _pool
Map of all memory chunks in use.
Definition: memory_pool.hpp:56
static void convert(DT1_ *dest, const DT2_ *src, const Index count)
Convert datatype DT2_ from src into DT1_ in dest.
static void download(DT_ *dest, const DT_ *const src, const Index count)
download memory chunk to host memory
static void set_memory(DT_ *address, const DT_ val, const Index count=1)
set memory to specific value
static void upload(DT_ *dest, const DT_ *const src, const Index count)
upload memory chunk from host memory to device memory
static void set_memory(Random &rng, DT_ min, DT_ max, DT_ *address, const Index count=1)
set memory to random values, each generated by calling rng(min, max)
static void finalize()
Shutdown memory pool and clean up allocated memory pools.
Definition: memory_pool.hpp:66
static DT_ * allocate_memory(Index count)
allocate new memory
Definition: memory_pool.hpp:81
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
Pseudo-Random Number Generator.
Definition: random.hpp:54
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.