FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
memory_pool.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
12#include <kernel/util/cuda_util.hpp>
13#include <kernel/util/random.hpp>
14#include <kernel/backend.hpp>
15
16#include <map>
17#include <cstring>
18#include <memory>
19#include <typeinfo>
20#include <cstdio>
21#include <cstddef>
22
23#ifdef FEAT_HAVE_MKL
24FEAT_DISABLE_WARNINGS
25#include <mkl.h>
26FEAT_RESTORE_WARNINGS
27#endif
28
29
30namespace FEAT
31{
32 namespace Util
33 {
35 namespace Intern
36 {
// Bookkeeping record stored per memory chunk in the pool's address map.
37 struct MemoryInfo
38 {
// Reference counter of the chunk: set to 1 on allocation, incremented by
// increase_memory and decremented by release_memory.
39 Index counter;
// Size of the chunk in bytes (element count times sizeof of the element type).
40 Index size;
41 };
42 }
44 } // namespace Util
45
54 {
55 private:
// Map of all memory chunks currently in use: chunk address -> reference counter and byte size.
57 static std::map<void*, Util::Intern::MemoryInfo> _pool;
58
59 public:
60
/// Sets up the memory pool; the body is intentionally empty, nothing is prepared here.
static void initialize() {}
65
67 static void finalize()
68 {
69 if (_pool.size() > 0)
70 {
71 std::cout << stderr << " Error: MemoryPool still contains memory chunks on deconstructor call\n";
72 std::exit(1);
73 }
74
75#ifdef FEAT_HAVE_MKL
76 mkl_free_buffers();
77#endif
78 }
79
81 template <typename DT_>
82 static DT_ * allocate_memory(Index count)
83 {
84 DT_ * memory(nullptr);
85
86 if (count == 0)
87 return memory;
88
89 if (count%4 != 0)
90 count = count + (4ul - count%4);
91
92#ifdef FEAT_HAVE_CUDA
93 memory = (DT_*)Util::cuda_malloc_managed(count * sizeof(DT_));
94#else
95 memory = (DT_*)::malloc(count * sizeof(DT_));
96#endif
97 if (memory == nullptr)
98 XABORTM("MemoryPool allocation error!");
99
100 Util::Intern::MemoryInfo mi;
101 mi.counter = 1;
102 mi.size = count * sizeof(DT_);
103 _pool.insert(std::pair<void*, Util::Intern::MemoryInfo>(memory, mi));
104
105 std::uninitialized_fill(memory, memory + count, DT_(42));
106
107 return memory;
108 }
109
111 static void increase_memory(void * address)
112 {
113 XASSERT(address != nullptr);
114
115 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
116 if (it != _pool.end())
117 {
118 it->second.counter = it->second.counter + 1;
119 return;
120 }
121
122 XABORTM("MemoryPool::increase_memory: Memory address not found!");
123 }
124
126 static void release_memory(void * address)
127 {
128 if (address == nullptr)
129 return;
130
131 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
132 if (it != _pool.end())
133 {
134 if(it->second.counter == 1)
135 {
136#ifdef FEAT_HAVE_CUDA
137 Util::cuda_free(address);
138#else
139 ::free(address);
140#endif
141 _pool.erase(it);
142 }
143 else
144 {
145 it->second.counter = it->second.counter - 1;
146 }
147 return;
148 }
149
150 XABORTM("MemoryPool::release_memory: Memory address not found!");
151 }
152
154 template <typename DT_>
155 [[deprecated("no download necessary in unified memory environment.")]]
156 inline static void download(DT_ * dest, const DT_ * const src, const Index count)
157 {
158 if (dest == src)
159 return;
160
161 ::memcpy(dest, src, count * sizeof(DT_));
162 }
163
165 template <typename DT_>
166 [[deprecated("no upload necessary in unified memory environment.")]]
167 inline static void upload(DT_ * dest, const DT_ * const src, const Index count)
168 {
169 if (dest == src)
170 return;
171
172 ::memcpy(dest, src, count * sizeof(DT_));
173 }
174
176 template <typename DT_>
177 [[deprecated("no get_element necessary in unified memory environment.")]]
178 inline static const DT_ & get_element(const DT_ * data, const Index index)
179 {
180 return data[index];
181 }
182
// Sets `count` consecutive entries starting at `address` to the value `val`.
// With FEAT_HAVE_CUDA defined, one branch delegates to FEAT::Util::cuda_set_memory;
// the default branch assigns the value in a loop annotated with FEAT_PRAGMA_OMP(parallel for).
// NOTE(review): the embedded source lines 187, 190 and 196 are missing from this listing;
// presumably they hold a switch over the preferred backend and its CUDA case label - confirm
// against the original header before changing this function.
184 template <typename DT_>
185 static void set_memory(DT_ * address, const DT_ val, const Index count = 1)
186 {
188 {
189#ifdef FEAT_HAVE_CUDA
191 {
192 FEAT::Util::cuda_set_memory(address, val, count);
193 return;
194 }
195#endif
197 default:
198 {
// element-wise assignment of val to address[0..count)
199 FEAT_PRAGMA_OMP(parallel for)
200 for (Index i = 0 ; i < count ; ++i)
201 {
202 address[i] = val;
203 }
204 return;
205 }
206 }
207 }
208
// Fills `count` consecutive entries starting at `address` with values produced by rng(min, max).
// With FEAT_HAVE_CUDA defined, one branch generates the values into a temporary std::vector and
// transfers them with FEAT::Util::cuda_copy; the default branch writes them directly in a serial loop.
// NOTE(review): the embedded source lines 213, 216 and 224 are missing from this listing;
// presumably they hold a switch over the preferred backend and its CUDA case label - confirm
// against the original header before changing this function.
210 template <typename DT_>
211 static void set_memory(Random& rng, DT_ min, DT_ max, DT_ * address, const Index count = 1)
212 {
214 {
215#ifdef FEAT_HAVE_CUDA
217 {
// generate all values sequentially into a temporary buffer, then copy count*sizeof(DT_) bytes
218 std::vector<DT_> tmp(count);
219 std::generate(tmp.begin(), tmp.end(), [&](){return rng(min,max);});
220 FEAT::Util::cuda_copy(address, tmp.data(), count*sizeof(DT_));
221 return;
222 }
223#endif
225 default:
226 {
227 // we don't use OpenMP here, because this would result in a non-deterministic vector
228 for (Index i(0) ; i < count ; ++i)
229 {
230 address[i] = rng(min, max);
231 }
232 return;
233 }
234 }
235 }
236
// Copies `count` elements from `src` to `dest`; returns immediately if both pointers are equal.
// With FEAT_HAVE_CUDA defined, one branch delegates to FEAT::Util::cuda_copy with the size in bytes.
// The default branch copies element-wise in an OpenMP-annotated loop if FEAT_HAVE_OMP is defined
// and with memcpy otherwise.
// NOTE(review): the embedded source lines 244, 247 and 253 are missing from this listing;
// presumably they hold a switch over the preferred backend and its CUDA case label - confirm
// against the original header before changing this function.
238 template <typename DT_>
239 static void copy(DT_ * dest, const DT_ * src, const Index count)
240 {
// self-copy is a no-op
241 if (dest == src)
242 return;
243
245 {
246#ifdef FEAT_HAVE_CUDA
248 {
249 FEAT::Util::cuda_copy(dest, src, count * sizeof(DT_));
250 return;
251 }
252#endif
254 default:
255 {
256#ifdef FEAT_HAVE_OMP
257 FEAT_PRAGMA_OMP(parallel for)
258 for (Index i = 0 ; i < count ; ++i)
259 {
260 dest[i] = src[i];
261 }
262#else
263 ::memcpy(dest, src, count * sizeof(DT_));
264#endif
265 return;
266 }
267 }
268 }
269
271 template <typename DT_>
272 static void convert(DT_ * dest, const DT_ * src, const Index count)
273 {
274 if (dest == src)
275 return;
276
277#ifdef FEAT_HAVE_OMP
278 FEAT_PRAGMA_OMP(parallel for)
279 for (Index i = 0 ; i < count ; ++i)
280 {
281 dest[i] = src[i];
282 }
283#else
284 ::memcpy(dest, src, count * sizeof(DT_));
285#endif
286 }
287
// Converts `count` entries of type DT2_ from `src` into type DT1_ in `dest`.
// With FEAT_HAVE_CUDA defined, one branch delegates to FEAT::Util::cuda_convert; the default
// branch converts each entry via DT1_(src[i]) in a loop annotated with FEAT_PRAGMA_OMP(parallel for).
// NOTE(review): the embedded source lines 293, 296 and 302 are missing from this listing;
// presumably they hold a switch over the preferred backend and its CUDA case label - confirm
// against the original header before changing this function.
289 template <typename DT1_, typename DT2_>
290 static void convert(DT1_ * dest, const DT2_ * src, const Index count)
291 {
292
294 {
295#ifdef FEAT_HAVE_CUDA
297 {
298 FEAT::Util::cuda_convert(dest, src, count);
299 return;
300 }
301#endif
303 default:
304 {
305 FEAT_PRAGMA_OMP(parallel for)
306 for (Index i = 0 ; i < count ; ++i)
307 {
// explicit conversion of every entry to the destination type
308 dest[i] = DT1_(src[i]);
309 }
310 return;
311 }
312 }
313 }
314
// Calls FEAT::Util::cuda_synchronize() if FEAT_HAVE_CUDA is defined; otherwise the body is empty.
// NOTE(review): the embedded source line 319 is missing from this listing; judging from the
// comment on line 318 it is presumably a condition guarding the call below - confirm against
// the original header.
315 NOINLINE static void synchronize()
316 {
317#ifdef FEAT_HAVE_CUDA
318 //avoid calling expensive synchronize if cuda is not actively used
320 FEAT::Util::cuda_synchronize();
321#endif
322 }
323
324 static Index allocated_memory()
325 {
326 Index bytes(0);
327 for (auto& i : _pool)
328 {
329 bytes += i.second.size;
330 }
331 return bytes;
332 }
333
334 static Index allocated_size(void * address)
335 {
336 std::map<void*, Util::Intern::MemoryInfo>::iterator it(_pool.find(address));
337 if (it != _pool.end())
338 {
339 return it->second.size;
340 }
341 else
342 XABORTM("MemoryPool::allocated_size: Memory address not found!");
343 }
344 };
345
346} // namespace FEAT
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
FEAT Kernel base header.
static PreferredBackend get_preferred_backend()
get current preferred backend
Definition: backend.cpp:27
Memory management.
Definition: memory_pool.hpp:54
static void copy(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static void convert(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static const DT_ & get_element(const DT_ *data, const Index index)
receive element
static void initialize()
Setup memory pools.
Definition: memory_pool.hpp:62
static std::map< void *, Util::Intern::MemoryInfo > _pool
Map of all memory chunks in use.
Definition: memory_pool.hpp:57
static void convert(DT1_ *dest, const DT2_ *src, const Index count)
Convert datatype DT2_ from src into DT1_ in dest.
static void download(DT_ *dest, const DT_ *const src, const Index count)
download memory chunk to host memory
static void set_memory(DT_ *address, const DT_ val, const Index count=1)
set memory to specific value
static void upload(DT_ *dest, const DT_ *const src, const Index count)
upload memory chunk from host memory to device memory
static void set_memory(Random &rng, DT_ min, DT_ max, DT_ *address, const Index count=1)
set memory to random values generated by rng(min, max)
static void finalize()
Shutdown memory pool and clean up allocated memory pools.
Definition: memory_pool.hpp:67
static DT_ * allocate_memory(Index count)
allocate new memory
Definition: memory_pool.hpp:82
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
Pseudo-Random Number Generator.
Definition: random.hpp:54
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.