12#include <kernel/util/cuda_util.hpp>
13#include <kernel/util/random.hpp>
14#include <kernel/backend.hpp>
57 static std::map<void*, Util::Intern::MemoryInfo>
_pool;
71 std::cout << stderr <<
" Error: MemoryPool still contains memory chunks on deconstructor call\n";
81 template <
typename DT_>
84 DT_ * memory(
nullptr);
90 count = count + (4ul - count%4);
93 memory = (DT_*)Util::cuda_malloc_managed(count *
sizeof(DT_));
95 memory = (DT_*)::malloc(count *
sizeof(DT_));
97 if (memory ==
nullptr)
98 XABORTM(
"MemoryPool allocation error!");
100 Util::Intern::MemoryInfo mi;
102 mi.size = count *
sizeof(DT_);
103 _pool.insert(std::pair<void*, Util::Intern::MemoryInfo>(memory, mi));
105 std::uninitialized_fill(memory, memory + count, DT_(42));
115 std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
116 if (it !=
_pool.end())
118 it->second.counter = it->second.counter + 1;
122 XABORTM(
"MemoryPool::increase_memory: Memory address not found!");
128 if (address ==
nullptr)
131 std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
132 if (it !=
_pool.end())
134 if(it->second.counter == 1)
137 Util::cuda_free(address);
145 it->second.counter = it->second.counter - 1;
150 XABORTM(
"MemoryPool::release_memory: Memory address not found!");
154 template <
typename DT_>
155 [[deprecated(
"no download necessary in unified memory environment.")]]
156 inline static void download(DT_ * dest,
const DT_ *
const src,
const Index count)
161 ::memcpy(dest, src, count *
sizeof(DT_));
165 template <
typename DT_>
166 [[deprecated(
"no upload necessary in unified memory environment.")]]
167 inline static void upload(DT_ * dest,
const DT_ *
const src,
const Index count)
172 ::memcpy(dest, src, count *
sizeof(DT_));
176 template <
typename DT_>
177 [[deprecated(
"no get_element necessary in unified memory environment.")]]
184 template <
typename DT_>
192 FEAT::Util::cuda_set_memory(address, val, count);
199 FEAT_PRAGMA_OMP(parallel
for)
200 for (
Index i = 0 ; i < count ; ++i)
210 template <
typename DT_>
218 std::vector<DT_> tmp(count);
219 std::generate(tmp.begin(), tmp.end(), [&](){return rng(min,max);});
220 FEAT::Util::cuda_copy(address, tmp.data(), count*
sizeof(DT_));
228 for (
Index i(0) ; i < count ; ++i)
230 address[i] = rng(min, max);
238 template <
typename DT_>
239 static void copy(DT_ * dest,
const DT_ * src,
const Index count)
249 FEAT::Util::cuda_copy(dest, src, count *
sizeof(DT_));
257 FEAT_PRAGMA_OMP(parallel
for)
258 for (
Index i = 0 ; i < count ; ++i)
263 ::memcpy(dest, src, count *
sizeof(DT_));
271 template <
typename DT_>
278 FEAT_PRAGMA_OMP(parallel
for)
279 for (
Index i = 0 ; i < count ; ++i)
284 ::memcpy(dest, src, count *
sizeof(DT_));
289 template <
typename DT1_,
typename DT2_>
298 FEAT::Util::cuda_convert(dest, src, count);
305 FEAT_PRAGMA_OMP(parallel
for)
306 for (
Index i = 0 ; i < count ; ++i)
308 dest[i] = DT1_(src[i]);
315 NOINLINE
static void synchronize()
320 FEAT::Util::cuda_synchronize();
324 static Index allocated_memory()
327 for (
auto& i :
_pool)
329 bytes += i.second.size;
334 static Index allocated_size(
void * address)
336 std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
337 if (it !=
_pool.end())
339 return it->second.size;
342 XABORTM(
"MemoryPool::allocated_size: Memory address not found!");
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERT(expr)
Assertion macro definition.
static PreferredBackend get_preferred_backend()
get current preferred backend
static void copy(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static void convert(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static const DT_ & get_element(const DT_ *data, const Index index)
receive element
static void initialize()
Setup memory pools.
static std::map< void *, Util::Intern::MemoryInfo > _pool
Map of all memory chunks in use.
static void convert(DT1_ *dest, const DT2_ *src, const Index count)
Convert datatype DT2_ from src into DT1_ in dest.
static void download(DT_ *dest, const DT_ *const src, const Index count)
download memory chunk to host memory
static void set_memory(DT_ *address, const DT_ val, const Index count=1)
set memory to specific value
static void upload(DT_ *dest, const DT_ *const src, const Index count)
upload memory chunk from host memory to device memory
static void set_memory(Random &rng, DT_ min, DT_ max, DT_ *address, const Index count=1)
set memory to random values generated by rng(min, max)
static void finalize()
Shutdown memory pool and clean up allocated memory pools.
static DT_ * allocate_memory(Index count)
allocate new memory
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
Pseudo-Random Number Generator.
std::uint64_t Index
Index data type.