12#include <kernel/util/cuda_util.hpp> 
   13#include <kernel/util/random.hpp> 
   14#include <kernel/backend.hpp> 
   56        static std::map<void*, Util::Intern::MemoryInfo> 
_pool;
 
   70            std::cout << stderr << 
" Error: MemoryPool still contains memory chunks on deconstructor call\n";
 
   80        template <
typename DT_>
 
   83          DT_ * memory(
nullptr);
 
   89            count = count + (4ul - count%4);
 
   92          memory = (DT_*)Util::cuda_malloc_managed(count * 
sizeof(DT_));
 
   94          memory = (DT_*)::malloc(count * 
sizeof(DT_));
 
   96          if (memory == 
nullptr)
 
   97            XABORTM(
"MemoryPool allocation error!");
 
   99          Util::Intern::MemoryInfo mi;
 
  101          mi.size = count * 
sizeof(DT_);
 
  102          _pool.insert(std::pair<void*, Util::Intern::MemoryInfo>(memory, mi));
 
  112          std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
 
  113          if (it != 
_pool.end())
 
  115            it->second.counter = it->second.counter + 1;
 
  119          XABORTM(
"MemoryPool::increase_memory: Memory address not found!");
 
  125          if (address == 
nullptr)
 
  128          std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
 
  129          if (it != 
_pool.end())
 
  131            if(it->second.counter == 1)
 
  134              Util::cuda_free(address);
 
  142              it->second.counter = it->second.counter - 1;
 
  147          XABORTM(
"MemoryPool::release_memory: Memory address not found!");
 
  151        template <
typename DT_>
 
  152        [[deprecated(
"no download necessary in unified memory environment.")]]
 
  153        inline static void download(DT_ * dest, 
const DT_ * 
const src, 
const Index count)
 
  158          ::memcpy(dest, src, count * 
sizeof(DT_));
 
  162        template <
typename DT_>
 
  163        [[deprecated(
"no upload necessary in unified memory environment.")]]
 
  164        inline static void upload(DT_ * dest, 
const DT_ * 
const src, 
const Index count)
 
  169          ::memcpy(dest, src, count * 
sizeof(DT_));
 
  173        template <
typename DT_>
 
  174        [[deprecated(
"no get_element necessary in unified memory environment.")]]
 
  181        template <
typename DT_>
 
  189              FEAT::Util::cuda_set_memory(address, val, count);
 
  196              FEAT_PRAGMA_OMP(parallel 
for)
 
  197              for (
Index i = 0 ; i < count ; ++i)
 
  207        template <
typename DT_>
 
  215              std::vector<DT_> tmp(count);
 
  216              std::generate(tmp.begin(), tmp.end(), [&](){return rng(min,max);});
 
  217              FEAT::Util::cuda_copy(address, tmp.data(), count*
sizeof(DT_));
 
  225              for (
Index i(0) ; i < count ; ++i)
 
  227                address[i] = rng(min, max);
 
  235        template <
typename DT_>
 
  236        static void copy(DT_ * dest, 
const DT_ * src, 
const Index count)
 
  246              FEAT::Util::cuda_copy(dest, src, count * 
sizeof(DT_));
 
  254              FEAT_PRAGMA_OMP(parallel 
for)
 
  255              for (
Index i = 0 ; i < count ; ++i)
 
  260              ::memcpy(dest, src, count * 
sizeof(DT_));
 
  268        template <
typename DT_>
 
  275          FEAT_PRAGMA_OMP(parallel 
for)
 
  276          for (
Index i = 0 ; i < count ; ++i)
 
  281          ::memcpy(dest, src, count * 
sizeof(DT_));
 
  286        template <
typename DT1_, 
typename DT2_>
 
  295              FEAT::Util::cuda_convert(dest, src, count);
 
  302              FEAT_PRAGMA_OMP(parallel 
for)
 
  303              for (
Index i = 0 ; i < count ; ++i)
 
  305                dest[i] = DT1_(src[i]);
 
  312        NOINLINE 
static void synchronize()
 
  317            FEAT::Util::cuda_synchronize();
 
  321        static Index allocated_memory()
 
  324          for (
auto& i : 
_pool)
 
  326            bytes += i.second.size;
 
  331        static Index allocated_size(
void * address)
 
  333          std::map<void*, Util::Intern::MemoryInfo>::iterator it(
_pool.find(address));
 
  334          if (it != 
_pool.end())
 
  336            return it->second.size;
 
  339            XABORTM(
"MemoryPool::allocated_size: Memory address not found!");
 
#define XABORTM(msg)
Abort macro definition with a custom message.
#define XASSERT(expr)
Assertion macro definition.
static PreferredBackend get_preferred_backend()
get current preferred backend
static void copy(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static void convert(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static const DT_ & get_element(const DT_ *data, const Index index)
Retrieve the element at the given index.
static void initialize()
Setup memory pools.
static std::map< void *, Util::Intern::MemoryInfo > _pool
Map of all memory chunks in use.
static void convert(DT1_ *dest, const DT2_ *src, const Index count)
Convert datatype DT2_ from src into DT1_ in dest.
static void download(DT_ *dest, const DT_ *const src, const Index count)
download memory chunk to host memory
static void set_memory(DT_ *address, const DT_ val, const Index count=1)
set memory to specific value
static void upload(DT_ *dest, const DT_ *const src, const Index count)
upload memory chunk from host memory to device memory
static void set_memory(Random &rng, DT_ min, DT_ max, DT_ *address, const Index count=1)
Fill memory with random values generated by rng(min, max).
static void finalize()
Shutdown memory pool and clean up allocated memory pools.
static DT_ * allocate_memory(Index count)
allocate new memory
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
Pseudo-Random Number Generator.
std::uint64_t Index
Index data type.