// FEAT3: Finite Element Analysis Toolbox, Version 3
// Copyright (C) 2010 by Stefan Turek & the FEAT group
// FEAT3 is released under the GNU General Public License version 3,
// see the file 'copyright.txt' in the top level directory for details.
 
    7#include <kernel/base_header.hpp>
 
    8#include <kernel/lafem/arch/transpose.hpp>
 
    9#include <kernel/util/exception.hpp>
 
   10#include <kernel/util/memory_pool.hpp>
 
   13using namespace FEAT::LAFEM;
 
   14using namespace FEAT::LAFEM::Arch;
 
   16void Transpose::value_cuda(float * r, const float * const x, Index rows_x, Index columns_x)
 
   18  cublasStatus_t status;
 
   21  float* temp = nullptr;
 
   25    temp = (float*)Util::cuda_malloc(rows_x * columns_x * sizeof(float));
 
   26    Util::cuda_copy_device_to_device(temp, x, rows_x * columns_x * sizeof(float));
 
   27    status = cublasSgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, temp, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
 
   31    status = cublasSgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, x, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
 
   33  if (status != CUBLAS_STATUS_SUCCESS)
 
   34    throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
 
   37  cudaDeviceSynchronize();
 
   39  cudaError_t last_error(cudaGetLastError());
 
   40  if (cudaSuccess != last_error)
 
   41    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
   44  Util::cuda_free(temp);
 
   47void Transpose::value_cuda(double * r, const double * const x, Index rows_x, Index columns_x)
 
   49  cublasStatus_t status;
 
   52  double *temp = nullptr;
 
   56    temp = (double*)Util::cuda_malloc(rows_x * columns_x * sizeof(double));
 
   57    Util::cuda_copy(temp, x, rows_x * columns_x * sizeof(double));
 
   58    status = cublasDgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, temp, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
 
   62    status = cublasDgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, x, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
 
   65  if (status != CUBLAS_STATUS_SUCCESS)
 
   66    throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
 
   68  cudaDeviceSynchronize();
 
   70  cudaError_t last_error(cudaGetLastError());
 
   71  if (cudaSuccess != last_error)
 
   72    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
 
   74  Util::cuda_free(temp);