1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/transpose.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
13using namespace FEAT::LAFEM;
14using namespace FEAT::LAFEM::Arch;
16void Transpose::value_cuda(float * r, const float * const x, Index rows_x, Index columns_x)
18 cublasStatus_t status;
21 float* temp = nullptr;
25 temp = (float*)Util::cuda_malloc(rows_x * columns_x * sizeof(float));
26 Util::cuda_copy_device_to_device(temp, x, rows_x * columns_x * sizeof(float));
27 status = cublasSgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, temp, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
31 status = cublasSgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, x, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
33 if (status != CUBLAS_STATUS_SUCCESS)
34 throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
37 cudaDeviceSynchronize();
39 cudaError_t last_error(cudaGetLastError());
40 if (cudaSuccess != last_error)
41 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
44 Util::cuda_free(temp);
47void Transpose::value_cuda(double * r, const double * const x, Index rows_x, Index columns_x)
49 cublasStatus_t status;
52 double *temp = nullptr;
56 temp = (double*)Util::cuda_malloc(rows_x * columns_x * sizeof(double));
57 Util::cuda_copy(temp, x, rows_x * columns_x * sizeof(double));
58 status = cublasDgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, temp, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
62 status = cublasDgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, int(rows_x), int(columns_x), &one, x, int(columns_x), &zero, nullptr, int(columns_x), r, int(rows_x));
65 if (status != CUBLAS_STATUS_SUCCESS)
66 throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
68 cudaDeviceSynchronize();
70 cudaError_t last_error(cudaGetLastError());
71 if (cudaSuccess != last_error)
72 throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
74 Util::cuda_free(temp);