FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
transpose.cu
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/base_header.hpp>
8#include <kernel/lafem/arch/transpose.hpp>
9#include <kernel/util/exception.hpp>
10#include <kernel/util/memory_pool.hpp>
11
12using namespace FEAT;
13using namespace FEAT::LAFEM;
14using namespace FEAT::LAFEM::Arch;
15
// Transposes a dense single-precision matrix via cublasSgeam (r = 1 * x^T + 0 * B).
//
// In cuBLAS' column-major terms, x is read as a columns_x-by-rows_x array with
// leading dimension columns_x, and r is written as a rows_x-by-columns_x array
// with leading dimension rows_x (i.e. a row-major rows_x-by-columns_x input is
// turned into its row-major columns_x-by-rows_x transpose).
//
// \param[out] r         Destination buffer; may be identical to x.
// \param[in]  x         Source buffer (passed to cuBLAS / device-to-device copy,
//                       so it is expected to be device memory).
// \param[in]  rows_x    Row count of x; narrowed to int for cuBLAS.
// \param[in]  columns_x Column count of x; narrowed to int for cuBLAS.
//
// Throws InternalError if cuBLAS reports a failure, and (in FEAT_DEBUG_MODE)
// if the CUDA runtime reports an error after synchronisation.
void Transpose::value_cuda(float * r, const float * const x, Index rows_x, Index columns_x)
{
  const float one(1);
  const float zero(0);
  const int m = int(rows_x);
  const int n = int(columns_x);

  // scratch copy of the input, only allocated when r aliases x
  float * temp = nullptr;
  const float * source = x;

  if (r == x)
  {
    // the transposing operand must not overlap the output, so work on a copy
    const Index bytes = rows_x * columns_x * sizeof(float);
    temp = (float*)Util::cuda_malloc(bytes);
    Util::cuda_copy_device_to_device(temp, x, bytes);
    source = temp;
  }

  // B is never read because beta == 0, but the cuBLAS geam documentation still
  // requires ldb >= max(1, m) for an untransposed B, hence ldb = m (not n).
  const cublasStatus_t status = cublasSgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, &one, source, n, &zero, nullptr, m, r, m);

  if (status != CUBLAS_STATUS_SUCCESS)
  {
    // release the scratch buffer before bailing out, otherwise it would leak
    Util::cuda_free(temp);
    throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
  }

  // wait for the transpose to finish before the scratch buffer is released
  cudaDeviceSynchronize();
#ifdef FEAT_DEBUG_MODE
  // NOTE(review): temp is not released on this path; freeing after an execution
  // error could itself fail and hide the diagnostic below.
  cudaError_t last_error(cudaGetLastError());
  if (cudaSuccess != last_error)
    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
#endif

  // temp is still nullptr when no scratch copy was needed
  Util::cuda_free(temp);
}
46
// Transposes a dense double-precision matrix via cublasDgeam (r = 1 * x^T + 0 * B).
//
// In cuBLAS' column-major terms, x is read as a columns_x-by-rows_x array with
// leading dimension columns_x, and r is written as a rows_x-by-columns_x array
// with leading dimension rows_x (i.e. a row-major rows_x-by-columns_x input is
// turned into its row-major columns_x-by-rows_x transpose).
//
// \param[out] r         Destination buffer; may be identical to x.
// \param[in]  x         Source buffer (passed to cuBLAS / device-to-device copy,
//                       so it is expected to be device memory).
// \param[in]  rows_x    Row count of x; narrowed to int for cuBLAS.
// \param[in]  columns_x Column count of x; narrowed to int for cuBLAS.
//
// Throws InternalError if cuBLAS reports a failure, and (in FEAT_DEBUG_MODE)
// if the CUDA runtime reports an error after synchronisation.
void Transpose::value_cuda(double * r, const double * const x, Index rows_x, Index columns_x)
{
  const double one(1);
  const double zero(0);
  const int m = int(rows_x);
  const int n = int(columns_x);

  // scratch copy of the input, only allocated when r aliases x
  double * temp = nullptr;
  const double * source = x;

  if (r == x)
  {
    // the transposing operand must not overlap the output, so work on a copy;
    // both buffers are handed to cuBLAS afterwards, so use the explicit
    // device-to-device copy exactly like the single-precision overload does
    const Index bytes = rows_x * columns_x * sizeof(double);
    temp = (double*)Util::cuda_malloc(bytes);
    Util::cuda_copy_device_to_device(temp, x, bytes);
    source = temp;
  }

  // B is never read because beta == 0, but the cuBLAS geam documentation still
  // requires ldb >= max(1, m) for an untransposed B, hence ldb = m (not n).
  const cublasStatus_t status = cublasDgeam(Util::Intern::cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, &one, source, n, &zero, nullptr, m, r, m);

  if (status != CUBLAS_STATUS_SUCCESS)
  {
    // release the scratch buffer before bailing out, otherwise it would leak
    Util::cuda_free(temp);
    throw InternalError(__func__, __FILE__, __LINE__, "cuda error: " + stringify(cublasGetStatusString(status)));
  }

  // wait for the transpose to finish before the scratch buffer is released
  cudaDeviceSynchronize();
#ifdef FEAT_DEBUG_MODE
  // NOTE(review): temp is not released on this path; freeing after an execution
  // error could itself fail and hide the diagnostic below.
  cudaError_t last_error(cudaGetLastError());
  if (cudaSuccess != last_error)
    throw InternalError(__func__, __FILE__, __LINE__, "CUDA error occurred in execution!\n" + stringify(cudaGetErrorString(last_error)));
#endif

  // temp is still nullptr when no scratch copy was needed
  Util::cuda_free(temp);
}