cfitsTK
|
This structure holds the GPU computation setup for matrix multiplication. More...
#include <cudacomp.h>
Data Fields | |
int_fast8_t | init |
int_fast8_t * | refWFSinit |
int_fast8_t | alloc |
long | CM_ID |
long | CM_cnt |
long | timerID |
uint_fast32_t | M |
uint_fast32_t | N |
int_fast8_t | sem |
synchronization More... | |
int_fast8_t | gpuinit |
sem_t ** | semptr1 |
one semaphore per thread More... | |
sem_t ** | semptr2 |
sem_t ** | semptr3 |
sem_t ** | semptr4 |
sem_t ** | semptr5 |
float * | cMat |
float ** | cMat_part |
float * | wfsVec |
float ** | wfsVec_part |
float * | wfsRef |
float ** | wfsRef_part |
float * | dmVec |
float * | dmVecTMP |
float ** | dmVec_part |
float ** | dmRef_part |
float ** | d_cMat |
float ** | d_wfsVec |
float ** | d_dmVec |
float ** | d_wfsRef |
float ** | d_dmRef |
THDATA * | thdata |
int * | iret |
pthread_t * | threadarray |
int_fast8_t | NBstreams |
cudaStream_t * | stream |
cublasHandle_t * | handle |
uint_fast32_t * | Nsize |
uint_fast32_t * | Noffset |
int * | GPUdevice |
int_fast8_t | orientation |
long | IDout |
This structure holds the GPU computation setup for matrix multiplication.
By declaring an array of these structures, several parallel computations can be executed.
int_fast8_t GPUMATMULTCONF::alloc |
1 if memory has been allocated
long GPUMATMULTCONF::CM_cnt |
long GPUMATMULTCONF::CM_ID |
float * GPUMATMULTCONF::cMat |
float ** GPUMATMULTCONF::cMat_part |
float ** GPUMATMULTCONF::d_cMat |
float ** GPUMATMULTCONF::d_dmRef |
float ** GPUMATMULTCONF::d_dmVec |
float ** GPUMATMULTCONF::d_wfsRef |
float ** GPUMATMULTCONF::d_wfsVec |
float ** GPUMATMULTCONF::dmRef_part |
float * GPUMATMULTCONF::dmVec |
float ** GPUMATMULTCONF::dmVec_part |
float * GPUMATMULTCONF::dmVecTMP |
int * GPUMATMULTCONF::GPUdevice |
int_fast8_t GPUMATMULTCONF::gpuinit |
cublasHandle_t * GPUMATMULTCONF::handle |
long GPUMATMULTCONF::IDout |
int_fast8_t GPUMATMULTCONF::init |
1 if initialized
int * GPUMATMULTCONF::iret |
uint_fast32_t GPUMATMULTCONF::M |
uint_fast32_t GPUMATMULTCONF::N |
int_fast8_t GPUMATMULTCONF::NBstreams |
uint_fast32_t * GPUMATMULTCONF::Noffset |
uint_fast32_t * GPUMATMULTCONF::Nsize |
int_fast8_t GPUMATMULTCONF::orientation |
int_fast8_t * GPUMATMULTCONF::refWFSinit |
reference init
int_fast8_t GPUMATMULTCONF::sem |
synchronization
if sem = 1, wait for the semaphore before performing the computation
sem_t ** GPUMATMULTCONF::semptr1 |
one semaphore per thread
command to start matrix multiplication (input)
sem_t ** GPUMATMULTCONF::semptr2 |
memory transfer to device completed (output)
sem_t ** GPUMATMULTCONF::semptr3 |
computation done (output)
sem_t ** GPUMATMULTCONF::semptr4 |
command to start transfer to host (input)
sem_t ** GPUMATMULTCONF::semptr5 |
output transfer to host completed (output)
cudaStream_t * GPUMATMULTCONF::stream |
THDATA * GPUMATMULTCONF::thdata |
pthread_t * GPUMATMULTCONF::threadarray |
long GPUMATMULTCONF::timerID |
float * GPUMATMULTCONF::wfsRef |
float ** GPUMATMULTCONF::wfsRef_part |
float * GPUMATMULTCONF::wfsVec |
float ** GPUMATMULTCONF::wfsVec_part |