27 #ifndef MPBLOCKS_CUDA_WRAP_H_
28 #define MPBLOCKS_CUDA_WRAP_H_
39 public cudaFuncAttributes
59 void checkResult( cudaError_t result,
const std::string& where);
65 void*
mallocPitch(
size_t& pitch,
size_t cols,
size_t rows );
74 T*
mallocPitchT(
size_t& pitch,
size_t obsPerRow,
size_t cols );
77 void free(
void* devPtr);
104 template <
typename T>
108 void memset(
void* devPtr,
int value,
size_t count );
111 template <
typename T>
112 void memset( T* devPtr,
int value,
size_t nObs );
123 template <
typename T>
150 template <
class oiter>
154 for(
int dev=0; dev < nDevices; dev++)
157 *(out++) = prop.name;
void printKernelReport(std::ostream &out, const fattrMap_t &attr)
print a formatted report about the specified kernels
void * malloc(size_t size)
wraps cudaMalloc
cudaMemcpyKind MemcpyKind
void deviceSynchronize()
blocks the host thread until kernels are done executing
T * mallocPitchT(size_t &pitch, size_t obsPerRow, size_t cols)
allocates a pitched 2D array of objects of type T (typed counterpart of mallocPitch)
void free(void *devPtr)
wraps cudaFree
T * mallocT(size_t nObjs)
allocates nObjs objects of type T
void checkResult(cudaError_t result, const std::string &where)
throws an exception if result is not success
void checkLastError(const std::string &msg="checkLastError")
wraps cudaGetLastError
void memcpy2DT(T *dst, size_t dpitchBytes, const T *src, size_t spitchBytes, size_t widthObs, size_t height, MemcpyKind kind)
wraps cudaMemcpy2D
void getDeviceList(oiter &out)
build a list of device names
int getDeviceCount()
wraps cudaGetDeviceCount
void memcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, MemcpyKind kind)
wraps cudaMemcpy2D
void memcpyT(T *dst, const T *src, size_t nObjs, MemcpyKind kind)
copies nObjs objects of type T from src to dst (typed counterpart of memcpy)
void printDeviceReport(std::ostream &out, int dev)
print a formatted report about the specified device
void memset(void *devPtr, int value, size_t count)
wraps cudaMemset
void memset2DT(T *devPtr, size_t pitchBytes, int value, size_t widthObjs, size_t height)
wraps cudaMemset2D
void setDevice(int dev)
wraps cudaSetDevice
DeviceProp getDeviceProperties(int dev)
wraps cudaGetDeviceProperties
void memcpy(void *dst, const void *src, size_t count, MemcpyKind kind)
wraps cudaMemcpy
std::map< std::string, FuncAttributes > fattrMap_t
map from a string key to FuncAttributes; passed to printKernelReport to print a formatted report about the specified kernels
void memset2D(void *devPtr, size_t pitch, int value, size_t width, size_t height)
wraps cudaMemset2D
int getDevice()
wraps cudaGetDevice
void * mallocPitch(size_t &pitch, size_t cols, size_t rows)
wraps cudaMallocPitch