cheshirekow  v0.1.0
mpblocks::cuda Namespace Reference

Namespaces

 bitonic
 
 linalg
 
 linalg2
 
 polynomial
 

Classes

class  CudaException
 an exception which carries a standard error code More...
 
class  CudaExceptionStream
 used to simplify the process of generating an exception message More...
 
struct  DeviceProp
 
class  ExceptionStream
 used to simplify the process of generating an exception message More...
 
struct  FuncAttributes
 

Typedefs

typedef CudaExceptionStream cudaEx
 
typedef ExceptionStream
< std::runtime_error > 
ex
 
typedef std::map< std::string,
FuncAttributes > 
fattrMap_t
 print a formatted report about the specified kernels More...
 
typedef cudaMemcpyKind MemcpyKind
 

Functions

void checkLastError (const std::string &msg="checkLastError")
 wraps cudaGetLastError More...
 
void checkResult (cudaError_t result, const std::string &where)
 throws an exception if result is not success More...
 
void deviceSynchronize ()
 blocks the host thread until kernels are done executing More...
 
template<typename T >
divideBy2 (const T &x)
 returns x/2 (using bit shift), if x is odd then the result is floor(x/2) More...
 
template<typename T >
dividePow2 (T x, T y)
 returns x/y if x and y are both powers of two, otherwise the result is undefined More...
 
void free (void *devPtr)
 wraps cudaFree More...
 
template<unsigned int I, typename T >
getBit (T x)
 returns the value of the specified bit More...
 
int getDevice ()
 wraps cudaGetDevice More...
 
int getDeviceCount ()
 wraps cudaGetDeviceCount More...
 
template<class oiter >
void getDeviceList (oiter &out)
 build a list of device names More...
 
DeviceProp getDeviceProperties (int dev)
 wraps cudaGetDeviceProperties More...
 
template<typename T >
intDivideRoundUp (T x, T y)
 integer divide with round up More...
 
template<typename T >
intPow (T x, T p)
 returns x to the power of p More...
 
template<typename T >
isEven (T x)
 returns true if the number is even More...
 
template<typename T >
isOdd (T x)
 returns true if the number is odd More...
 
template<typename T >
bool isPow2 (T x)
 returns true if the parameter is an exact power of two More...
 
template<typename T >
log2 (T x)
 if x is a power of two then it returns the log of x with base 2 More...
 
void * malloc (size_t size)
 wraps cudaMalloc More...
 
void * mallocPitch (size_t &pitch, size_t cols, size_t rows)
 wraps cudaMallocPitch More...
 
template<typename T >
T * mallocPitchT (size_t &pitch, size_t obsPerRow, size_t cols)
 allocates nObjs objects of type T More...
 
template<typename T >
T * mallocT (size_t nObjs)
 allocates nObjs objects of type T More...
 
void memcpy (void *dst, const void *src, size_t count, MemcpyKind kind)
 wraps cudaMemcpy More...
 
void memcpy2D (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, MemcpyKind kind)
 wraps cudaMemcpy2D More...
 
template<typename T >
void memcpy2DT (T *dst, size_t dpitchBytes, const T *src, size_t spitchBytes, size_t widthObs, size_t height, MemcpyKind kind)
 wraps cudaMemcpy2D More...
 
template<typename T >
void memcpyT (T *dst, const T *src, size_t nObjs, MemcpyKind kind)
 copies nObjs objects of type T More...
 
void memset (void *devPtr, int value, size_t count)
 wraps cudaMemset More...
 
template<typename T >
void memset (T *devPtr, int value, size_t nObs)
 wraps cudaMemset More...
 
void memset2D (void *devPtr, size_t pitch, int value, size_t width, size_t height)
 wraps cudaMemset2D More...
 
template<typename T >
void memset2DT (T *devPtr, size_t pitchBytes, int value, size_t widthObjs, size_t height)
 wraps cudaMemset2D More...
 
template<typename T >
nextPow2 (T x)
 returns the smallest power of two that is not less than x More...
 
template<typename T >
T prevPow2 (T x)
 returns the largest power of two that is not greater than x More...
 
void printDeviceReport (std::ostream &out, int dev)
 print a formatted report about the specified device More...
 
void printDeviceReport (std::ostream &out)
 
void printKernelReport (std::ostream &out, const fattrMap_t &attr)
 
void setDevice (int dev)
 wraps cudaSetDevice More...
 
template<typename T >
times2 (const T &x)
 returns x*2 (using bit shift) More...
 
template<typename T >
twoPow (T x)
 returns 2 to the power of x (2^x) More...
 

Typedef Documentation

typedef CudaExceptionStream mpblocks::cuda::cudaEx

Definition at line 120 of file ExceptionStream.h.

typedef ExceptionStream<std::runtime_error> mpblocks::cuda::ex

Definition at line 119 of file ExceptionStream.h.

typedef std::map<std::string,FuncAttributes> mpblocks::cuda::fattrMap_t

print a formatted report about the specified kernels

Definition at line 166 of file wrap.h.

typedef cudaMemcpyKind mpblocks::cuda::MemcpyKind

Definition at line 36 of file wrap.h.

Function Documentation

void mpblocks::cuda::checkLastError ( const std::string &  msg = "checkLastError")

wraps cudaGetLastError

void mpblocks::cuda::checkResult ( cudaError_t  result,
const std::string &  where 
)

throws an exception if result is not success

void mpblocks::cuda::deviceSynchronize ( )

blocks the host thread until kernels are done executing

void mpblocks::cuda::free ( void *  devPtr)

wraps cudaFree

int mpblocks::cuda::getDevice ( )

wraps cudaGetDevice

int mpblocks::cuda::getDeviceCount ( )

wraps cudaGetDeviceCount

template<class oiter >
void mpblocks::cuda::getDeviceList ( oiter &  out)

build a list of device names

Definition at line 151 of file wrap.h.

DeviceProp mpblocks::cuda::getDeviceProperties ( int  dev)

wraps cudaGetDeviceProperties

void* mpblocks::cuda::malloc ( size_t  size)

wraps cudaMalloc

void* mpblocks::cuda::mallocPitch ( size_t &  pitch,
size_t  cols,
size_t  rows 
)

wraps cudaMallocPitch

template<typename T >
T * mpblocks::cuda::mallocPitchT ( size_t &  pitch,
size_t  obsPerRow,
size_t  cols 
)

allocates nObjs objects of type T

Note
the pitch returned is in bytes, not in nObjs

Definition at line 49 of file wrap.hpp.

template<typename T >
T * mpblocks::cuda::mallocT ( size_t  nObjs)

allocates nObjs objects of type T

Definition at line 42 of file wrap.hpp.

void mpblocks::cuda::memcpy ( void *  dst,
const void *  src,
size_t  count,
MemcpyKind  kind 
)

wraps cudaMemcpy

void mpblocks::cuda::memcpy2D ( void *  dst,
size_t  dpitch,
const void *  src,
size_t  spitch,
size_t  width,
size_t  height,
MemcpyKind  kind 
)

wraps cudaMemcpy2D

template<typename T >
void mpblocks::cuda::memcpy2DT ( T *  dst,
size_t  dpitchBytes,
const T *  src,
size_t  spitchBytes,
size_t  widthObs,
size_t  height,
MemcpyKind  kind 
)

wraps cudaMemcpy2D

Definition at line 62 of file wrap.hpp.

template<typename T >
void mpblocks::cuda::memcpyT ( T *  dst,
const T *  src,
size_t  nObjs,
MemcpyKind  kind 
)

copies nObjs objects of type T

Definition at line 56 of file wrap.hpp.

void mpblocks::cuda::memset ( void *  devPtr,
int  value,
size_t  count 
)

wraps cudaMemset

template<typename T >
void mpblocks::cuda::memset ( T *  devPtr,
int  value,
size_t  nObs 
)

wraps cudaMemset

Definition at line 81 of file wrap.hpp.

void mpblocks::cuda::memset2D ( void *  devPtr,
size_t  pitch,
int  value,
size_t  width,
size_t  height 
)

wraps cudaMemset2D

template<typename T >
void mpblocks::cuda::memset2DT ( T *  devPtr,
size_t  pitchBytes,
int  value,
size_t  widthObjs,
size_t  height 
)

wraps cudaMemset2D

Definition at line 88 of file wrap.hpp.

template<typename T >
T mpblocks::cuda::prevPow2 ( T  x)
inline

returns the largest power of two that is not greater than x

If x is a power of two, it returns x; otherwise, it returns the next lower power of two.

For example:

    x    prev     next
 ------ ------   ------
    0 :     0        0
    1 :     1        1
    2 :     2        2
    3 :     2        4
    4 :     4        4
    5 :     4        8
    6 :     4        8
    7 :     4        8
    8 :     8        8

Definition at line 137 of file powersOfTwo.h.

void mpblocks::cuda::printDeviceReport ( std::ostream &  out,
int  dev 
)

print a formatted report about the specified device

void mpblocks::cuda::printDeviceReport ( std::ostream &  out)

void mpblocks::cuda::printKernelReport ( std::ostream &  out,
const fattrMap_t &  attr 
)

void mpblocks::cuda::setDevice ( int  dev)

wraps cudaSetDevice