cheshirekow  v0.1.0
kernels.cu.hpp File Reference
#include <cassert>
#include <algorithm>

Go to the source code of this file.

Namespaces

 mpblocks
 
 mpblocks::cuda
 
 mpblocks::cuda::bitonic
 

Macros

#define UMAD(a, b, c)   ( UMUL((a), (b)) + (c) )
 
#define UMUL(a, b)   __umul24((a), (b))
 

Typedefs

typedef unsigned int mpblocks::cuda::bitonic::uint_t
 

Functions

template<typename KeyType , typename ValueType >
__device__ void mpblocks::cuda::bitonic::compareSwap (KeyType &keyA, ValueType &valA, KeyType &keyB, ValueType &valB, Direction dir)
 implements a "comparator": compares two keys and swaps them if they are not in the desired order More...
 
template<typename KeyType >
__device__ void mpblocks::cuda::bitonic::compareSwap (KeyType &keyA, KeyType &keyB, Direction dir)
 compares two keys and swaps them if they are not in the desired order More...
 
template<typename KeyType , typename ValueType >
__global__ void mpblocks::cuda::bitonic::mergeGlobal (KeyType *d_DstKey, ValueType *d_DstVal, KeyType *d_SrcKey, ValueType *d_SrcVal, uint_t arrayLength, uint_t size, uint_t stride, Direction dir)
 sorts a bitonic series; this kernel is for a stride >= SHARED_SIZE_LIMIT More...
 
template<typename KeyType >
__global__ void mpblocks::cuda::bitonic::mergeGlobal (KeyType *d_DstKey, KeyType *d_SrcKey, uint_t arrayLength, uint_t size, uint_t stride, Direction dir)
 sorts a bitonic series; this kernel is for a stride >= SHARED_SIZE_LIMIT More...
 
template<typename KeyType , typename ValueType >
__global__ void mpblocks::cuda::bitonic::mergeShared (KeyType *d_DstKey, ValueType *d_DstVal, KeyType *d_SrcKey, ValueType *d_SrcVal, uint_t arrayLength, uint_t sharedLength, uint_t size, Direction dir)
 sorts a bitonic series; this kernel is for size > SHARED_SIZE_LIMIT and for a stride in [1, SHARED_SIZE_LIMIT/2] More...
 
template<typename KeyType >
__global__ void mpblocks::cuda::bitonic::mergeShared (KeyType *d_DstKey, KeyType *d_SrcKey, uint_t arrayLength, uint_t sharedLength, uint_t size, Direction dir)
 sorts a bitonic series; this kernel is for size > SHARED_SIZE_LIMIT and for a stride in [1, SHARED_SIZE_LIMIT/2] More...
 
template<typename KeyType >
__global__ void mpblocks::cuda::bitonic::prepare (KeyType *d_SrcKey, KeyType init, uint_t arrayLength)
 used when arrayLength is not a power of two; it writes to all values of d_SrcKey (which is an offset from the actual source buffer) More...
 
template<typename KeyType , typename ValueType >
uint_t mpblocks::cuda::bitonic::sort (KeyType *d_DstKey, ValueType *d_DstVal, KeyType *d_SrcKey, ValueType *d_SrcVal, uint_t arrayLength, uint_t sharedLength, Direction dir, uint_t globalThread)
 kernel launcher, sorts an array of key/value pairs using the bitonic sort algorithm More...
 
template<typename KeyType >
uint_t mpblocks::cuda::bitonic::sort (KeyType *d_DstKey, KeyType *d_SrcKey, uint_t arrayLength, uint_t sharedLength, Direction dir, uint_t globalThread)
 kernel launcher, sorts an array of keys using the bitonic sort algorithm More...
 
template<typename KeyType , typename ValueType >
__global__ void mpblocks::cuda::bitonic::sortShared (KeyType *d_DstKey, ValueType *d_DstVal, KeyType *d_SrcKey, ValueType *d_SrcVal, uint_t arrayLength, Direction dir)
 single kernel (unified) bitonic sort More...
 
template<typename KeyType >
__global__ void mpblocks::cuda::bitonic::sortShared (KeyType *d_DstKey, KeyType *d_SrcKey, uint_t arrayLength, Direction dir)
 single kernel (unified) bitonic sort More...
 
template<typename KeyType , typename ValueType >
__global__ void mpblocks::cuda::bitonic::sortSharedInc (KeyType *d_DstKey, ValueType *d_DstVal, KeyType *d_SrcKey, ValueType *d_SrcVal, uint_t sharedLength)
 bottom level of the bitonic sort More...
 
template<typename KeyType >
__global__ void mpblocks::cuda::bitonic::sortSharedInc (KeyType *d_DstKey, KeyType *d_SrcKey, uint_t sharedLength)
 bottom level of the bitonic sort More...
 

Macro Definition Documentation

#define UMAD(a, b, c)   ( UMUL((a), (b)) + (c) )

Definition at line 31 of file kernels.cu.hpp.

#define UMUL(a, b)   __umul24((a), (b))

Definition at line 30 of file kernels.cu.hpp.