27 #ifndef MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
28 #define MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
38 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
41 g_out = cuda::mallocT<Scalar>(16);
44 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
50 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
59 kernels::so3_distance<Pseudo,Scalar,NDim><<<blocks,threads>>>(query,g_out);
61 cuda::memcpyT<float>(h_out,g_out,16,cudaMemcpyDeviceToHost);
65 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
68 g_out = cuda::mallocT<Scalar>(16);
71 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
77 template<
bool Pseudo,
typename Scalar,
unsigned int NDim >
84 int blocks = 0x01 << NDim;
86 kernels::euclidean_distance<Pseudo,Scalar,NDim><<<blocks,threads>>>(query,g_out);
88 cuda::memcpyT<float>(h_out,g_out,blocks,cudaMemcpyDeviceToHost);
97 #endif // MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
void deviceSynchronize()
Blocks the host thread until all kernels have finished executing.
void free(void *devPtr)
wraps cudaFree
void operator()(RectangleQuery< Scalar, NDim > query, Scalar *h_out)
void operator()(RectangleQuery< Scalar, NDim > query, Scalar *h_out)