cheshirekow  v0.1.0
rect_dist.cu.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012 Josh Bialkowski (jbialk@mit.edu)
3  *
4  * This file is part of mpblocks.
5  *
6  * mpblocks is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * mpblocks is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with mpblocks. If not, see <http://www.gnu.org/licenses/>.
18  */
27 #ifndef MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
28 #define MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
29 
30 #include <mpblocks/cuda.hpp>
34 
35 namespace mpblocks {
36 namespace cudaNN {
37 
// Constructor-like fragment: stores the result of cuda::mallocT<Scalar>(16)
// in g_out, i.e. a buffer of 16 Scalar elements.
// NOTE(review): the signature line (listing line 39) is missing from this
// extract; presumably this is the constructor of the functor whose
// operator() launches kernels::so3_distance -- confirm against the original.
// NOTE(review): 16 matches the block count used by that launch, so this
// looks like one output slot per block -- confirm.
38 template< bool Pseudo, typename Scalar, unsigned int NDim >
40 {
41  g_out = cuda::mallocT<Scalar>(16);
42 }
43 
// Destructor-like fragment: hands g_out to cuda::free.
// NOTE(review): the signature line (listing line 45) is missing from this
// extract; presumably this is the destructor paired with the allocation of
// g_out above -- confirm against the original.
44 template< bool Pseudo, typename Scalar, unsigned int NDim >
46 {
47  cuda::free(g_out);
48 }
49 
// Launches kernels::so3_distance on `query` with 16 blocks of 192 threads,
// writing into g_out, then copies the results back into the caller's buffer.
//
// h_out: destination for the results; must have room for at least 16
//        Scalar elements (one per launched block).
//
// NOTE(review): listing lines 51-52 (the start of the signature) and line 60
// are missing from this extract; nothing is assumed here about their content.
50 template< bool Pseudo, typename Scalar, unsigned int NDim >
53  Scalar* h_out
54  )
55 {
56  int threads = 192;
57  int blocks = 16;
58 
59  kernels::so3_distance<Pseudo,Scalar,NDim><<<blocks,threads>>>(query,g_out);
    // Copy using the buffers' real element type: both h_out and g_out hold
    // Scalar, so a hard-coded <float> is wrong whenever Scalar is not float.
    // The element count follows the launch configuration (blocks == 16).
61  cuda::memcpyT<Scalar>(h_out,g_out,blocks,cudaMemcpyDeviceToHost);
62 }
63 
64 
// Constructor-like fragment: stores the result of cuda::mallocT<Scalar>(16)
// in g_out, i.e. a buffer of 16 Scalar elements.
// NOTE(review): the signature line (listing line 66) is missing from this
// extract; presumably this is the constructor of the functor whose
// operator() launches kernels::euclidean_distance -- confirm.
// NOTE(review): that operator() reads back (0x01 << NDim) elements from
// g_out; if it is this same buffer, a fixed size of 16 is too small for
// NDim > 4 -- confirm the intended allocation size.
65 template< bool Pseudo, typename Scalar, unsigned int NDim >
67 {
68  g_out = cuda::mallocT<Scalar>(16);
69 }
70 
// Destructor-like fragment: hands g_out to cuda::free.
// NOTE(review): the signature line (listing line 72) is missing from this
// extract; presumably this is the destructor paired with the allocation of
// g_out above -- confirm against the original.
71 template< bool Pseudo, typename Scalar, unsigned int NDim >
73 {
74  cuda::free(g_out);
75 }
76 
// Launches kernels::euclidean_distance on `query` with (1 << NDim) blocks of
// NDim threads each, writing into g_out, then copies one Scalar per block
// back into the caller's buffer.
//
// h_out: destination for the results; must have room for at least
//        (1 << NDim) Scalar elements.
//
// NOTE(review): listing lines 78-79 (the start of the signature) and line 87
// are missing from this extract; nothing is assumed here about their content.
// NOTE(review): g_out appears to be allocated with a fixed 16 elements
// elsewhere in this listing, while `blocks` exceeds 16 once NDim > 4 --
// confirm the allocation is large enough for the read-back below.
77 template< bool Pseudo, typename Scalar, unsigned int NDim >
80  Scalar* h_out
81  )
82 {
83  int threads = NDim;
84  int blocks = 0x01 << NDim;
85 
86  kernels::euclidean_distance<Pseudo,Scalar,NDim><<<blocks,threads>>>(query,g_out);
    // Copy using the buffers' real element type: both h_out and g_out hold
    // Scalar, so a hard-coded <float> is wrong whenever Scalar is not float.
88  cuda::memcpyT<Scalar>(h_out,g_out,blocks,cudaMemcpyDeviceToHost);
89 }
90 
91 
92 
93 } // cudaNN
94 } // mpblocks
95 
96 
97 #endif // MPBLOCKS_CUDA_NN_RECT_DIST_CU_HPP_
void deviceSynchronize()
blocks the host thread until kernels are done executing
void free(void *devPtr)
wraps cudaFree
void operator()(RectangleQuery< Scalar, NDim > query, Scalar *h_out)
void operator()(RectangleQuery< Scalar, NDim > query, Scalar *h_out)