cheshirekow  v0.1.0
r2s1.cu.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012 Josh Bialkowski (jbialk@mit.edu)
3  *
4  * This file is part of openbook.
5  *
6  * openbook is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * openbook is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with openbook. If not, see <http://www.gnu.org/licenses/>.
18  */
27 #ifndef MPBLOCKS_CUDANN_KERNELS_R2S1_CU_HPP_
28 #define MPBLOCKS_CUDANN_KERNELS_R2S1_CU_HPP_
29 
30 #include <mpblocks/cuda/linalg2.h>
33 
34 namespace mpblocks {
35 namespace cudaNN {
36 namespace kernels {
37 
38 namespace linalg = cuda::linalg2;
39 
40 
41 
42 
/**
 *  @brief  Weighted R^2 x S^1 distance from a single query point to every
 *          point of a batch; each thread handles exactly one batch point.
 *
 *  For the batch point with index @p idx the kernel reads
 *      x = g_in[idx + 0*pitchIn],
 *      y = g_in[idx + 1*pitchIn],
 *      t = g_in[idx + 2*pitchIn]
 *  and compares it against query.data[0..2].  The result is
 *      sqrt(dx^2 + dy^2) + weight * (angular term)
 *  where the angular term is the smaller of |dt| and 2*pi - |dt|.
 *
 *  Launch layout: one-dimensional grid of one-dimensional blocks; only the
 *  .x components of threadIdx / blockIdx / blockDim are used.  The grid must
 *  provide at least @p n threads; surplus threads exit without touching
 *  memory.  No shared memory is used.
 *
 *  @note   The arithmetic goes through the single-precision routines
 *          __fsqrt_rn / fabsf / fminf regardless of Format_t.
 *  @note   NOTE(review): the angular term assumes both angles lie within one
 *          common 2*pi-wide interval, so |dt| <= 2*pi -- confirm with callers.
 *  @note   NOTE(review): query.data[2] is read, so NDim is presumably >= 3
 *          -- confirm against the QueryPoint definition.
 *
 *  @param  weight    factor applied to the angular term
 *  @param  query     query point; data[0], data[1] are the planar
 *                    coordinates, data[2] is the angle
 *  @param  g_in      batch points, one row per coordinate
 *  @param  pitchIn   offset in elements between consecutive rows of g_in
 *  @param  g_out     output: row 0 receives the distance, row 1 the index
 *                    of the batch point (stored as Format_t)
 *  @param  pitchOut  offset in elements between consecutive rows of g_out
 *  @param  n         number of batch points
 */
template< typename Format_t, unsigned int NDim>
__global__  void r2s1_distance(
       Format_t     weight,
       QueryPoint<Format_t,NDim> query,
       Format_t*    g_in,
       unsigned int pitchIn,
       Format_t*    g_out,
       unsigned int pitchOut,
       unsigned int n
       )
{
    // which data point we work on
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Left-over threads bail out.  Valid indices are 0 .. n-1, so idx == n
    // must be rejected as well.  Returning early is safe because the kernel
    // contains no block-wide barrier after this point.
    if( idx >= n )
        return;

    // planar offset between the batch point and the query point
    const Format_t dx = g_in[idx + 0*pitchIn] - query.data[0];
    const Format_t dy = g_in[idx + 1*pitchIn] - query.data[1];

    // magnitude of the angular offset; the sign must not leak into the
    // distance, otherwise the result could drop below the planar distance
    const Format_t dt = fabsf( g_in[idx + 2*pitchIn] - query.data[2] );

    // typed constant so the arithmetic is not promoted to double
    const Format_t twoPi = static_cast<Format_t>(6.283185307179586);

    // the planar part is shared by both candidate distances
    const Format_t planar = __fsqrt_rn( dx*dx + dy*dy );

    // going around the circle one way or the other
    const Format_t da = planar + weight*dt;
    const Format_t db = planar + weight*( twoPi - dt );

    g_out[0*pitchOut + idx] = fminf(da,db);
    g_out[1*pitchOut + idx] = static_cast<Format_t>(idx);
}
91 
92 
93 
94 
95 
96 } // kernels
97 } // cudaNN
98 } // mpblocks
99 
100 
101 #endif
102 
__device__ __host__ Scalar norm_squared(const RValue< Scalar, ROWS, COLS, Exp > &M)
compute the norm
Definition: Norm.h:130
int x
Definition: fakecuda.h:44
#define __global__
Definition: fakecuda.h:33
__global__ void r2s1_distance(Format_t weight, QueryPoint< Format_t, NDim > query, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
weighted R2 x S1 distance between the query point and each point in g_in (planar 2-norm plus a weighted angular term)
Definition: r2s1.cu.hpp:44
Dim3 threadIdx
Dim3 blockIdx
Dim3 blockDim
void __syncthreads()