27 #ifndef MPBLOCKS_CUDANN_KERNELS_SE3_CU_HPP_
28 #define MPBLOCKS_CUDANN_KERNELS_SE3_CU_HPP_
38 namespace linalg = cuda::linalg2;
// Templated kernel fragment (parameters Format_t and NDim). Only part of the
// definition is visible in this listing: the signature line, the local
// declarations of t0/q0/t1/q1 and the computation of `d` are not shown.
// What the visible lines do: unpack one query from q.data, load one
// 7-component element from g_in, and write two values per element to g_out.
41 template<
typename Format_t,
unsigned int NDim>
48 unsigned int pitchOut,
52 using namespace linalg;
// Flat per-thread element index: blockId * N + threadId.
// NOTE(review): blockId, N and threadId are defined on lines not visible
// here, and no bounds check of idx against the element count is visible
// either -- confirm one exists before the loads/stores below.
59 int idx = blockId * N + threadId;
// Unpack the query: q.data[0..2] go to components 0..2 of t0,
// q.data[3..6] go to components 0..3 of q0.
// Presumably t0 is the position part and q0 the 4-value rotation part of the
// query -- TODO confirm against the declarations, which are not visible here.
75 set<0>(t0) = q.
data[0];
76 set<1>(t0) = q.
data[1];
77 set<2>(t0) = q.
data[2];
78 set<0>(q0) = q.
data[3];
79 set<1>(q0) = q.
data[4];
80 set<2>(q0) = q.
data[5];
81 set<3>(q0) = q.
data[6];
// Load element idx from g_in in the same 3 + 4 layout: component k is read
// from g_in[k*pitchIn + idx], so consecutive components of one element are
// pitchIn array elements apart (pitchIn is used as an element offset here,
// not as a byte count).
83 set<0>(t1) = g_in[0*pitchIn + idx];
85 set<1>(t1) = g_in[1*pitchIn + idx];
87 set<2>(t1) = g_in[2*pitchIn + idx];
89 set<0>(q1) = g_in[3*pitchIn + idx];
91 set<1>(q1) = g_in[4*pitchIn + idx];
93 set<2>(q1) = g_in[5*pitchIn + idx];
95 set<3>(q1) = g_in[6*pitchIn + idx];
// Write two values for this element, pitchOut array elements apart:
// row 0 receives d (computed on lines not visible in this listing),
// row 1 receives the element index itself.
// NOTE(review): idx is an int stored through g_out; if g_out is Format_t* with
// a floating-point Format_t, large indices may not be exactly representable
// -- confirm the expected range of idx.
102 g_out[0*pitchOut + idx] = d;
104 g_out[1*pitchOut + idx] = idx;
// Templated kernel fragment (parameters Format_t and NDim). Only part of the
// definition is visible in this listing: the signature line, the local
// declarations of t0/q0/t1/q1, the computation of `dq` and `d`, and whatever
// consumes `arg` are not shown.
// What the visible lines do: unpack one query from q.data, load one
// 7-component element from g_in, form and clamp `arg`, and write two values
// per element to g_out.
110 template<
typename Format_t,
unsigned int NDim>
115 unsigned int pitchIn,
117 unsigned int pitchOut,
121 using namespace linalg;
// Flat per-thread element index: blockId * N + threadId.
// NOTE(review): blockId, N and threadId are defined on lines not visible
// here, and no bounds check of idx against the element count is visible
// either -- confirm one exists before the loads/stores below.
128 int idx = blockId * N + threadId;
// Unpack the query: q.data[0..2] go to components 0..2 of t0,
// q.data[3..6] go to components 0..3 of q0.
// Presumably t0 is the position part and q0 the 4-value rotation part of the
// query -- TODO confirm against the declarations, which are not visible here.
144 set<0>(t0) = q.
data[0];
145 set<1>(t0) = q.
data[1];
146 set<2>(t0) = q.
data[2];
147 set<0>(q0) = q.
data[3];
148 set<1>(q0) = q.
data[4];
149 set<2>(q0) = q.
data[5];
150 set<3>(q0) = q.
data[6];
// Load element idx from g_in in the same 3 + 4 layout: component k is read
// from g_in[k*pitchIn + idx], so consecutive components of one element are
// pitchIn array elements apart (pitchIn is used as an element offset here,
// not as a byte count).
152 set<0>(t1) = g_in[0*pitchIn + idx];
154 set<1>(t1) = g_in[1*pitchIn + idx];
156 set<2>(t1) = g_in[2*pitchIn + idx];
158 set<0>(q1) = g_in[3*pitchIn + idx];
160 set<1>(q1) = g_in[4*pitchIn + idx];
162 set<2>(q1) = g_in[5*pitchIn + idx];
164 set<3>(q1) = g_in[6*pitchIn + idx];
// arg = 2*dq^2 - 1. Presumably dq is the dot product of q0 and q1 (its
// definition is not visible), which would make arg the cosine of the rotation
// angle via the double-angle identity -- TODO confirm.
169 Format_t arg = 2*dq*dq - 1;
// Clamp arg to (nominally) just inside [-1, 1]; presumably this guards an
// inverse-cosine call on a line not shown here.
// NOTE(review): fmaxf/fminf and the f-suffixed literals are single precision.
// Both literals carry far more digits than a float can hold and round to
// -1.0f / 1.0f, so the effective clamp is [-1, 1]; and if Format_t is double,
// arg is narrowed to float by this call. Consider type-generic fmax/fmin with
// Format_t bounds if that is not intended.
170 arg = fmaxf(-0.999999999999999999999999999f,
171 fminf(arg, 0.9999999999999999999999999f));
// Write two values for this element, pitchOut array elements apart:
// row 0 receives d (computed on lines not visible in this listing),
// row 1 receives the element index itself.
// NOTE(review): idx is an int stored through g_out; if g_out is Format_t* with
// a floating-point Format_t, large indices may not be exactly representable
// -- confirm the expected range of idx.
175 g_out[0*pitchOut + idx] = d;
177 g_out[1*pitchOut + idx] = idx;
__device__ __host__ Scalar norm_squared(const RValue< Scalar, ROWS, COLS, Exp > &M)
compute the squared norm
__global__ void se3_distance(Format_t weight, QueryPoint< Format_t, NDim > query, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
weighted distance between elements of se3, points are interpreted as 3 values of position and 4 value...
__global__ void se3_pseudo_distance(Format_t weight, QueryPoint< Format_t, NDim > query, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
weighted distance between elements of se3, points are interpreted as 3 values of position and 4 value...
__device__ __host__ Scalar dot(const RValue< Scalar, ROWS, 1, ExpA > &A, const RValue< Scalar, ROWS, 1, ExpB > &B)
compute the dot product