27 #ifndef MPBLOCKS_DUBINS_CURVES_CUDA2_KERNELS_CU_HPP_
28 #define MPBLOCKS_DUBINS_CURVES_CUDA2_KERNELS_CU_HPP_
33 namespace curves_cuda {
36 namespace linalg = cuda::linalg2;
41 template< SolutionId Id,
typename Format_t >
49 if( soln.
f && soln.
d < best.
d )
56 template< SolutionId Id,
typename Format_t >
64 if( soln.
f && soln.
d < best )
78 template<
typename Format_t >
95 for(
int i=0; i < 3; i++)
98 for(
int j=0; j < 2; j++)
100 const int k = i*2 + j;
102 g_out[ (off + k)*pitch +idx ] = soln.
c[i][j];
107 for(
int i=0; i < 3; i++)
111 g_out[ (off + k)*pitch + idx ] = soln.
l[i];
115 g_out[ (off + 9)*pitch + idx ] = result.
d;
117 g_out[ (off + 10)*pitch + idx ] = result.
f ? 1 : 0;
123 template<
typename Format_t >
144 for(
int i=0; i < 2; i++)
147 for(
int j=0; j < 2; j++)
149 const int k = i*2 + j;
151 g_out[ (off + k)*pitch +idx ] = soln.
c[i][j];
156 for(
int i=0; i < 2; i++)
159 for(
int j=0; j < 2; j++)
161 const int k = 4 + i*2 + j;
163 g_out[ (off + k)*pitch +idx ] = soln.
c[i][j];
168 for(
int i=0; i < 3; i++)
172 g_out[ (off + k)*pitch + idx ] = soln.
l[i];
176 g_out[ (off + 11)*pitch + idx ] = result.
d;
178 g_out[ (off + 12)*pitch + idx ] = result.
f ? 1 : 0;
192 template<
typename Format_t>
196 unsigned int pitchIn,
198 unsigned int pitchOut,
202 using namespace linalg;
209 int idx = blockId * N + threadId;
218 set<0>( q0 ) = p.
q[0];
219 set<1>( q0 ) = p.
q[1];
220 set<2>( q0 ) = p.
q[2];
225 set<0>(q1) = g_in[0*pitchIn + idx];
227 set<1>(q1) = g_in[1*pitchIn + idx];
229 set<2>(q1) = g_in[2*pitchIn + idx];
242 applySolver<LSR, Format_t>(q0,q1,r,dBest);
243 applySolver<RSR, Format_t>(q0,q1,r,dBest);
244 applySolver<RSL, Format_t>(q0,q1,r,dBest);
245 applySolver<RLRa,Format_t>(q0,q1,r,dBest);
246 applySolver<RLRb,Format_t>(q0,q1,r,dBest);
247 applySolver<LRLa,Format_t>(q0,q1,r,dBest);
248 applySolver<LRLb,Format_t>(q0,q1,r,dBest);
251 g_out[0*pitchOut + idx] = dBest;
264 template<
typename Format_t >
268 unsigned int pitchIn,
270 unsigned int pitchOut,
274 using namespace cuda::linalg2;
281 int idx = blockId * N + threadId;
290 set<0>( q0 ) = p.
q[0];
291 set<1>( q0 ) = p.
q[1];
292 set<2>( q0 ) = p.
q[2];
297 set<0>(q1) = g_in[0*pitchIn + idx];
299 set<1>(q1) = g_in[1*pitchIn + idx];
301 set<2>(q1) = g_in[2*pitchIn + idx];
314 applySolver<LSR, Format_t>(q1,q0,r,dBest);
315 applySolver<RSR, Format_t>(q1,q0,r,dBest);
316 applySolver<RSL, Format_t>(q1,q0,r,dBest);
317 applySolver<RLRa,Format_t>(q1,q0,r,dBest);
318 applySolver<RLRb,Format_t>(q1,q0,r,dBest);
319 applySolver<LRLa,Format_t>(q1,q0,r,dBest);
320 applySolver<LRLb,Format_t>(q1,q0,r,dBest);
323 g_out[0*pitchOut + idx] = dBest;
336 template<
typename Format_t>
340 unsigned int pitchIn,
342 unsigned int pitchOut,
346 using namespace cuda::linalg2;
353 int idx = blockId * N + threadId;
362 set<0>( q0 ) = p.
q[0];
363 set<1>( q0 ) = p.
q[1];
364 set<2>( q0 ) = p.
q[2];
369 set<0>(q1) = g_in[0*pitchIn + idx];
371 set<1>(q1) = g_in[1*pitchIn + idx];
373 set<2>(q1) = g_in[2*pitchIn + idx];
384 applySolver<LSR, Format_t>(q0,q1,r,dBest);
385 applySolver<RSR, Format_t>(q0,q1,r,dBest);
386 applySolver<RSL, Format_t>(q0,q1,r,dBest);
387 applySolver<RLRa,Format_t>(q0,q1,r,dBest);
388 applySolver<RLRb,Format_t>(q0,q1,r,dBest);
389 applySolver<LRLa,Format_t>(q0,q1,r,dBest);
390 applySolver<LRLb,Format_t>(q0,q1,r,dBest);
393 Unsigned pack = (idx << 4 ) | dBest.
id;
394 Unsigned* out = reinterpret_cast<Unsigned*>(g_out + pitchOut);
397 g_out[idx] = dBest.
d;
412 template<
typename Format_t >
416 unsigned int pitchIn,
418 unsigned int pitchOut,
422 using namespace cuda::linalg2;
429 int idx = blockId * N + threadId;
438 set<0>( q0 ) = p.
q[0];
439 set<1>( q0 ) = p.
q[1];
440 set<2>( q0 ) = p.
q[2];
445 set<0>(q1) = g_in[0*pitchIn + idx];
447 set<1>(q1) = g_in[1*pitchIn + idx];
449 set<2>(q1) = g_in[2*pitchIn + idx];
460 applySolver<LSR, Format_t>(q1,q0,r,dBest);
461 applySolver<RSR, Format_t>(q1,q0,r,dBest);
462 applySolver<RSL, Format_t>(q1,q0,r,dBest);
463 applySolver<RLRa,Format_t>(q1,q0,r,dBest);
464 applySolver<RLRb,Format_t>(q1,q0,r,dBest);
465 applySolver<LRLa,Format_t>(q1,q0,r,dBest);
466 applySolver<LRLb,Format_t>(q1,q0,r,dBest);
469 Unsigned pack = (idx << 4 ) | dBest.
id;
470 Unsigned* out = reinterpret_cast<Unsigned*>(g_out + pitchOut);
473 g_out[idx] = dBest.
d;
488 template<
typename Format_t>
492 unsigned int pitchIn,
494 unsigned int pitchOut,
498 using namespace cuda::linalg2;
505 int idx = blockId * N + threadId;
514 set<0>( q0 ) = p.
q[0];
515 set<1>( q0 ) = p.
q[1];
516 set<2>( q0 ) = p.
q[2];
521 set<0>(q1) = g_in[0*pitchIn + idx];
523 set<1>(q1) = g_in[1*pitchIn + idx];
525 set<2>(q1) = g_in[2*pitchIn + idx];
550 g_out += 44*pitchOut;
579 template<
typename Format_t >
583 unsigned int pitchIn,
585 unsigned int pitchOut,
589 using namespace cuda::linalg2;
596 int idx = blockId * N + threadId;
605 set<0>( q0 ) = p.
q[0];
606 set<1>( q0 ) = p.
q[1];
607 set<2>( q0 ) = p.
q[2];
612 set<0>(q1) = g_in[0*pitchIn + idx];
614 set<1>(q1) = g_in[1*pitchIn + idx];
616 set<2>(q1) = g_in[2*pitchIn + idx];
641 g_out += 44*pitchOut;
668 template<
typename Format_t >
672 unsigned int pitchIn,
674 unsigned int pitchOut,
678 using namespace cuda::linalg2;
685 int idx = blockId * N + threadId;
693 Matrix<Format_t,3,1> q0, q1, diff;
694 set<0>( q0 ) = p.
q[0];
695 set<1>( q0 ) = p.
q[1];
696 set<2>( q0 ) = p.
q[2];
700 set<0>( q1 ) = g_in[0*pitchIn + idx];
702 set<1>( q1 ) = g_in[1*pitchIn + idx];
704 set<2>( q1 ) = g_in[2*pitchIn + idx];
711 const Format_t _PI =
static_cast<Format_t
>(M_PI);
712 if( get<2>( diff ) > _PI )
713 set<2>( diff ) -= 2*_PI;
714 if( get<2>( diff ) < _PI )
715 set<2>( diff ) += 2*_PI;
720 g_out[0*pitchOut + idx] = dist2;
733 template<
typename Format_t>
737 unsigned int pitchIn,
739 unsigned int pitchOut,
743 using namespace cuda::linalg2;
750 int idx = blockId * N + threadId;
758 Matrix<Format_t,3,1> q0, q1, diff;
759 set<0>( q0 ) = p.
q[0];
760 set<1>( q0 ) = p.
q[1];
761 set<2>( q0 ) = p.
q[2];
765 set<0>( q1 ) = g_in[0*pitchIn + idx];
767 set<1>( q1 ) = g_in[1*pitchIn + idx];
769 set<2>( q1 ) = g_in[2*pitchIn + idx];
776 const Format_t _PI =
static_cast<Format_t
>(M_PI);
777 if( get<2>( diff ) > _PI )
778 set<2>( diff ) -= 2*_PI;
779 if( get<2>( diff ) < _PI )
780 set<2>( diff ) += 2*_PI;
785 Unsigned* out =
reinterpret_cast<Unsigned*
>(g_out + pitchOut);
__device__ __host__ Scalar norm_squared(const RValue< Scalar, ROWS, COLS, Exp > &M)
compute the squared norm
__global__ void group_distance_to_set(EuclideanParams< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the euclidean distance from a single dubins state to a batch of many dubins states ...
__device__ void applySolver(const linalg::Matrix< Format_t, 3, 1 > &q0, const linalg::Matrix< Format_t, 3, 1 > &q1, const Format_t r, DistanceAndId< Format_t > &best)
__global__ void distance_from_set(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
__global__ void distance_to_set_debug(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
Encapsulates the solution distance along with a feasibility bit for a particular primitive solution...
static Result< Format_t > solve(const Vector3d_t &q0, const Vector3d_t &q1, const Format_t r)
basic interface returns only the total distance
__device__ void writeSolution(DebugCurved< Format_t > &soln, Result< Format_t > &result, int off, int pitch, int idx, Format_t *g_out)
Format_t q[3]
the query state
__global__ void distance_from_set_debug(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
Format_t q[3]
the query state
__global__ void group_distance_to_set_with_id(EuclideanParams< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the euclidean distance from a single dubins state to a batch of many dubins states ...
__global__ void distance_to_set_with_id(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
__global__ void distance_to_set(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
__global__ void distance_from_set_with_id(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
interface for different solutions
Encapsulates a solution distance along with the id of the path type, identifying the nature of the th...