dubins_2curves__cuda_2kernels_8cu_8hpp_source.html

 /*

  *  Copyright (C) 2012 Josh Bialkowski (jbialk@mit.edu)

  *

  *  This file is part of openbook.

  *

  *  openbook is free software: you can redistribute it and/or modify

  *  it under the terms of the GNU General Public License as published by

  *  the Free Software Foundation, either version 3 of the License, or

  *  (at your option) any later version.

  *

  *  openbook is distributed in the hope that it will be useful,

  *  but WITHOUT ANY WARRANTY; without even the implied warranty of

  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  *  GNU General Public License for more details.

  *

  *  You should have received a copy of the GNU General Public License

  *  along with openbook.  If not, see <http://www.gnu.org/licenses/>.

  */

 #ifndef MPBLOCKS_DUBINS_CURVES_CUDA2_KERNELS_CU_HPP_

 #define MPBLOCKS_DUBINS_CURVES_CUDA2_KERNELS_CU_HPP_


 namespace    mpblocks {

 namespace      dubins {

 namespace curves_cuda {

 namespace     kernels {


 namespace linalg = cuda::linalg2;


 /// Evaluate the solver for path type `Id` on (q0,q1,r) and, when the result
 /// is feasible and strictly shorter than the incumbent, overwrite `best`
 /// with the new distance and path id.
 ///
 /// @param q0    first state handed to Solver<Id,Format_t>::solve
 /// @param q1    second state handed to Solver<Id,Format_t>::solve
 /// @param r     turning radius
 /// @param best  [in,out] incumbent distance / path id
 template< SolutionId Id, typename Format_t >
 __device__  void applySolver(
         const linalg::Matrix<Format_t,3,1>& q0,
         const linalg::Matrix<Format_t,3,1>& q1,
         const Format_t r,
         DistanceAndId<Format_t>& best )
 {
     const Result<Format_t> candidate = Solver<Id,Format_t>::solve(q0,q1,r);

     // an infeasible candidate never replaces the incumbent
     if( !candidate.f )
         return;

     // only a strict improvement replaces the incumbent
     if( !(candidate.d < best.d) )
         return;

     best.d  = candidate.d;
     best.id = Id;
 }


 /// Evaluate the solver for path type `Id` on (q0,q1,r) and, when the result
 /// is feasible and strictly shorter than `best`, store its distance in
 /// `best`. Distance-only counterpart of the DistanceAndId overload.
 ///
 /// @param q0    first state handed to Solver<Id,Format_t>::solve
 /// @param q1    second state handed to Solver<Id,Format_t>::solve
 /// @param r     turning radius
 /// @param best  [in,out] incumbent distance
 template< SolutionId Id, typename Format_t >
 __device__  void applySolver(
         const linalg::Matrix<Format_t,3,1>& q0,
         const linalg::Matrix<Format_t,3,1>& q1,
         const Format_t r,
         Format_t& best )
 {
     const Result<Format_t> candidate = Solver<Id,Format_t>::solve(q0,q1,r);

     // an infeasible candidate never replaces the incumbent
     if( !candidate.f )
         return;

     // only a strict improvement replaces the incumbent
     if( candidate.d < best )
         best = candidate.d;
 }


 /// Write one curved-path debug record for data point `idx` into `g_out`.
 ///
 /// A DebugCurved record occupies 11 consecutive rows of `g_out` (row stride
 /// `pitch`, column `idx`):
 ///      rows 0..5   3 x 2ea center points   (soln.c[i][j])
 ///      rows 6..8   3 x 1ea distances       (soln.l[i])
 ///      row  9      total distance          (result.d)
 ///      row  10     feasible, 1 or 0        (result.f)
 ///
 /// @param soln    debug output of the solver
 /// @param result  distance / feasibility returned by the solver
 /// @param off     index of the record; the record starts at row off*11
 /// @param pitch   number of elements between consecutive rows of g_out
 /// @param idx     column (data point) this thread writes
 /// @param g_out   output buffer
 template< typename Format_t >
 __device__  void writeSolution(
         DebugCurved<Format_t>& soln,
         Result<Format_t>& result,
         int       off,
         int       pitch,
         int       idx,
         Format_t* g_out)
 {
     // No __syncthreads() here: nothing is shared between threads, and the
     // kernels in this file call this helper after their left-over threads
     // have already returned, so a block-wide barrier would not be reached
     // by every thread of the block.
     const int base = off * 11;

     // rows 0..5: center points
     #pragma unroll
     for(int i=0; i < 3; i++)
     {
         #pragma unroll
         for(int j=0; j < 2; j++)
         {
             const int k = i*2 + j;
             g_out[ (base + k)*pitch + idx ] = soln.c[i][j];
         }
     }

     // rows 6..8: segment distances
     #pragma unroll
     for(int i=0; i < 3; i++)
     {
         const int k = 6 + i;
         g_out[ (base + k)*pitch + idx ] = soln.l[i];
     }

     // rows 9,10: total distance and feasibility flag
     g_out[ (base +  9)*pitch + idx ] = result.d;
     g_out[ (base + 10)*pitch + idx ] = result.f ? 1 : 0;
 }


 /// Write one straight-path debug record for data point `idx` into `g_out`.
 ///
 /// A DebugStraight record occupies 13 consecutive rows of `g_out` (row
 /// stride `pitch`, column `idx`):
 ///      rows 0..3    2 x 2ea center points
 ///      rows 4..7    2 x 2ea tangent points
 ///      rows 8..10   3 x 1ea distances
 ///      row  11      total distance   (result.d)
 ///      row  12      feasible, 1 or 0 (result.f)
 ///
 /// The DebugStraight block is written after the DebugCurved block. The
 /// caller passes `g_out` already moved to the start of the DebugStraight
 /// block, but the first enum value corresponding to a straight output is
 /// four, not zero, hence the `off - 4` below.
 ///
 /// @param soln    debug output of the solver
 /// @param result  distance / feasibility returned by the solver
 /// @param off     solution id of the record (first straight id is 4)
 /// @param pitch   number of elements between consecutive rows of g_out
 /// @param idx     column (data point) this thread writes
 /// @param g_out   output buffer, positioned at the DebugStraight block
 template< typename Format_t >
 __device__  void writeSolution(
         DebugStraight<Format_t>& soln,
         Result<Format_t>& result,
         int       off,
         int       pitch,
         int       idx,
         Format_t* g_out)
 {
     // No __syncthreads() here: nothing is shared between threads, and the
     // kernels in this file call this helper after their left-over threads
     // have already returned, so a block-wide barrier would not be reached
     // by every thread of the block.
     const int base = (off - 4)*13;

     // rows 0..3: center points
     #pragma unroll
     for(int i=0; i < 2; i++)
     {
         #pragma unroll
         for(int j=0; j < 2; j++)
         {
             const int k = i*2 + j;
             g_out[ (base + k)*pitch + idx ] = soln.c[i][j];
         }
     }

     // rows 4..7: documented as tangent points.
     // NOTE(review): this writes soln.c (the center points) a second time;
     // it presumably should write the tangent-point member of DebugStraight
     // instead -- confirm the member name in Solution.h before changing.
     #pragma unroll
     for(int i=0; i < 2; i++)
     {
         #pragma unroll
         for(int j=0; j < 2; j++)
         {
             const int k = 4 + i*2 + j;
             g_out[ (base + k)*pitch + idx ] = soln.c[i][j];
         }
     }

     // rows 8..10: segment distances
     #pragma unroll
     for(int i=0; i < 3; i++)
     {
         const int k = 8 + i;
         g_out[ (base + k)*pitch + idx ] = soln.l[i];
     }

     // rows 11,12: total distance and feasibility flag
     g_out[ (base + 11)*pitch + idx ] = result.d;
     g_out[ (base + 12)*pitch + idx ] = result.f ? 1 : 0;
 }


 /// Batch-compute the distance from a single dubins state (p.q) to a batch
 /// of many dubins states.
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output; the minimum distance for point idx is written to
 ///                  g_out[idx]
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t>
 __global__  void distance_to_set(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace linalg;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the target point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // now compute the distance for each of the solvers, and then record
     // the minimum one

     // the solutions with straight segments are always feasible so let's
     // start with one of them: its distance seeds the running minimum
     const Result<Format_t> soln = Solver<LSL,Format_t>::solve(q0,q1,r);
     Format_t dBest = soln.d;

     applySolver<LSR, Format_t>(q0,q1,r,dBest);
     applySolver<RSR, Format_t>(q0,q1,r,dBest);
     applySolver<RSL, Format_t>(q0,q1,r,dBest);
     applySolver<RLRa,Format_t>(q0,q1,r,dBest);
     applySolver<RLRb,Format_t>(q0,q1,r,dBest);
     applySolver<LRLa,Format_t>(q0,q1,r,dBest);
     applySolver<LRLb,Format_t>(q0,q1,r,dBest);

     g_out[0*pitchOut + idx] = dBest;
 }


 /// Batch-compute the distance from a batch of many dubins states to a
 /// single dubins state (p.q).
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output; the minimum distance for point idx is written to
 ///                  g_out[idx]
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t >
 __global__  void distance_from_set(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the batch point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // now compute the distance for each of the solvers, and then record
     // the minimum one. Paths run from the batch point q1 to the query q0.

     // the solutions with straight segments are always feasible so let's
     // start with one of them: its distance seeds the running minimum
     const Result<Format_t> soln = Solver<LSL,Format_t>::solve(q1,q0,r);
     Format_t dBest = soln.d;

     applySolver<LSR, Format_t>(q1,q0,r,dBest);
     applySolver<RSR, Format_t>(q1,q0,r,dBest);
     applySolver<RSL, Format_t>(q1,q0,r,dBest);
     applySolver<RLRa,Format_t>(q1,q0,r,dBest);
     applySolver<RLRb,Format_t>(q1,q0,r,dBest);
     applySolver<LRLa,Format_t>(q1,q0,r,dBest);
     applySolver<LRLb,Format_t>(q1,q0,r,dBest);

     g_out[0*pitchOut + idx] = dBest;
 }


 /// Batch-compute the distance from a single dubins state (p.q) to a batch
 /// of many dubins states, also recording which path type was shortest.
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// Output layout for point idx:
 ///      g_out[idx]              minimum distance
 ///      g_out[pitchOut + idx]   reinterpreted as
 ///                              PackedStorage<sizeof(Format_t)>::Result and
 ///                              set to (idx << 4) | id of the best path type
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output buffer (two rows, see above)
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t>
 __global__  void distance_to_set_with_id(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the target point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // now compute the distance for each of the solvers, and then record
     // the minimum one

     // the solutions with straight segments are always feasible so let's
     // start with one of them
     const Result<Format_t> soln = Solver<LSL,Format_t>::solve(q0,q1,r);
     DistanceAndId<Format_t> dBest(soln.d,LSL);

     applySolver<LSR, Format_t>(q0,q1,r,dBest);
     applySolver<RSR, Format_t>(q0,q1,r,dBest);
     applySolver<RSL, Format_t>(q0,q1,r,dBest);
     applySolver<RLRa,Format_t>(q0,q1,r,dBest);
     applySolver<RLRb,Format_t>(q0,q1,r,dBest);
     applySolver<LRLa,Format_t>(q0,q1,r,dBest);
     applySolver<LRLb,Format_t>(q0,q1,r,dBest);

     // the low 4 bits carry the path id, the remaining bits the point index
     typedef typename PackedStorage<sizeof(Format_t)>::Result Unsigned;
     const Unsigned pack = (idx << 4 ) | dBest.id;

     // second output row, viewed as unsigned integers
     Unsigned* out  = reinterpret_cast<Unsigned*>(g_out + pitchOut);

     g_out[idx] = dBest.d;
     out[idx]   = pack;
 }


 /// Batch-compute the distance from a batch of many dubins states to a
 /// single dubins state (p.q), also recording which path type was shortest.
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// Output layout for point idx:
 ///      g_out[idx]              minimum distance
 ///      g_out[pitchOut + idx]   reinterpreted as
 ///                              PackedStorage<sizeof(Format_t)>::Result and
 ///                              set to (idx << 4) | id of the best path type
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output buffer (two rows, see above)
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t >
 __global__  void distance_from_set_with_id(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the batch point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // now compute the distance for each of the solvers, and then record
     // the minimum one. Paths run from the batch point q1 to the query q0.

     // the solutions with straight segments are always feasible so let's
     // start with one of them. The seed must use the same (q1,q0) direction
     // as every other solver below, otherwise the minimum mixes distances
     // of paths in opposite directions.
     const Result<Format_t> soln = Solver<LSL,Format_t>::solve(q1,q0,r);
     DistanceAndId<Format_t> dBest(soln.d,LSL);

     applySolver<LSR, Format_t>(q1,q0,r,dBest);
     applySolver<RSR, Format_t>(q1,q0,r,dBest);
     applySolver<RSL, Format_t>(q1,q0,r,dBest);
     applySolver<RLRa,Format_t>(q1,q0,r,dBest);
     applySolver<RLRb,Format_t>(q1,q0,r,dBest);
     applySolver<LRLa,Format_t>(q1,q0,r,dBest);
     applySolver<LRLb,Format_t>(q1,q0,r,dBest);

     // the low 4 bits carry the path id, the remaining bits the point index
     typedef typename PackedStorage<sizeof(Format_t)>::Result Unsigned;
     const Unsigned pack = (idx << 4 ) | dBest.id;

     // second output row, viewed as unsigned integers
     Unsigned* out  = reinterpret_cast<Unsigned*>(g_out + pitchOut);

     g_out[idx] = dBest.d;
     out[idx]   = pack;
 }


 /// Batch-compute the distance from a single dubins state (p.q) to a batch
 /// of many dubins states, writing the full debug record of every solver.
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// Output layout (rows of g_out, row stride pitchOut, column idx):
 ///      rows 0..43    four DebugCurved records of 11 rows each
 ///                    (LRLa, LRLb, RLRa, RLRb), placed by solution id
 ///      rows 44..     four DebugStraight records of 13 rows each
 ///                    (LSL, RSL, RSR, LSR), placed by solution id
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output buffer (see layout above)
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t>
 __global__  void distance_to_set_debug(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the target point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // storage for the solution
     Result<Format_t> soln;

     {
         DebugCurved<Format_t> debug;

         // now compute the distance for each of the curved solvers
         soln = Solver<LRLa,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,LRLa,pitchOut,idx,g_out);

         soln = Solver<LRLb,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,LRLb,pitchOut,idx,g_out);

         soln = Solver<RLRa,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,RLRa,pitchOut,idx,g_out);

         soln = Solver<RLRb,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,RLRb,pitchOut,idx,g_out);
     }

     // there are 4 curved solutions, each one 11 elements, and pitchOut cols
     // so we advance the write head by
     g_out += 44*pitchOut;
     {
         DebugStraight<Format_t> debug;

         // and the same for each of the straight solvers
         soln = Solver<LSL,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,LSL,pitchOut,idx,g_out);

         soln = Solver<RSL,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,RSL,pitchOut,idx,g_out);

         soln = Solver<RSR,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,RSR,pitchOut,idx,g_out);

         soln = Solver<LSR,Format_t>::solve_debug(q0,q1,r,debug);
         writeSolution(debug,soln,LSR,pitchOut,idx,g_out);
     }
 }


 /// Batch-compute the distance from a batch of many dubins states to a
 /// single dubins state (p.q), writing the full debug record of every
 /// solver.
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// Output layout (rows of g_out, row stride pitchOut, column idx):
 ///      rows 0..43    four DebugCurved records of 11 rows each
 ///                    (LRLa, LRLb, RLRa, RLRb), placed by solution id
 ///      rows 44..     four DebugStraight records of 13 rows each
 ///                    (LSL, RSL, RSR, LSR), placed by solution id
 ///
 /// @param p         query state p.q[0..2] and turning radius p.r
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output buffer (see layout above)
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t >
 __global__  void distance_from_set_debug(
        Params<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     linalg::Matrix<Format_t,3,1> q0,q1;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];
     const Format_t r = p.r;

     // read in the batch point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>(q1) = g_in[0*pitchIn + idx];
     set<1>(q1) = g_in[1*pitchIn + idx];
     set<2>(q1) = g_in[2*pitchIn + idx];

     // storage for the solution
     Result<Format_t> soln;

     {
         DebugCurved<Format_t> debug;

         // now compute the distance for each of the curved solvers; paths
         // run from the batch point q1 to the query q0
         soln = Solver<LRLa,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,LRLa,pitchOut,idx,g_out);

         soln = Solver<LRLb,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,LRLb,pitchOut,idx,g_out);

         soln = Solver<RLRa,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,RLRa,pitchOut,idx,g_out);

         soln = Solver<RLRb,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,RLRb,pitchOut,idx,g_out);
     }

     // there are 4 curved solutions, each one 11 elements, and pitchOut cols
     // so we advance the write head by
     g_out += 44*pitchOut;
     {
         DebugStraight<Format_t> debug;

         // and the same for each of the straight solvers
         soln = Solver<LSL,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,LSL,pitchOut,idx,g_out);

         soln = Solver<RSL,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,RSL,pitchOut,idx,g_out);

         soln = Solver<RSR,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,RSR,pitchOut,idx,g_out);

         soln = Solver<LSR,Format_t>::solve_debug(q1,q0,r,debug);
         writeSolution(debug,soln,LSR,pitchOut,idx,g_out);
     }
 }


 /// Batch-compute the euclidean distance from a single dubins state (p.q)
 /// to a batch of many dubins states. The value written is the squared norm
 /// of the state difference, with the third (rotation) component of the
 /// difference wrapped into [-pi, pi].
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// @param p         query state p.q[0..2]
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output; the squared distance for point idx is written
 ///                  to g_out[idx]
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t >
 __global__  void group_distance_to_set(
        EuclideanParams<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     Matrix<Format_t,3,1> q0, q1, diff;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];

     // read in the target point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>( q1 ) = g_in[0*pitchIn + idx];
     set<1>( q1 ) = g_in[1*pitchIn + idx];
     set<2>( q1 ) = g_in[2*pitchIn + idx];

     // find the difference
     diff = q1 - q0;

     // correct the rotation: wrap the angular difference into [-pi, pi].
     // The lower bound must be -pi; comparing against +pi would shift every
     // in-range difference by a full turn.
     const Format_t kPi = static_cast<Format_t>(M_PI);
     if( get<2>( diff ) > kPi )
         set<2>( diff ) -= 2*kPi;
     if( get<2>( diff ) < -kPi )
         set<2>( diff ) += 2*kPi;

     const Format_t dist2 = norm_squared(diff);

     g_out[0*pitchOut + idx] = dist2;
 }


 /// Batch-compute the euclidean distance from a single dubins state (p.q)
 /// to a batch of many dubins states, also recording the point index. The
 /// distance written is the squared norm of the state difference, with the
 /// third (rotation) component of the difference wrapped into [-pi, pi].
 ///
 /// Launch layout: 1-D grid of 1-D blocks, one thread per data point
 /// (idx = blockIdx.x*blockDim.x + threadIdx.x).
 ///
 /// Output layout for point idx:
 ///      g_out[idx]              squared distance
 ///      g_out[pitchOut + idx]   reinterpreted as
 ///                              PackedStorage<sizeof(Format_t)>::Result and
 ///                              set to idx
 ///
 /// @param p         query state p.q[0..2]
 /// @param g_in      batch states; component c of point idx is read from
 ///                  g_in[c*pitchIn + idx], c = 0,1,2
 /// @param pitchIn   number of elements between consecutive rows of g_in
 /// @param g_out     output buffer (two rows, see above)
 /// @param pitchOut  number of elements between consecutive rows of g_out
 /// @param n         number of data points
 template< typename Format_t>
 __global__  void group_distance_to_set_with_id(
        EuclideanParams<Format_t> p,
        Format_t*    g_in,
        unsigned int pitchIn,
        Format_t*    g_out,
        unsigned int pitchOut,
        unsigned int n
        )
 {
     using namespace cuda::linalg2;

     // which data point we work on
     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

     // valid data points are 0..n-1; any thread at or past n is a left-over
     // thread so just bail
     if( idx >= n )
         return;

     // compose the query object
     Matrix<Format_t,3,1> q0, q1, diff;
     set<0>( q0 ) = p.q[0];
     set<1>( q0 ) = p.q[1];
     set<2>( q0 ) = p.q[2];

     // read in the target point q1. Consecutive threads read consecutive
     // addresses of each row. No __syncthreads() is used: the kernel shares
     // no data between threads, and a barrier after the early return above
     // would not be reached by every thread of the block.
     set<0>( q1 ) = g_in[0*pitchIn + idx];
     set<1>( q1 ) = g_in[1*pitchIn + idx];
     set<2>( q1 ) = g_in[2*pitchIn + idx];

     // find the difference
     diff = q1 - q0;

     // correct the rotation: wrap the angular difference into [-pi, pi].
     // The lower bound must be -pi; comparing against +pi would shift every
     // in-range difference by a full turn.
     const Format_t kPi = static_cast<Format_t>(M_PI);
     if( get<2>( diff ) > kPi )
         set<2>( diff ) -= 2*kPi;
     if( get<2>( diff ) < -kPi )
         set<2>( diff ) += 2*kPi;

     const Format_t dist2 = norm_squared(diff);

     // second output row, viewed as unsigned integers
     typedef typename PackedStorage<sizeof(Format_t)>::Result Unsigned;
     Unsigned* out  = reinterpret_cast<Unsigned*>(g_out + pitchOut);

     g_out[idx] = dist2;
     out[idx]   = idx;
 }


 } // kernels

 } // curves_cuda

 } // dubins

 } // mpblocks


 #endif


mpblocks::dubins::curves_cuda::DebugCurved::c
Vector2d_t c[3]
Definition: Solution.h:52

mpblocks::cuda::linalg2::norm_squared
__device__ __host__ Scalar norm_squared(const RValue< Scalar, ROWS, COLS, Exp > &M)
compute the norm
Definition: Norm.h:130

mpblocks::dubins::DistanceAndId::id
int id
id
Definition: result.h:84

mpblocks::dubins::RLRb
Definition: types.h:54

mpblocks::dubins::Result::d
Format_t d
distance
Definition: result.h:45

Dim3::x
int x
Definition: fakecuda.h:44

mpblocks::dubins::DistanceAndId::d
Format_t d
distance
Definition: result.h:83

__global__
#define __global__
Definition: fakecuda.h:33

mpblocks::dubins::LSR
Definition: types.h:57

mpblocks::dubins::curves_cuda::Params
Definition: Params.h:41

mpblocks::dubins::LRLb
Definition: types.h:52

mpblocks::dubins::RLRa
Definition: types.h:53

mpblocks::dubins::LRLa
Definition: types.h:51

mpblocks::dubins::RSL
Definition: types.h:58

mpblocks::dubins::curves_cuda::PackedStorage::Result
uint32_t Result
Definition: PackedIndex.h:41

mpblocks::cuda::linalg2::Matrix< Format_t, 3, 1 >

mpblocks::dubins::curves_cuda::EuclideanParams
Definition: Params.h:54

mpblocks::dubins::curves_cuda::kernels::group_distance_to_set
__global__ void group_distance_to_set(EuclideanParams< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the euclidean distance from a single dubins state to a batch of many dubins states ...
Definition: kernels.cu.hpp:669

mpblocks::dubins::curves_cuda::kernels::applySolver
__device__ void applySolver(const linalg::Matrix< Format_t, 3, 1 > &q0, const linalg::Matrix< Format_t, 3, 1 > &q1, const Format_t r, DistanceAndId< Format_t > &best)
Definition: kernels.cu.hpp:42

mpblocks::dubins::curves_cuda::DebugCurved
Definition: Solution.h:48

mpblocks::dubins::curves_cuda::kernels::distance_from_set
__global__ void distance_from_set(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
Definition: kernels.cu.hpp:265

mpblocks::dubins::curves_cuda::Params::r
Format_t r
turning radius
Definition: Params.h:43

mpblocks::dubins::curves_cuda::DebugStraight
Definition: Solution.h:38

mpblocks::dubins::curves_cuda::DebugCurved::l
Format_t l[3]
Definition: Solution.h:53

mpblocks::dubins::curves_cuda::kernels::distance_to_set_debug
__global__ void distance_to_set_debug(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
Definition: kernels.cu.hpp:489

mpblocks::dubins::Result::f
bool f
is feasible
Definition: result.h:46

mpblocks::dubins::Result
Encapsulates the solution distance along with a feasibility bit for a particular primitive solution...
Definition: result.h:44

threadIdx
Dim3 threadIdx

mpblocks::dubins::LSL
Definition: types.h:55

blockIdx
Dim3 blockIdx

mpblocks::dubins::curves_cuda::Solver::solve
static Result< Format_t > solve(const Vector3d_t &q0, const Vector3d_t &q1, const Format_t r)
basic interface returns only the total distance

__device__
#define __device__
Definition: fakecuda.h:34

mpblocks::dubins::RSR
Definition: types.h:56

mpblocks::dubins::curves_cuda::kernels::writeSolution
__device__ void writeSolution(DebugCurved< Format_t > &soln, Result< Format_t > &result, int off, int pitch, int idx, Format_t *g_out)
Definition: kernels.cu.hpp:79

blockDim
Dim3 blockDim

mpblocks::dubins::curves_cuda::Params::q
Format_t q[3]
the query state
Definition: Params.h:44

mpblocks::dubins::curves_cuda::kernels::distance_from_set_debug
__global__ void distance_from_set_debug(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
Definition: kernels.cu.hpp:580

mpblocks::dubins::curves_cuda::EuclideanParams::q
Format_t q[3]
the query state
Definition: Params.h:56

mpblocks::dubins::curves_cuda::kernels::group_distance_to_set_with_id
__global__ void group_distance_to_set_with_id(EuclideanParams< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the euclidean distance from a single dubins state to a batch of many dubins states ...
Definition: kernels.cu.hpp:734

mpblocks::dubins::curves_cuda::kernels::distance_to_set_with_id
__global__ void distance_to_set_with_id(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
Definition: kernels.cu.hpp:337

mpblocks::dubins::curves_cuda::DebugStraight::c
Vector2d_t c[2]
Definition: Solution.h:42

mpblocks::dubins::curves_cuda::kernels::distance_to_set
__global__ void distance_to_set(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a single dubins state to a batch of many dubins states ...
Definition: kernels.cu.hpp:193

__syncthreads
void __syncthreads()

mpblocks::dubins::curves_cuda::DebugStraight::l
Format_t l[3]
Definition: Solution.h:44

mpblocks::dubins::curves_cuda::kernels::distance_from_set_with_id
__global__ void distance_from_set_with_id(Params< Format_t > p, Format_t *g_in, unsigned int pitchIn, Format_t *g_out, unsigned int pitchOut, unsigned int n)
batch-compute the distance from a batch of many dubins states to a single dubins state ...
Definition: kernels.cu.hpp:413

mpblocks::dubins::curves_cuda::Solver
interface for different solutions
Definition: Solution.h:58

mpblocks::dubins::DistanceAndId
Encapsulates a solution distance along with the id of the path type, identifying the nature of the th...
Definition: result.h:82