41 #ifndef PCL_GPU_PEOPLE__NCV_HPP_ 42 #define PCL_GPU_PEOPLE__NCV_HPP_ 44 #if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS 45 #define NCV_EXPORTS __declspec(dllexport) 51 #define WIN32_LEAN_AND_MEAN 54 #include <cuda_runtime.h> 57 #include <pcl/console/print.h> 80 #define NCV_CT_PREP_PASTE_AUX(a,b) a##b 81 #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b)
86 #define NCV_CT_ASSERT(X) \ 87 typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \ 88 NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__) 96 #if !defined(__align__) && !defined(__CUDACC__) 97 #if defined(_WIN32) || defined(_WIN64) 98 #define __align__(n) __declspec(align(n)) 99 #elif defined(__unix__) 100 #define __align__(n) __attribute__((__aligned__(n))) 110 typedef bool NcvBool;
111 typedef long long Ncv64s;
113 #if defined(__APPLE__) && !defined(__CUDACC__) 114 typedef uint64_t Ncv64u;
116 typedef unsigned long long Ncv64u;
120 typedef unsigned int Ncv32u;
121 typedef short Ncv16s;
122 typedef unsigned short Ncv16u;
124 typedef unsigned char Ncv8u;
125 typedef float Ncv32f;
126 typedef double Ncv64f;
191 NCV_CT_ASSERT(
sizeof(NcvBool) <= 4);
192 NCV_CT_ASSERT(
sizeof(Ncv64s) == 8);
193 NCV_CT_ASSERT(
sizeof(Ncv64u) == 8);
194 NCV_CT_ASSERT(
sizeof(Ncv32s) == 4);
195 NCV_CT_ASSERT(
sizeof(Ncv32u) == 4);
196 NCV_CT_ASSERT(
sizeof(Ncv16s) == 2);
197 NCV_CT_ASSERT(
sizeof(Ncv16u) == 2);
198 NCV_CT_ASSERT(
sizeof(Ncv8s) == 1);
199 NCV_CT_ASSERT(
sizeof(Ncv8u) == 1);
200 NCV_CT_ASSERT(
sizeof(Ncv32f) == 4);
201 NCV_CT_ASSERT(
sizeof(Ncv64f) == 8);
202 NCV_CT_ASSERT(
sizeof(
NcvRect8u) ==
sizeof(Ncv32u));
203 NCV_CT_ASSERT(
sizeof(
NcvRect32s) == 4 *
sizeof(Ncv32s));
204 NCV_CT_ASSERT(
sizeof(
NcvRect32u) == 4 *
sizeof(Ncv32u));
205 NCV_CT_ASSERT(
sizeof(
NcvSize32u) == 2 *
sizeof(Ncv32u));
215 const Ncv32u K_WARP_SIZE = 32;
216 const Ncv32u K_LOG2_WARP_SIZE = 5;
224 NCV_EXPORTS
void ncvDebugOutput(
const std::string &msg);
226 typedef void NCVDebugOutputHandler(
const std::string &msg);
228 NCV_EXPORTS
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
230 #define ncvAssertPrintCheck(pred, msg) \ 235 std::ostringstream oss; \ 236 oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \ 237 ncvDebugOutput(oss.str()); \ 241 #define ncvAssertPrintReturn(pred, msg, err) \ 244 ncvAssertPrintCheck(pred, msg); \ 245 if (!(pred)) return err; \ 248 #define ncvAssertReturn(pred, err) \ 249 ncvAssertPrintReturn(pred, "retcode=" << (int)err, err) 251 #define ncvAssertReturnNcvStat(ncvOp) \ 254 NCVStatus _ncvStat = ncvOp; \ 255 ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \ 258 #define ncvAssertCUDAReturn(cudacall, errCode) \ 261 cudaError_t res = cudacall; \ 262 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ 265 #define ncvAssertCUDALastErrorReturn(errCode) \ 268 cudaError_t res = cudaGetLastError(); \ 269 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ 286 NCV_INCONSISTENT_INPUT,
287 NCV_TEXTURE_BIND_ERROR,
288 NCV_DIMENSIONS_INVALID,
294 NCV_ALLOCATOR_NOT_INITIALIZED,
295 NCV_ALLOCATOR_BAD_ALLOC,
296 NCV_ALLOCATOR_BAD_DEALLOC,
297 NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
298 NCV_ALLOCATOR_DEALLOC_ORDER,
299 NCV_ALLOCATOR_BAD_REUSE,
302 NCV_MEM_RESIDENCE_ERROR,
303 NCV_MEM_INSUFFICIENT_CAPACITY,
305 NCV_HAAR_INVALID_PIXEL_STEP,
306 NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
307 NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
308 NCV_HAAR_TOO_LARGE_FEATURES,
309 NCV_HAAR_XML_LOADING_EXCEPTION,
311 NCV_NOIMPL_HAAR_TILTED_FEATURES,
314 NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
317 NPPST_SUCCESS = NCV_SUCCESS,
319 NPPST_CUDA_KERNEL_EXECUTION_ERROR,
320 NPPST_NULL_POINTER_ERROR,
321 NPPST_TEXTURE_BIND_ERROR,
330 NPPST_MEM_INSUFFICIENT_BUFFER,
331 NPPST_MEM_RESIDENCE_ERROR,
332 NPPST_MEM_INTERNAL_ERROR,
337 typedef Ncv32u NCVStatus;
339 #define NCV_SET_SKIP_COND(x) \ 340 bool __ncv_skip_cond = x 342 #define NCV_RESET_SKIP_COND(x) \ 345 #define NCV_SKIP_COND_BEGIN \ 346 if (!__ncv_skip_cond) { 348 #define NCV_SKIP_COND_END \ 359 typedef struct _NcvTimer *NcvTimer;
361 NCV_EXPORTS NcvTimer ncvStartTimer(
void);
363 NCV_EXPORTS
double ncvEndQueryTimerUs(NcvTimer t);
365 NCV_EXPORTS
double ncvEndQueryTimerMs(NcvTimer t);
378 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
387 NCVMemoryTypeHostPageable,
388 NCVMemoryTypeHostPinned,
423 virtual NCVStatus alloc(
NCVMemSegment &seg,
size_t size) = 0;
426 virtual NcvBool isInitialized(
void)
const = 0;
427 virtual NcvBool isCounting(
void)
const = 0;
429 virtual NCVMemoryType memType(
void)
const = 0;
430 virtual Ncv32u alignment(
void)
const = 0;
431 virtual size_t maxSize(
void)
const = 0;
448 NCVMemStackAllocator(NCVMemoryType memT,
size_t capacity, Ncv32u alignment,
void *reusePtr=NULL);
457 virtual NCVMemoryType
memType(
void)
const;
459 virtual size_t maxSize(
void)
const;
463 NCVMemoryType _memType;
470 NcvBool bReusesMemory;
490 virtual NCVMemoryType
memType(
void)
const;
492 virtual size_t maxSize(
void)
const;
499 NCVMemoryType _memType;
509 NCV_EXPORTS NCVStatus memSegCopyHelper(
void *dst, NCVMemoryType dstType,
510 const void *src, NCVMemoryType srcType,
511 size_t sz, cudaStream_t cuStream);
514 NCV_EXPORTS NCVStatus memSegCopyHelper2D(
void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
515 const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
516 Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
547 ncvAssertReturn(dst.
_length == this->_length, NCV_MEM_COPY_ERROR);
548 howMuch = this->
_length *
sizeof(T);
552 ncvAssertReturn(dst.
_length *
sizeof(T) >= howMuch &&
553 this->_length *
sizeof(T) >= howMuch &&
554 howMuch > 0, NCV_MEM_COPY_ERROR);
556 ncvAssertReturn((this->
_ptr != NULL || this->
_memtype == NCVMemoryTypeNone) &&
557 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
559 NCVStatus ncvStat = NCV_SUCCESS;
560 if (this->
_memtype != NCVMemoryTypeNone)
563 this->_ptr, this->_memtype,
601 this->allocatedMem.
clear();
603 ncvStat = allocator.
alloc(this->allocatedMem,
length *
sizeof(T));
604 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc ctor:: alloc failed", );
608 this->_memtype = this->allocatedMem.begin.memtype;
615 ncvStat = allocator.
dealloc(this->allocatedMem);
616 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc dtor:: dealloc failed");
655 this->bReused =
false;
662 this->bReused =
true;
667 this->bReused =
false;
670 ncvAssertPrintReturn(
length *
sizeof(T) <= memSegment.
size, \
671 "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
677 this->bReused =
true;
682 return this->bReused;
719 return _pitch /
sizeof(T);
727 ncvAssertReturn(dst.
_pitch == this->_pitch &&
728 dst.
_height == this->_height, NCV_MEM_COPY_ERROR);
734 this->_pitch * this->_height >= howMuch &&
735 howMuch > 0, NCV_MEM_COPY_ERROR);
737 ncvAssertReturn((this->
_ptr != NULL || this->
_memtype == NCVMemoryTypeNone) &&
738 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
740 NCVStatus ncvStat = NCV_SUCCESS;
741 if (this->
_memtype != NCVMemoryTypeNone)
744 this->_ptr, this->_memtype,
753 ncvAssertReturn(this->
width() >= roi.
width && this->height() >= roi.
height &&
755 ncvAssertReturn((this->
_ptr != NULL || this->
_memtype == NCVMemoryTypeNone) &&
756 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
758 NCVStatus ncvStat = NCV_SUCCESS;
759 if (this->
_memtype != NCVMemoryTypeNone)
762 this->_ptr, this->_pitch, this->_memtype,
769 T &
at(Ncv32u x, Ncv32u y)
const 771 NcvBool bOutRange = (x >= this->
_width || y >= this->
_height);
772 ncvAssertPrintCheck(!bOutRange,
"Error addressing matrix at [" << x <<
", " << y <<
"]");
777 return ((T *)((Ncv8u *)this->
_ptr + y * this->
_pitch))[x];
815 this->allocatedMem.
clear();
817 Ncv32u widthBytes =
width *
sizeof(T);
818 Ncv32u pitchBytes = alignUp(widthBytes, allocator.
alignment());
822 ncvAssertPrintReturn(
pitch >= pitchBytes &&
824 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
828 Ncv32u requiredAllocSize = pitchBytes *
height;
830 ncvStat = allocator.
alloc(this->allocatedMem, requiredAllocSize);
831 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc ctor:: alloc failed", );
834 this->_width =
width;
836 this->_pitch = pitchBytes;
837 this->_memtype = this->allocatedMem.begin.memtype;
844 ncvStat = allocator.
dealloc(this->allocatedMem);
845 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc dtor:: dealloc failed");
885 this->bReused =
false;
888 Ncv32u widthBytes =
width *
sizeof(T);
889 Ncv32u pitchBytes = alignUp(widthBytes, alignment);
893 if (!bSkipPitchCheck)
895 ncvAssertPrintReturn(
pitch >= pitchBytes &&
896 (
pitch & (alignment - 1)) == 0,
897 "NCVMatrixReuse ctor:: incorrect pitch passed", );
901 ncvAssertPrintReturn(
pitch >= widthBytes,
"NCVMatrixReuse ctor:: incorrect pitch passed", );
906 ncvAssertPrintReturn(pitchBytes *
height <= memSegment.
size, \
907 "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
911 this->
_pitch = pitchBytes;
915 this->bReused =
true;
920 this->bReused =
false;
923 ncvAssertPrintReturn(roi.
x < mat.
width() && roi.
y < mat.
height() && \
925 "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
933 this->bReused =
true;
938 return this->bReused;
949 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(
NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
952 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
953 NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
955 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
956 NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
958 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
959 NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
961 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
962 NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
964 #define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) ) 965 #define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x)) 966 #define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x)) 967 #define CLAMP_0_255(x) CLAMP(x,0,255) 969 #define SUB_BEGIN(type, name) struct { __inline type name 970 #define SUB_END(name) } name; 971 #define SUB_CALL(name) name.name 973 #define SQR(x) ((x)*(x)) 975 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \ 976 NCVMatrixAlloc<type> name(alloc, width, height); \ 977 ncvAssertReturn(name.isMemAllocated(), err); 979 #endif // PCL_GPU_PEOPLE__NCV_HPP_
__host__ __device__ NcvRect32u()
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
__host__ __device__ bool operator==(const NcvSize32u &another) const
__host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height)
Ncv32u getAllocatorsAlignment() const
NcvBool isMemReused() const
T & at(Ncv32u x, Ncv32u y) const
Compile-time assert namespace.
Ncv32s height
Rectangle height.
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size)=0
virtual NCVStatus dealloc(NCVMemSegment &seg)=0
NCVMemoryType memType() const
Ncv32s width
Rectangle width.
NCVVectorReuse(const NCVMemSegment &memSegment)
Ncv32u height
Rectangle height.
Ncv32s width
Rectangle width.
__host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y)
Ncv32u width
Rectangle width.
NCVMemSegment getSegment() const
__host__ __device__ NcvSize32s()
__host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y)
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
Ncv32u y
y-coordinate of upper left corner.
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
Ncv32u getAllocatorsAlignment() const
NCVMemSegment getSegment() const
__host__ __device__ NcvPoint2D32u()
Ncv32s height
Rectangle height.
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height)
__host__ __device__ NcvPoint2D32s()
virtual NCVMemoryType memType(void) const =0
NCVStatus copySolid(NCVVector< T > &dst, cudaStream_t cuStream, size_t howMuch=0) const
virtual Ncv32u alignment(void) const =0
virtual NcvBool isCounting(void) const =0
__host__ __device__ NcvSize32u()
NcvBool isMemAllocated() const
INCVMemAllocator (Interface)
Ncv32u height
Rectangle height.
NcvBool isMemAllocated() const
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
NCVStatus copy2D(NCVMatrix< T > &dst, NcvSize32u roi, cudaStream_t cuStream) const
virtual ~INCVMemAllocator()=0
__host__ __device__ NcvRect32s()
NCVStatus copySolid(NCVMatrix< T > &dst, cudaStream_t cuStream, size_t howMuch=0) const
virtual size_t maxSize(void) const =0
Ncv32s y
y-coordinate of upper left corner.
NCVMemoryType memType() const
NcvBool isMemReused() const
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height)
Ncv32u x
x-coordinate of upper left corner.
Ncv32u width
Rectangle width.
Ncv32s x
x-coordinate of upper left corner.
__host__ __device__ NcvRect8u()
virtual NcvBool isInitialized(void) const =0
__host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height)
__host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height)
NCVMatrixReuse(const NCVMatrix< T > &mat, NcvRect32u roi)