diff options
Diffstat (limited to '2.3-1/thirdparty/includes/OpenCV/opencv2/gpu/device/vec_distance.hpp')
-rw-r--r-- | 2.3-1/thirdparty/includes/OpenCV/opencv2/gpu/device/vec_distance.hpp | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/2.3-1/thirdparty/includes/OpenCV/opencv2/gpu/device/vec_distance.hpp b/2.3-1/thirdparty/includes/OpenCV/opencv2/gpu/device/vec_distance.hpp new file mode 100644 index 00000000..d5b4bb20 --- /dev/null +++ b/2.3-1/thirdparty/includes/OpenCV/opencv2/gpu/device/vec_distance.hpp @@ -0,0 +1,224 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__ +#define __OPENCV_GPU_VEC_DISTANCE_HPP__ + +#include "reduce.hpp" +#include "functional.hpp" +#include "detail/vec_distance_detail.hpp" + +namespace cv { namespace gpu { namespace device +{ + template <typename T> struct L1Dist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ L1Dist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum = __sad(val1, val2, mySum); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + template <> struct L1Dist<float> + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L1Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + mySum += ::fabs(val1 - val2); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); + } + + __device__ __forceinline__ operator float() const + { + return mySum; + } + + float mySum; + }; + + struct L2Dist + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L2Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + float reg = val1 - val2; + mySum += reg * reg; + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); + } + + __device__ __forceinline__ operator float() const + { + return sqrtf(mySum); + } + + float mySum; + }; + + struct HammingDist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ HammingDist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum += __popc(val1 ^ val2); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + + // calc distance between two vectors in global memory + template <int THREAD_DIM, typename Dist, typename T1, typename T2> + __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + for (int i = tid; i < len; i += THREAD_DIM) + { + T1 val1; + ForceGlob<T1>::Load(vec1, i, val1); + + T2 val2; + ForceGlob<T2>::Load(vec2, i, val2); + + dist.reduceIter(val1, val2); + } + + dist.reduceAll<THREAD_DIM>(smem, tid); + } + + // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory + template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2> + __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid); + + dist.reduceAll<THREAD_DIM>(smem, tid); + } + + // calc distance between two vectors in global memory + template <int THREAD_DIM, typename T1> struct VecDiffGlobal + { + explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0) + { + vec1 = vec1_; + } + + template <typename T2, typename Dist> + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); + } + + const T1* vec1; + }; + + // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory + template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister + { + template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) + { + if (glob_tid < len) + smem[glob_tid] = vec1[glob_tid]; + __syncthreads(); + + U* vec1ValsPtr = vec1Vals; + + #pragma unroll + for (int i = tid; i < MAX_LEN; i += THREAD_DIM) + *vec1ValsPtr++ = smem[i]; + + __syncthreads(); + } + + template <typename T2, typename Dist> + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid); + } + + U vec1Vals[MAX_LEN / THREAD_DIM]; + }; +}}} // namespace cv { namespace gpu { namespace device + +#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__ |