/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__ #define __OPENCV_GPU_VEC_DISTANCE_HPP__ #include "reduce.hpp" #include "functional.hpp" #include "detail/vec_distance_detail.hpp" namespace cv { namespace gpu { namespace device { template <typename T> struct L1Dist { typedef int value_type; typedef int result_type; __device__ __forceinline__ L1Dist() : mySum(0) {} __device__ __forceinline__ void reduceIter(int val1, int val2) { mySum = __sad(val1, val2, mySum); } template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) { reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); } __device__ __forceinline__ operator int() const { return mySum; } int mySum; }; template <> struct L1Dist<float> { typedef float value_type; typedef float result_type; __device__ __forceinline__ L1Dist() : mySum(0.0f) {} __device__ __forceinline__ void reduceIter(float val1, float val2) { mySum += ::fabs(val1 - val2); } template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) { reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); } __device__ __forceinline__ operator float() const { return mySum; } float mySum; }; struct L2Dist { typedef float value_type; typedef float result_type; __device__ __forceinline__ L2Dist() : mySum(0.0f) {} __device__ __forceinline__ void reduceIter(float val1, float val2) { float reg = val1 - val2; mySum += reg * reg; } template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) { reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); } __device__ __forceinline__ operator float() const { return sqrtf(mySum); } float mySum; }; struct HammingDist { typedef int value_type; typedef int result_type; __device__ __forceinline__ HammingDist() : mySum(0) {} __device__ __forceinline__ void reduceIter(int val1, int val2) { mySum += __popc(val1 ^ val2); } template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) { reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); } __device__ __forceinline__ operator int() const { return mySum; } int mySum; }; // calc distance between two vectors in global memory template <int THREAD_DIM, typename Dist, typename T1, typename T2> __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) { for (int i = tid; i < len; i += THREAD_DIM) { T1 val1; ForceGlob<T1>::Load(vec1, i, val1); T2 val2; ForceGlob<T2>::Load(vec2, i, val2); dist.reduceIter(val1, val2); } dist.reduceAll<THREAD_DIM>(smem, tid); } // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2> __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) { vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid); dist.reduceAll<THREAD_DIM>(smem, tid); } // calc distance between two vectors in global memory template <int THREAD_DIM, typename T1> struct VecDiffGlobal { explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0) { vec1 = vec1_; } template <typename T2, typename Dist> __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const { calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); } const T1* vec1; }; // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister { template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) { if (glob_tid < len) smem[glob_tid] = vec1[glob_tid]; __syncthreads(); U* vec1ValsPtr = vec1Vals; #pragma unroll for (int i = tid; i < MAX_LEN; i += THREAD_DIM) *vec1ValsPtr++ = smem[i]; __syncthreads(); } template <typename T2, typename Dist> __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const { calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid); } U vec1Vals[MAX_LEN / THREAD_DIM]; }; }}} // namespace cv { namespace gpu { namespace device #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__