75 files changed, 45852 insertions, 0 deletions
diff --git a/thirdparty/linux/include/opencv2/core/affine.hpp b/thirdparty/linux/include/opencv2/core/affine.hpp
new file mode 100644
index 0000000..311ff62
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/affine.hpp
@@ -0,0 +1,517 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_AFFINE3_HPP
+#define OPENCV_CORE_AFFINE3_HPP
+
+#ifdef __cplusplus
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+    /** @brief Affine transform
+      @todo document
+     */
+    template<typename T>
+    class Affine3
+    {
+    public:
+        typedef T float_type;
+        typedef Matx<float_type, 3, 3> Mat3;
+        typedef Matx<float_type, 4, 4> Mat4;
+        typedef Vec<float_type, 3> Vec3;
+
+        Affine3();
+
+        //! Augmented affine matrix
+        Affine3(const Mat4& affine);
+
+        //! Rotation matrix
+        Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
+
+        //! Rodrigues vector
+        Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
+
+        //! Combines all contructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix
+        explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
+
+        //! From 16th element array
+        explicit Affine3(const float_type* vals);
+
+        //! Create identity transform
+        static Affine3 Identity();
+
+        //! Rotation matrix
+        void rotation(const Mat3& R);
+
+        //! Rodrigues vector
+        void rotation(const Vec3& rvec);
+
+        //! Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
+        void rotation(const Mat& data);
+
+        void linear(const Mat3& L);
+        void translation(const Vec3& t);
+
+        Mat3 rotation() const;
+        Mat3 linear() const;
+        Vec3 translation() const;
+
+        //! Rodrigues vector
+        Vec3 rvec() const;
+
+        Affine3 inv(int method = cv::DECOMP_SVD) const;
+
+        //! a.rotate(R) is equivalent to Affine(R, 0) * a;
+        Affine3 rotate(const Mat3& R) const;
+
+        //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
+        Affine3 rotate(const Vec3& rvec) const;
+
+        //! a.translate(t) is equivalent to Affine(E, t) * a;
+        Affine3 translate(const Vec3& t) const;
+
+        //! a.concatenate(affine) is equivalent to affine * a;
+        Affine3 concatenate(const Affine3& affine) const;
+
+        template <typename Y> operator Affine3<Y>() const;
+
+        template <typename Y> Affine3<Y> cast() const;
+
+        Mat4 matrix;
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
+        operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
+        operator Eigen::Transform<T, 3, Eigen::Affine>() const;
+#endif
+    };
+
+    template<typename T> static
+    Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
+
+    template<typename T, typename V> static
+    V operator*(const Affine3<T>& affine, const V& vector);
+
+    typedef Affine3<float> Affine3f;
+    typedef Affine3<double> Affine3d;
+
+    static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
+    static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
+
+    template<typename _Tp> class DataType< Affine3<_Tp> >
+    {
+    public:
+        typedef Affine3<_Tp>                               value_type;
+        typedef Affine3<typename DataType<_Tp>::work_type> work_type;
+        typedef _Tp                                        channel_type;
+
+        enum { generic_type = 0,
+               depth        = DataType<channel_type>::depth,
+               channels     = 16,
+               fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+               type         = CV_MAKETYPE(depth, channels)
+             };
+
+        typedef Vec<channel_type, channels> vec_type;
+    };
+
+//! @} core
+
+}
+
+//! @cond IGNORED
+
+///////////////////////////////////////////////////////////////////////////////////
+// Implementaiton
+
+template<typename T> inline
+cv::Affine3<T>::Affine3()
+    : matrix(Mat4::eye())
+{}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Mat4& affine)
+    : matrix(affine)
+{}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
+{
+    rotation(R);
+    translation(t);
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
+{
+    rotation(_rvec);
+    translation(t);
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
+{
+    CV_Assert(data.type() == cv::DataType<T>::type);
+
+    if (data.cols == 4 && data.rows == 4)
+    {
+        data.copyTo(matrix);
+        return;
+    }
+    else if (data.cols == 4 && data.rows == 3)
+    {
+        rotation(data(Rect(0, 0, 3, 3)));
+        translation(data(Rect(3, 0, 1, 3)));
+        return;
+    }
+
+    rotation(data);
+    translation(t);
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
+{}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::Identity()
+{
+    return Affine3<T>(cv::Affine3<T>::Mat4::eye());
+}
+
+template<typename T> inline
+void cv::Affine3<T>::rotation(const Mat3& R)
+{
+    linear(R);
+}
+
+template<typename T> inline
+void cv::Affine3<T>::rotation(const Vec3& _rvec)
+{
+    double theta = norm(_rvec);
+
+    if (theta < DBL_EPSILON)
+        rotation(Mat3::eye());
+    else
+    {
+        double c = std::cos(theta);
+        double s = std::sin(theta);
+        double c1 = 1. - c;
+        double itheta = (theta != 0) ? 1./theta : 0.;
+
+        Point3_<T> r = _rvec*itheta;
+
+        Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
+        Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );
+
+        // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
+        // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
+        Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;
+
+        rotation(R);
+    }
+}
+
+//Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
+template<typename T> inline
+void cv::Affine3<T>::rotation(const cv::Mat& data)
+{
+    CV_Assert(data.type() == cv::DataType<T>::type);
+
+    if (data.cols == 3 && data.rows == 3)
+    {
+        Mat3 R;
+        data.copyTo(R);
+        rotation(R);
+    }
+    else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
+    {
+        Vec3 _rvec;
+        data.reshape(1, 3).copyTo(_rvec);
+        rotation(_rvec);
+    }
+    else
+        CV_Assert(!"Input marix can be 3x3, 1x3 or 3x1");
+}
+
+template<typename T> inline
+void cv::Affine3<T>::linear(const Mat3& L)
+{
+    matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1];  matrix.val[ 2] = L.val[2];
+    matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4];  matrix.val[ 6] = L.val[5];
+    matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7];  matrix.val[10] = L.val[8];
+}
+
+template<typename T> inline
+void cv::Affine3<T>::translation(const Vec3& t)
+{
+    matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
+{
+    return linear();
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
+{
+    typename cv::Affine3<T>::Mat3 R;
+    R.val[0] = matrix.val[0];  R.val[1] = matrix.val[1];  R.val[2] = matrix.val[ 2];
+    R.val[3] = matrix.val[4];  R.val[4] = matrix.val[5];  R.val[5] = matrix.val[ 6];
+    R.val[6] = matrix.val[8];  R.val[7] = matrix.val[9];  R.val[8] = matrix.val[10];
+    return R;
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
+{
+    return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
+{
+    cv::Vec3d w;
+    cv::Matx33d u, vt, R = rotation();
+    cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
+    R = u * vt;
+
+    double rx = R.val[7] - R.val[5];
+    double ry = R.val[2] - R.val[6];
+    double rz = R.val[3] - R.val[1];
+
+    double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
+    double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
+    c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
+    double theta = acos(c);
+
+    if( s < 1e-5 )
+    {
+        if( c > 0 )
+            rx = ry = rz = 0;
+        else
+        {
+            double t;
+            t = (R.val[0] + 1) * 0.5;
+            rx = std::sqrt(std::max(t, 0.0));
+            t = (R.val[4] + 1) * 0.5;
+            ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
+            t = (R.val[8] + 1) * 0.5;
+            rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
+
+            if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
+                rz = -rz;
+            theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
+            rx *= theta;
+            ry *= theta;
+            rz *= theta;
+        }
+    }
+    else
+    {
+        double vth = 1/(2*s);
+        vth *= theta;
+        rx *= vth; ry *= vth; rz *= vth;
+    }
+
+    return cv::Vec3d(rx, ry, rz);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::inv(int method) const
+{
+    return matrix.inv(method);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
+{
+    Mat3 Lc = linear();
+    Vec3 tc = translation();
+    Mat4 result;
+    result.val[12] = result.val[13] = result.val[14] = 0;
+    result.val[15] = 1;
+
+    for(int j = 0; j < 3; ++j)
+    {
+        for(int i = 0; i < 3; ++i)
+        {
+            float_type value = 0;
+            for(int k = 0; k < 3; ++k)
+                value += R(j, k) * Lc(k, i);
+            result(j, i) = value;
+        }
+
+        result(j, 3) = R.row(j).dot(tc.t());
+    }
+    return result;
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
+{
+    return rotate(Affine3f(_rvec).rotation());
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
+{
+    Mat4 m = matrix;
+    m.val[ 3] += t[0];
+    m.val[ 7] += t[1];
+    m.val[11] += t[2];
+    return m;
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
+{
+    return (*this).rotate(affine.rotation()).translate(affine.translation());
+}
+
+template<typename T> template <typename Y> inline
+cv::Affine3<T>::operator Affine3<Y>() const
+{
+    return Affine3<Y>(matrix);
+}
+
+template<typename T> template <typename Y> inline
+cv::Affine3<Y> cv::Affine3<T>::cast() const
+{
+    return Affine3<Y>(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
+{
+    return affine2.concatenate(affine1);
+}
+
+template<typename T, typename V> inline
+V cv::operator*(const cv::Affine3<T>& affine, const V& v)
+{
+    const typename Affine3<T>::Mat4& m = affine.matrix;
+
+    V r;
+    r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
+    r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
+    r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
+    return r;
+}
+
+static inline
+cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
+{
+    const cv::Matx44f& m = affine.matrix;
+    cv::Vec3f r;
+    r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
+    r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
+    r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
+    return r;
+}
+
+static inline
+cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
+{
+    const cv::Matx44d& m = affine.matrix;
+    cv::Vec3d r;
+    r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
+    r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
+    r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
+    return r;
+}
+
+
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
+{
+    cv::Mat(4, 4, cv::DataType<T>::type, affine.matrix().data()).copyTo(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
+    cv::Mat(4, 4, cv::DataType<T>::type, a.matrix().data()).copyTo(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
+    cv::Mat hdr(4, 4, cv::DataType<T>::type, r.matrix().data());
+    cv::Mat(matrix, false).copyTo(hdr);
+    return r;
+}
+
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
+{
+    return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
+}
+
+#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
+
+//! @endcond
+
+#endif /* __cplusplus */
+
+#endif /* OPENCV_CORE_AFFINE3_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/base.hpp b/thirdparty/linux/include/opencv2/core/base.hpp
new file mode 100644
index 0000000..017b484
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/base.hpp
@@ -0,0 +1,691 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_BASE_HPP
+#define OPENCV_CORE_BASE_HPP
+
+#ifndef __cplusplus
+#  error base.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/opencv_modules.hpp"
+
+#include <climits>
+#include <algorithm>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+namespace Error {
+//! error codes
+enum Code {
+    StsOk=                       0,  //!< everithing is ok
+    StsBackTrace=               -1,  //!< pseudo error for back trace
+    StsError=                   -2,  //!< unknown /unspecified error
+    StsInternal=                -3,  //!< internal error (bad state)
+    StsNoMem=                   -4,  //!< insufficient memory
+    StsBadArg=                  -5,  //!< function arg/param is bad
+    StsBadFunc=                 -6,  //!< unsupported function
+    StsNoConv=                  -7,  //!< iter. didn't converge
+    StsAutoTrace=               -8,  //!< tracing
+    HeaderIsNull=               -9,  //!< image header is NULL
+    BadImageSize=              -10,  //!< image size is invalid
+    BadOffset=                 -11,  //!< offset is invalid
+    BadDataPtr=                -12,  //!<
+    BadStep=                   -13,  //!<
+    BadModelOrChSeq=           -14,  //!<
+    BadNumChannels=            -15,  //!<
+    BadNumChannel1U=           -16,  //!<
+    BadDepth=                  -17,  //!<
+    BadAlphaChannel=           -18,  //!<
+    BadOrder=                  -19,  //!<
+    BadOrigin=                 -20,  //!<
+    BadAlign=                  -21,  //!<
+    BadCallBack=               -22,  //!<
+    BadTileSize=               -23,  //!<
+    BadCOI=                    -24,  //!<
+    BadROISize=                -25,  //!<
+    MaskIsTiled=               -26,  //!<
+    StsNullPtr=                -27,  //!< null pointer
+    StsVecLengthErr=           -28,  //!< incorrect vector length
+    StsFilterStructContentErr= -29,  //!< incorr. filter structure content
+    StsKernelStructContentErr= -30,  //!< incorr. transform kernel content
+    StsFilterOffsetErr=        -31,  //!< incorrect filter ofset value
+    StsBadSize=                -201, //!< the input/output structure size is incorrect
+    StsDivByZero=              -202, //!< division by zero
+    StsInplaceNotSupported=    -203, //!< in-place operation is not supported
+    StsObjectNotFound=         -204, //!< request can't be completed
+    StsUnmatchedFormats=       -205, //!< formats of input/output arrays differ
+    StsBadFlag=                -206, //!< flag is wrong or not supported
+    StsBadPoint=               -207, //!< bad CvPoint
+    StsBadMask=                -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
+    StsUnmatchedSizes=         -209, //!< sizes of input/output structures do not match
+    StsUnsupportedFormat=      -210, //!< the data format/type is not supported by the function
+    StsOutOfRange=             -211, //!< some of parameters are out of range
+    StsParseError=             -212, //!< invalid syntax/structure of the parsed file
+    StsNotImplemented=         -213, //!< the requested function/feature is not implemented
+    StsBadMemBlock=            -214, //!< an allocated block has been corrupted
+    StsAssert=                 -215, //!< assertion failed
+    GpuNotSupported=           -216,
+    GpuApiCallError=           -217,
+    OpenGlNotSupported=        -218,
+    OpenGlApiCallError=        -219,
+    OpenCLApiCallError=        -220,
+    OpenCLDoubleNotSupported=  -221,
+    OpenCLInitError=           -222,
+    OpenCLNoAMDBlasFft=        -223
+};
+} //Error
+
+//! @} core_utils
+
+//! @addtogroup core_array
+//! @{
+
+//! matrix decomposition types
+enum DecompTypes {
+    /** Gaussian elimination with the optimal pivot element chosen. */
+    DECOMP_LU       = 0,
+    /** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
+    src1 can be singular */
+    DECOMP_SVD      = 1,
+    /** eigenvalue decomposition; the matrix src1 must be symmetrical */
+    DECOMP_EIG      = 2,
+    /** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
+    defined */
+    DECOMP_CHOLESKY = 3,
+    /** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
+    DECOMP_QR       = 4,
+    /** while all the previous flags are mutually exclusive, this flag can be used together with
+    any of the previous; it means that the normal equations
+    \f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
+    solved instead of the original system
+    \f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
+    DECOMP_NORMAL   = 16
+};
+
+/** norm types
+- For one array:
+\f[norm =  \forkthree{\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+{ \| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
+{ \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
+
+- Absolute norm for two arrays
+\f[norm =  \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+{ \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
+{ \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
+
+- Relative norm for two arrays
+\f[norm =  \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_INF}\) }
+{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L1}\) }
+{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L2}\) }\f]
+
+As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
+    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
+    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
+    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
+    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
+\f}
+The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$.
+It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$.
+![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png)
+ */
+enum NormTypes { NORM_INF       = 1,
+                 NORM_L1        = 2,
+                 NORM_L2        = 4,
+                 NORM_L2SQR     = 5,
+                 NORM_HAMMING   = 6,
+                 NORM_HAMMING2  = 7,
+                 NORM_TYPE_MASK = 7,
+                 NORM_RELATIVE  = 8, //!< flag
+                 NORM_MINMAX    = 32 //!< flag
+               };
+
+//! comparison types
+enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
+                CMP_GT = 1, //!< src1 is greater than src2.
+                CMP_GE = 2, //!< src1 is greater than or equal to src2.
+                CMP_LT = 3, //!< src1 is less than src2.
+                CMP_LE = 4, //!< src1 is less than or equal to src2.
+                CMP_NE = 5  //!< src1 is unequal to src2.
+              };
+
+//! generalized matrix multiplication flags
+enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
+                 GEMM_2_T = 2, //!< transposes src2
+                 GEMM_3_T = 4 //!< transposes src3
+               };
+
+enum DftFlags {
+    /** performs an inverse 1D or 2D transform instead of the default forward
+        transform. */
+    DFT_INVERSE        = 1,
+    /** scales the result: divide it by the number of array elements. Normally, it is
+        combined with DFT_INVERSE. */
+    DFT_SCALE          = 2,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself) to
+        perform 3D and higher-dimensional transformations and so forth.*/
+    DFT_ROWS           = 4,
+    /** performs a forward transformation of 1D or 2D real array; the result,
+        though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
+        description below for details), and such an array can be packed into a real array of the same
+        size as input, which is the fastest option and which is what the function does by default;
+        however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
+        pass the flag to enable the function to produce a full-size complex output array. */
+    DFT_COMPLEX_OUTPUT = 16,
+    /** performs an inverse transformation of a 1D or 2D complex array; the
+        result is normally a complex array of the same size, however, if the input array has
+        conjugate-complex symmetry (for example, it is a result of forward transformation with
+        DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
+        check whether the input is symmetrical or not, you can pass the flag and then the function
+        will assume the symmetry and produce the real output array (note that when the input is packed
+        into a real array and inverse transformation is executed, the function treats the input as a
+        packed complex-conjugate symmetrical array, and the output will also be a real array). */
+    DFT_REAL_OUTPUT    = 32,
+    /** performs an inverse 1D or 2D transform instead of the default forward transform. */
+    DCT_INVERSE        = DFT_INVERSE,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself) to
+        perform 3D and higher-dimensional transforms and so forth.*/
+    DCT_ROWS           = DFT_ROWS
+};
+
+//! Various border types, image boundaries are denoted with `|`
+//! @see borderInterpolate, copyMakeBorder
+enum BorderTypes {
+    BORDER_CONSTANT    = 0, //!< `iiiiii|abcdefgh|iiiiiii`  with some specified `i`
+    BORDER_REPLICATE   = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
+    BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
+    BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
+    BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|absdefgh|ijklmno`
+
+    BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_ISOLATED    = 16 //!< do not look outside of ROI
+};
+
+//! @} core_array
+
+//! @addtogroup core_utils
+//! @{
+
+//! @cond IGNORED
+
+//////////////// static assert /////////////////
+#define CVAUX_CONCAT_EXP(a, b) a##b
+#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
+
+#if defined(__clang__)
+#  ifndef __has_extension
+#    define __has_extension __has_feature /* compatibility, for older versions of clang */
+#  endif
+#  if __has_extension(cxx_static_assert)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  elif __has_extension(c_static_assert)
+#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(__GNUC__)
+#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(_MSC_VER)
+#  if _MSC_VER >= 1600 /* MSVC 10 */
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#endif
+#ifndef CV_StaticAssert
+#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
+#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
+#  else
+     template <bool x> struct CV_StaticAssert_failed;
+     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
+     template<int x> struct CV_StaticAssert_test {};
+#    define CV_StaticAssert(condition, reason)\
+       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
+#  endif
+#endif
+
+// Suppress warning "-Wdeprecated-declarations" / C4996
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+#else
+    #define CV_DO_PRAGMA(x)
+#endif
+
+#ifdef _MSC_VER
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(warning(push)) \
+    CV_DO_PRAGMA(warning(disable: 4996))
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
+#elif defined (__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(GCC diagnostic push) \
+    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
+#else
+#define CV_SUPPRESS_DEPRECATED_START
+#define CV_SUPPRESS_DEPRECATED_END
+#endif
+#define CV_UNUSED(name) (void)name
+//! @endcond
+
+/*! @brief Signals an error and raises the exception.
+
+By default the function prints information about the error to stderr,
+then it either stops if setBreakOnError() had been called before or raises the exception.
+It is possible to alternate error processing by using redirectError().
+@param _code - error code (Error::Code)
+@param _err - error description
+@param _func - function name. Available only when the compiler supports getting it
+@param _file - source file name where the error has occured
+@param _line - line number in the source file where the error has occured
+@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
+ */
+CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
+
+#ifdef __GNUC__
+# if defined __clang__ || defined __APPLE__
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Winvalid-noreturn"
+# endif
+#endif
+
+/** same as cv::error, but does not return */
+CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
+{
+    error(_code, _err, _func, _file, _line);
+#ifdef __GNUC__
+# if !defined __clang__ && !defined __APPLE__
+    // this suppresses this warning: "noreturn" function does return [enabled by default]
+    __builtin_trap();
+    // or use infinite loop: for (;;) {}
+# endif
+#endif
+}
+#ifdef __GNUC__
+# if defined __clang__ || defined __APPLE__
+#   pragma GCC diagnostic pop
+# endif
+#endif
+
+#if defined __GNUC__
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
+#else
+#define CV_Func ""
+#endif
+
+/** @brief Call the error handler.
+
+Currently, the error handler prints the error code and the error message to the standard
+error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
+the execution stack and all the parameters can be analyzed by the debugger. In the Release
+configuration, the exception is thrown.
+
+@param code one of Error::Code
+@param msg error message
+*/
+#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
+
+/**  @brief Call the error handler.
+
+This macro can be used to construct an error message on-fly to include some dynamic information,
+for example:
+@code
+    // note the extra parentheses around the formatted text message
+    CV_Error_( CV_StsOutOfRange,
+    ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
+@endcode
+@param code one of Error::Code
+@param args printf-like formatted error message in parentheses
+*/
+#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+
+/** @brief Checks a condition at runtime and throws exception if it fails
+
+The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
+raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
+configurations while CV_DbgAssert is only retained in the Debug configuration.
+*/
+#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
+
+/** same as CV_Error(code,msg), but does not return */
+#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
+
+/** same as CV_Error_(code,args), but does not return */
+#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+
+/** replaced with CV_Assert(expr) in Debug configuration */
+#ifdef _DEBUG
+#  define CV_DbgAssert(expr) CV_Assert(expr)
+#else
+#  define CV_DbgAssert(expr)
+#endif
+
+/*
+ * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ * bit count of A exclusive XOR'ed with B
+ */
+struct CV_EXPORTS Hamming
+{
+    enum { normType = NORM_HAMMING };
+    typedef unsigned char ValueType;
+    typedef int ResultType;
+
+    /** this will count the bits in a ^ b
+     */
+    ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
+};
+
+typedef Hamming HammingLUT;
+
+/////////////////////////////////// inline norms ////////////////////////////////////
+
+template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
+inline int cv_abs(uchar x) { return x; }
+inline int cv_abs(schar x) { return std::abs(x); }
+inline int cv_abs(ushort x) { return x; }
+inline int cv_abs(short x) { return std::abs(x); }
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL2Sqr(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i=0;
+#if CV_ENABLE_UNROLLED
+    for( ; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = a[i];
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL1(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i = 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
+            (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
+    }
+#endif
+    for( ; i < n; i++ )
+        s += cv_abs(a[i]);
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normInf(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    for( int i = 0; i < n; i++ )
+        s = std::max(s, (_AccTp)cv_abs(a[i]));
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);
+        s += v*v;
+    }
+    return s;
+}
+
+static inline float normL2Sqr(const float* a, const float* b, int n)
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )
+    {
+        float v = a[i] - b[i];
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL1(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);
+        s += std::abs(v);
+    }
+    return s;
+}
+
+inline float normL1(const float* a, const float* b, int n)
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);
+    }
+    return s;
+}
+
+inline int normL1(const uchar* a, const uchar* b, int n)
+{
+    int s = 0;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normInf(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    for( int i = 0; i < n; i++ )
+    {
+        _AccTp v0 = a[i] - b[i];
+        s = std::max(s, std::abs(v0));
+    }
+    return s;
+}
+
+/** @brief Computes the cube root of an argument.
+
+ The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
+ NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
+ single-precision data.
+ @param val A function argument.
+ */
+CV_EXPORTS_W float cubeRoot(float val);
+
+/** @brief Calculates the angle of a 2D vector in degrees.
+
+ The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
+ in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
+ @param x x-coordinate of the vector.
+ @param y y-coordinate of the vector.
+ */
+CV_EXPORTS_W float fastAtan2(float y, float x);
+
+/** proxy for hal::LU */
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::LU */
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+////////////////// forward declarations for important OpenCV types //////////////////
+
+//! @cond IGNORED
+
+template<typename _Tp, int cn> class Vec;
+template<typename _Tp, int m, int n> class Matx;
+
+template<typename _Tp> class Complex;
+template<typename _Tp> class Point_;
+template<typename _Tp> class Point3_;
+template<typename _Tp> class Size_;
+template<typename _Tp> class Rect_;
+template<typename _Tp> class Scalar_;
+
+class CV_EXPORTS RotatedRect;
+class CV_EXPORTS Range;
+class CV_EXPORTS TermCriteria;
+class CV_EXPORTS KeyPoint;
+class CV_EXPORTS DMatch;
+class CV_EXPORTS RNG;
+
+class CV_EXPORTS Mat;
+class CV_EXPORTS MatExpr;
+
+class CV_EXPORTS UMat;
+
+class CV_EXPORTS SparseMat;
+typedef Mat MatND;
+
+template<typename _Tp> class Mat_;
+template<typename _Tp> class SparseMat_;
+
+class CV_EXPORTS MatConstIterator;
+class CV_EXPORTS SparseMatIterator;
+class CV_EXPORTS SparseMatConstIterator;
+template<typename _Tp> class MatIterator_;
+template<typename _Tp> class MatConstIterator_;
+template<typename _Tp> class SparseMatIterator_;
+template<typename _Tp> class SparseMatConstIterator_;
+
+namespace ogl
+{
+    class CV_EXPORTS Buffer;
+    class CV_EXPORTS Texture2D;
+    class CV_EXPORTS Arrays;
+}
+
+namespace cuda
+{
+    class CV_EXPORTS GpuMat;
+    class CV_EXPORTS HostMem;
+    class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
+}
+
+namespace cudev
+{
+    template <typename _Tp> class GpuMat_;
+}
+
+namespace ipp
+{
+CV_EXPORTS int getIppFeatures();
+CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
+                             int line = 0);
+CV_EXPORTS int getIppStatus();
+CV_EXPORTS String getIppErrorLocation();
+CV_EXPORTS bool useIPP();
+CV_EXPORTS void setUseIPP(bool flag);
+
+} // ipp
+
+//! @endcond
+
+//! @} core_utils
+
+
+
+
+} // cv
+
+#include "opencv2/core/neon_utils.hpp"
+
+#endif //OPENCV_CORE_BASE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/bufferpool.hpp b/thirdparty/linux/include/opencv2/core/bufferpool.hpp
new file mode 100644
index 0000000..9e7b7c2
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/bufferpool.hpp
@@ -0,0 +1,31 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#ifndef OPENCV_CORE_BUFFER_POOL_HPP
+#define OPENCV_CORE_BUFFER_POOL_HPP
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+class BufferPoolController
+{
+protected:
+    ~BufferPoolController() { }
+public:
+    virtual size_t getReservedSize() const = 0;
+    virtual size_t getMaxReservedSize() const = 0;
+    virtual void setMaxReservedSize(size_t size) = 0;
+    virtual void freeAllReservedBuffers() = 0;
+};
+
+//! @}
+
+}
+
+#endif // OPENCV_CORE_BUFFER_POOL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/core.hpp b/thirdparty/linux/include/opencv2/core/core.hpp
new file mode 100644
index 0000000..4389183
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/core.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/core.hpp"
diff --git a/thirdparty/linux/include/opencv2/core/core_c.h b/thirdparty/linux/include/opencv2/core/core_c.h
new file mode 100644
index 0000000..e12f79d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/core_c.h
@@ -0,0 +1,3184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef OPENCV_CORE_C_H
+#define OPENCV_CORE_C_H
+
+#include "opencv2/core/types_c.h"
+
+#ifdef __cplusplus
+#  ifdef _MSC_VER
+/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename'
+                          which is incompatible with C
+
+   It is OK to disable it because we only extend few plain structures with
+   C++ construrtors for simpler interoperability with C++ API of the library
+*/
+#    pragma warning(disable:4190)
+#  elif defined __clang__ && __clang_major__ >= 3
+#    pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @addtogroup core_c
+    @{
+*/
+
+/****************************************************************************************\
+*          Array allocation, deallocation, initialization and access to elements         *
+\****************************************************************************************/
+
+/** `malloc` wrapper.
+   If there is no enough memory, the function
+   (as well as other OpenCV functions that call cvAlloc)
+   raises an error. */
+CVAPI(void*)  cvAlloc( size_t size );
+
+/** `free` wrapper.
+   Here and further all the memory releasing functions
+   (that all call cvFree) take double pointer in order to
+   to clear pointer to the data after releasing it.
+   Passing pointer to NULL pointer is Ok: nothing happens in this case
+*/
+CVAPI(void)   cvFree_( void* ptr );
+#define cvFree(ptr) (cvFree_(*(ptr)), *(ptr)=0)
+
+/** @brief Creates an image header but does not allocate the image data.
+
+@param size Image width and height
+@param depth Image depth (see cvCreateImage )
+@param channels Number of channels (see cvCreateImage )
+ */
+CVAPI(IplImage*)  cvCreateImageHeader( CvSize size, int depth, int channels );
+
+/** @brief Initializes an image header that was previously allocated.
+
+The returned IplImage\* points to the initialized header.
+@param image Image header to initialize
+@param size Image width and height
+@param depth Image depth (see cvCreateImage )
+@param channels Number of channels (see cvCreateImage )
+@param origin Top-left IPL_ORIGIN_TL or bottom-left IPL_ORIGIN_BL
+@param align Alignment for image rows, typically 4 or 8 bytes
+ */
+CVAPI(IplImage*) cvInitImageHeader( IplImage* image, CvSize size, int depth,
+                                   int channels, int origin CV_DEFAULT(0),
+                                   int align CV_DEFAULT(4));
+
+/** @brief Creates an image header and allocates the image data.
+
+This function call is equivalent to the following code:
+@code
+    header = cvCreateImageHeader(size, depth, channels);
+    cvCreateData(header);
+@endcode
+@param size Image width and height
+@param depth Bit depth of image elements. See IplImage for valid depths.
+@param channels Number of channels per pixel. See IplImage for details. This function only creates
+images with interleaved channels.
+ */
+CVAPI(IplImage*)  cvCreateImage( CvSize size, int depth, int channels );
+
+/** @brief Deallocates an image header.
+
+This call is an analogue of :
+@code
+    if(image )
+    {
+        iplDeallocate(*image, IPL_IMAGE_HEADER | IPL_IMAGE_ROI);
+        *image = 0;
+    }
+@endcode
+but it does not use IPL functions by default (see the CV_TURN_ON_IPL_COMPATIBILITY macro).
+@param image Double pointer to the image header
+ */
+CVAPI(void)  cvReleaseImageHeader( IplImage** image );
+
+/** @brief Deallocates the image header and the image data.
+
+This call is a shortened form of :
+@code
+    if(*image )
+    {
+        cvReleaseData(*image);
+        cvReleaseImageHeader(image);
+    }
+@endcode
+@param image Double pointer to the image header
+*/
+CVAPI(void)  cvReleaseImage( IplImage** image );
+
+/** Creates a copy of IPL image (widthStep may differ) */
+CVAPI(IplImage*) cvCloneImage( const IplImage* image );
+
+/** @brief Sets the channel of interest in an IplImage.
+
+If the ROI is set to NULL and the coi is *not* 0, the ROI is allocated. Most OpenCV functions do
+*not* support the COI setting, so to process an individual image/matrix channel one may copy (via
+cvCopy or cvSplit) the channel to a separate image/matrix, process it and then copy the result
+back (via cvCopy or cvMerge) if needed.
+@param image A pointer to the image header
+@param coi The channel of interest. 0 - all channels are selected, 1 - first channel is selected,
+etc. Note that the channel indices become 1-based.
+ */
+CVAPI(void)  cvSetImageCOI( IplImage* image, int coi );
+
+/** @brief Returns the index of the channel of interest.
+
+Returns the channel of interest of in an IplImage. Returned values correspond to the coi in
+cvSetImageCOI.
+@param image A pointer to the image header
+ */
+CVAPI(int)  cvGetImageCOI( const IplImage* image );
+
+/** @brief Sets an image Region Of Interest (ROI) for a given rectangle.
+
+If the original image ROI was NULL and the rect is not the whole image, the ROI structure is
+allocated.
+
+Most OpenCV functions support the use of ROI and treat the image rectangle as a separate image. For
+example, all of the pixel coordinates are counted from the top-left (or bottom-left) corner of the
+ROI, not the original image.
+@param image A pointer to the image header
+@param rect The ROI rectangle
+ */
+CVAPI(void)  cvSetImageROI( IplImage* image, CvRect rect );
+
+/** @brief Resets the image ROI to include the entire image and releases the ROI structure.
+
+This produces a similar result to the following, but in addition it releases the ROI structure. :
+@code
+    cvSetImageROI(image, cvRect(0, 0, image->width, image->height ));
+    cvSetImageCOI(image, 0);
+@endcode
+@param image A pointer to the image header
+ */
+CVAPI(void)  cvResetImageROI( IplImage* image );
+
+/** @brief Returns the image ROI.
+
+If there is no ROI set, cvRect(0,0,image-\>width,image-\>height) is returned.
+@param image A pointer to the image header
+ */
+CVAPI(CvRect) cvGetImageROI( const IplImage* image );
+
+/** @brief Creates a matrix header but does not allocate the matrix data.
+
+The function allocates a new matrix header and returns a pointer to it. The matrix data can then be
+allocated using cvCreateData or set explicitly to user-allocated data via cvSetData.
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type Type of the matrix elements, see cvCreateMat
+ */
+CVAPI(CvMat*)  cvCreateMatHeader( int rows, int cols, int type );
+
+#define CV_AUTOSTEP  0x7fffffff
+
+/** @brief Initializes a pre-allocated matrix header.
+
+This function is often used to process raw data with OpenCV matrix functions. For example, the
+following code computes the matrix product of two matrices, stored as ordinary arrays:
+@code
+    double a[] = { 1, 2, 3, 4,
+                   5, 6, 7, 8,
+                   9, 10, 11, 12 };
+
+    double b[] = { 1, 5, 9,
+                   2, 6, 10,
+                   3, 7, 11,
+                   4, 8, 12 };
+
+    double c[9];
+    CvMat Ma, Mb, Mc ;
+
+    cvInitMatHeader(&Ma, 3, 4, CV_64FC1, a);
+    cvInitMatHeader(&Mb, 4, 3, CV_64FC1, b);
+    cvInitMatHeader(&Mc, 3, 3, CV_64FC1, c);
+
+    cvMatMulAdd(&Ma, &Mb, 0, &Mc);
+    // the c array now contains the product of a (3x4) and b (4x3)
+@endcode
+@param mat A pointer to the matrix header to be initialized
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type Type of the matrix elements, see cvCreateMat .
+@param data Optional: data pointer assigned to the matrix header
+@param step Optional: full row width in bytes of the assigned data. By default, the minimal
+possible step is used which assumes there are no gaps between subsequent rows of the matrix.
+ */
+CVAPI(CvMat*) cvInitMatHeader( CvMat* mat, int rows, int cols,
+                              int type, void* data CV_DEFAULT(NULL),
+                              int step CV_DEFAULT(CV_AUTOSTEP) );
+
+/** @brief Creates a matrix header and allocates the matrix data.
+
+The function call is equivalent to the following code:
+@code
+    CvMat* mat = cvCreateMatHeader(rows, cols, type);
+    cvCreateData(mat);
+@endcode
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type The type of the matrix elements in the form
+CV_\<bit depth\>\<S|U|F\>C\<number of channels\> , where S=signed, U=unsigned, F=float. For
+example, CV _ 8UC1 means the elements are 8-bit unsigned and the there is 1 channel, and CV _
+32SC2 means the elements are 32-bit signed and there are 2 channels.
+ */
+CVAPI(CvMat*)  cvCreateMat( int rows, int cols, int type );
+
+/** @brief Deallocates a matrix.
+
+The function decrements the matrix data reference counter and deallocates matrix header. If the data
+reference counter is 0, it also deallocates the data. :
+@code
+    if(*mat )
+        cvDecRefData(*mat);
+    cvFree((void**)mat);
+@endcode
+@param mat Double pointer to the matrix
+ */
+CVAPI(void)  cvReleaseMat( CvMat** mat );
+
+/** @brief Decrements an array data reference counter.
+
+The function decrements the data reference counter in a CvMat or CvMatND if the reference counter
+
+pointer is not NULL. If the counter reaches zero, the data is deallocated. In the current
+implementation the reference counter is not NULL only if the data was allocated using the
+cvCreateData function. The counter will be NULL in other cases such as: external data was assigned
+to the header using cvSetData, header is part of a larger matrix or image, or the header was
+converted from an image or n-dimensional matrix header.
+@param arr Pointer to an array header
+ */
+CV_INLINE  void  cvDecRefData( CvArr* arr )
+{
+    if( CV_IS_MAT( arr ))
+    {
+        CvMat* mat = (CvMat*)arr;
+        mat->data.ptr = NULL;
+        if( mat->refcount != NULL && --*mat->refcount == 0 )
+            cvFree( &mat->refcount );
+        mat->refcount = NULL;
+    }
+    else if( CV_IS_MATND( arr ))
+    {
+        CvMatND* mat = (CvMatND*)arr;
+        mat->data.ptr = NULL;
+        if( mat->refcount != NULL && --*mat->refcount == 0 )
+            cvFree( &mat->refcount );
+        mat->refcount = NULL;
+    }
+}
+
+/** @brief Increments array data reference counter.
+
+The function increments CvMat or CvMatND data reference counter and returns the new counter value if
+the reference counter pointer is not NULL, otherwise it returns zero.
+@param arr Array header
+ */
+CV_INLINE  int  cvIncRefData( CvArr* arr )
+{
+    int refcount = 0;
+    if( CV_IS_MAT( arr ))
+    {
+        CvMat* mat = (CvMat*)arr;
+        if( mat->refcount != NULL )
+            refcount = ++*mat->refcount;
+    }
+    else if( CV_IS_MATND( arr ))
+    {
+        CvMatND* mat = (CvMatND*)arr;
+        if( mat->refcount != NULL )
+            refcount = ++*mat->refcount;
+    }
+    return refcount;
+}
+
+
+/** Creates an exact copy of the input matrix (except, may be, step value) */
+CVAPI(CvMat*) cvCloneMat( const CvMat* mat );
+
+
+/** @brief Returns matrix header corresponding to the rectangular sub-array of input image or matrix.
+
+The function returns header, corresponding to a specified rectangle of the input array. In other
+
+words, it allows the user to treat a rectangular part of input array as a stand-alone array. ROI is
+taken into account by the function so the sub-array of ROI is actually extracted.
+@param arr Input array
+@param submat Pointer to the resultant sub-array header
+@param rect Zero-based coordinates of the rectangle of interest
+ */
+CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect );
+#define cvGetSubArr cvGetSubRect
+
+/** @brief Returns array row or row span.
+
+The functions return the header, corresponding to a specified row/row span of the input array.
+cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1).
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param start_row Zero-based index of the starting row (inclusive) of the span
+@param end_row Zero-based index of the ending row (exclusive) of the span
+@param delta_row Index step in the row span. That is, the function extracts every delta_row -th
+row from start_row and up to (but not including) end_row .
+ */
+CVAPI(CvMat*) cvGetRows( const CvArr* arr, CvMat* submat,
+                        int start_row, int end_row,
+                        int delta_row CV_DEFAULT(1));
+
+/** @overload
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param row Zero-based index of the selected row
+*/
+CV_INLINE  CvMat*  cvGetRow( const CvArr* arr, CvMat* submat, int row )
+{
+    return cvGetRows( arr, submat, row, row + 1, 1 );
+}
+
+
+/** @brief Returns one of more array columns.
+
+The functions return the header, corresponding to a specified column span of the input array. That
+
+is, no data is copied. Therefore, any modifications of the submatrix will affect the original array.
+If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for
+cvGetCols(arr, submat, col, col+1).
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param start_col Zero-based index of the starting column (inclusive) of the span
+@param end_col Zero-based index of the ending column (exclusive) of the span
+ */
+CVAPI(CvMat*) cvGetCols( const CvArr* arr, CvMat* submat,
+                        int start_col, int end_col );
+
+/** @overload
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param col Zero-based index of the selected column
+*/
+CV_INLINE  CvMat*  cvGetCol( const CvArr* arr, CvMat* submat, int col )
+{
+    return cvGetCols( arr, submat, col, col + 1 );
+}
+
+/** @brief Returns one of array diagonals.
+
+The function returns the header, corresponding to a specified diagonal of the input array.
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param diag Index of the array diagonal. Zero value corresponds to the main diagonal, -1
+corresponds to the diagonal above the main, 1 corresponds to the diagonal below the main, and so
+forth.
+ */
+CVAPI(CvMat*) cvGetDiag( const CvArr* arr, CvMat* submat,
+                            int diag CV_DEFAULT(0));
+
+/** low-level scalar <-> raw data conversion functions */
+CVAPI(void) cvScalarToRawData( const CvScalar* scalar, void* data, int type,
+                              int extend_to_12 CV_DEFAULT(0) );
+
+CVAPI(void) cvRawDataToScalar( const void* data, int type, CvScalar* scalar );
+
+/** @brief Creates a new matrix header but does not allocate the matrix data.
+
+The function allocates a header for a multi-dimensional dense array. The array data can further be
+allocated using cvCreateData or set explicitly to user-allocated data via cvSetData.
+@param dims Number of array dimensions
+@param sizes Array of dimension sizes
+@param type Type of array elements, see cvCreateMat
+ */
+CVAPI(CvMatND*)  cvCreateMatNDHeader( int dims, const int* sizes, int type );
+
+/** @brief Creates the header and allocates the data for a multi-dimensional dense array.
+
+This function call is equivalent to the following code:
+@code
+    CvMatND* mat = cvCreateMatNDHeader(dims, sizes, type);
+    cvCreateData(mat);
+@endcode
+@param dims Number of array dimensions. This must not exceed CV_MAX_DIM (32 by default, but can be
+changed at build time).
+@param sizes Array of dimension sizes.
+@param type Type of array elements, see cvCreateMat .
+ */
+CVAPI(CvMatND*)  cvCreateMatND( int dims, const int* sizes, int type );
+
+/** @brief Initializes a pre-allocated multi-dimensional array header.
+
+@param mat A pointer to the array header to be initialized
+@param dims The number of array dimensions
+@param sizes An array of dimension sizes
+@param type Type of array elements, see cvCreateMat
+@param data Optional data pointer assigned to the matrix header
+ */
+CVAPI(CvMatND*)  cvInitMatNDHeader( CvMatND* mat, int dims, const int* sizes,
+                                    int type, void* data CV_DEFAULT(NULL) );
+
+/** @brief Deallocates a multi-dimensional array.
+
+The function decrements the array data reference counter and releases the array header. If the
+reference counter reaches 0, it also deallocates the data. :
+@code
+    if(*mat )
+        cvDecRefData(*mat);
+    cvFree((void**)mat);
+@endcode
+@param mat Double pointer to the array
+ */
+CV_INLINE  void  cvReleaseMatND( CvMatND** mat )
+{
+    cvReleaseMat( (CvMat**)mat );
+}
+
+/** Creates a copy of CvMatND (except, may be, steps) */
+CVAPI(CvMatND*) cvCloneMatND( const CvMatND* mat );
+
+/** @brief Creates sparse array.
+
+The function allocates a multi-dimensional sparse array. Initially the array contain no elements,
+that is PtrND and other related functions will return 0 for every index.
+@param dims Number of array dimensions. In contrast to the dense matrix, the number of dimensions is
+practically unlimited (up to \f$2^{16}\f$ ).
+@param sizes Array of dimension sizes
+@param type Type of array elements. The same as for CvMat
+ */
+CVAPI(CvSparseMat*)  cvCreateSparseMat( int dims, const int* sizes, int type );
+
+/** @brief Deallocates sparse array.
+
+The function releases the sparse array and clears the array pointer upon exit.
+@param mat Double pointer to the array
+ */
+CVAPI(void)  cvReleaseSparseMat( CvSparseMat** mat );
+
+/** Creates a copy of CvSparseMat (except, may be, zero items) */
+CVAPI(CvSparseMat*) cvCloneSparseMat( const CvSparseMat* mat );
+
+/** @brief Initializes sparse array elements iterator.
+
+The function initializes iterator of sparse array elements and returns pointer to the first element,
+or NULL if the array is empty.
+@param mat Input array
+@param mat_iterator Initialized iterator
+ */
+CVAPI(CvSparseNode*) cvInitSparseMatIterator( const CvSparseMat* mat,
+                                              CvSparseMatIterator* mat_iterator );
+
+/** @brief Returns the next sparse matrix element
+
+The function moves iterator to the next sparse matrix element and returns pointer to it. In the
+current version there is no any particular order of the elements, because they are stored in the
+hash table. The sample below demonstrates how to iterate through the sparse matrix:
+@code
+    // print all the non-zero sparse matrix elements and compute their sum
+    double sum = 0;
+    int i, dims = cvGetDims(sparsemat);
+    CvSparseMatIterator it;
+    CvSparseNode* node = cvInitSparseMatIterator(sparsemat, &it);
+
+    for(; node != 0; node = cvGetNextSparseNode(&it))
+    {
+        int* idx = CV_NODE_IDX(array, node);
+        float val = *(float*)CV_NODE_VAL(array, node);
+        printf("M");
+        for(i = 0; i < dims; i++ )
+            printf("[%d]", idx[i]);
+        printf("=%g\n", val);
+
+        sum += val;
+    }
+
+    printf("nTotal sum = %g\n", sum);
+@endcode
+@param mat_iterator Sparse array iterator
+ */
+CV_INLINE CvSparseNode* cvGetNextSparseNode( CvSparseMatIterator* mat_iterator )
+{
+    if( mat_iterator->node->next )
+        return mat_iterator->node = mat_iterator->node->next;
+    else
+    {
+        int idx;
+        for( idx = ++mat_iterator->curidx; idx < mat_iterator->mat->hashsize; idx++ )
+        {
+            CvSparseNode* node = (CvSparseNode*)mat_iterator->mat->hashtable[idx];
+            if( node )
+            {
+                mat_iterator->curidx = idx;
+                return mat_iterator->node = node;
+            }
+        }
+        return NULL;
+    }
+}
+
+
+#define CV_MAX_ARR 10
+
+/** matrix iterator: used for n-ary operations on dense arrays */
+typedef struct CvNArrayIterator
+{
+    int count; /**< number of arrays */
+    int dims; /**< number of dimensions to iterate */
+    CvSize size; /**< maximal common linear size: { width = size, height = 1 } */
+    uchar* ptr[CV_MAX_ARR]; /**< pointers to the array slices */
+    int stack[CV_MAX_DIM]; /**< for internal use */
+    CvMatND* hdr[CV_MAX_ARR]; /**< pointers to the headers of the
+                                 matrices that are processed */
+}
+CvNArrayIterator;
+
+#define CV_NO_DEPTH_CHECK     1
+#define CV_NO_CN_CHECK        2
+#define CV_NO_SIZE_CHECK      4
+
+/** initializes iterator that traverses through several arrays simulteneously
+   (the function together with cvNextArraySlice is used for
+    N-ari element-wise operations) */
+CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs,
+                                 const CvArr* mask, CvMatND* stubs,
+                                 CvNArrayIterator* array_iterator,
+                                 int flags CV_DEFAULT(0) );
+
+/** returns zero value if iteration is finished, non-zero (slice length) otherwise */
+CVAPI(int) cvNextNArraySlice( CvNArrayIterator* array_iterator );
+
+
+/** @brief Returns type of array elements.
+
+The function returns type of the array elements. In the case of IplImage the type is converted to
+CvMat-like representation. For example, if the image has been created as:
+@code
+    IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3);
+@endcode
+The code cvGetElemType(img) will return CV_8UC3.
+@param arr Input array
+ */
+CVAPI(int) cvGetElemType( const CvArr* arr );
+
+/** @brief Return number of array dimensions
+
+The function returns the array dimensionality and the array of dimension sizes. In the case of
+IplImage or CvMat it always returns 2 regardless of number of image/matrix rows. For example, the
+following code calculates total number of array elements:
+@code
+    int sizes[CV_MAX_DIM];
+    int i, total = 1;
+    int dims = cvGetDims(arr, size);
+    for(i = 0; i < dims; i++ )
+        total *= sizes[i];
+@endcode
+@param arr Input array
+@param sizes Optional output vector of the array dimension sizes. For 2d arrays the number of rows
+(height) goes first, number of columns (width) next.
+ */
+CVAPI(int) cvGetDims( const CvArr* arr, int* sizes CV_DEFAULT(NULL) );
+
+
+/** @brief Returns array size along the specified dimension.
+
+@param arr Input array
+@param index Zero-based dimension index (for matrices 0 means number of rows, 1 means number of
+columns; for images 0 means height, 1 means width)
+ */
+CVAPI(int) cvGetDimSize( const CvArr* arr, int index );
+
+
+/** @brief Return pointer to a particular array element.
+
+The functions return a pointer to a specific array element. Number of array dimension should match
+to the number of indices passed to the function except for cvPtr1D function that can be used for
+sequential access to 1D, 2D or nD dense arrays.
+
+The functions can be used for sparse arrays as well - if the requested node does not exist they
+create it and set it to zero.
+
+All these as well as other functions accessing array elements ( cvGetND , cvGetRealND , cvSet
+, cvSetND , cvSetRealND ) raise an error in case if the element index is out of range.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param type Optional output parameter: type of matrix elements
+ */
+CVAPI(uchar*) cvPtr1D( const CvArr* arr, int idx0, int* type CV_DEFAULT(NULL));
+/** @overload */
+CVAPI(uchar*) cvPtr2D( const CvArr* arr, int idx0, int idx1, int* type CV_DEFAULT(NULL) );
+/** @overload */
+CVAPI(uchar*) cvPtr3D( const CvArr* arr, int idx0, int idx1, int idx2,
+                      int* type CV_DEFAULT(NULL));
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param type Optional output parameter: type of matrix elements
+@param create_node Optional input parameter for sparse matrices. Non-zero value of the parameter
+means that the requested element is created if it does not exist already.
+@param precalc_hashval Optional input parameter for sparse matrices. If the pointer is not NULL,
+the function does not recalculate the node hash value, but takes it from the specified location.
+It is useful for speeding up pair-wise operations (TODO: provide an example)
+*/
+CVAPI(uchar*) cvPtrND( const CvArr* arr, const int* idx, int* type CV_DEFAULT(NULL),
+                      int create_node CV_DEFAULT(1),
+                      unsigned* precalc_hashval CV_DEFAULT(NULL));
+
+/** @brief Return a specific array element.
+
+The functions return a specific array element. In the case of a sparse array the functions return 0
+if the requested node does not exist (no new node is created by the functions).
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+ */
+CVAPI(CvScalar) cvGet1D( const CvArr* arr, int idx0 );
+/** @overload */
+CVAPI(CvScalar) cvGet2D( const CvArr* arr, int idx0, int idx1 );
+/** @overload */
+CVAPI(CvScalar) cvGet3D( const CvArr* arr, int idx0, int idx1, int idx2 );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+*/
+CVAPI(CvScalar) cvGetND( const CvArr* arr, const int* idx );
+
+/** @brief Return a specific element of single-channel 1D, 2D, 3D or nD array.
+
+Returns a specific element of a single-channel array. If the array has multiple channels, a runtime
+error is raised. Note that Get?D functions can be used safely for both single-channel and
+multiple-channel arrays though they are a bit slower.
+
+In the case of a sparse array the functions return 0 if the requested node does not exist (no new
+node is created by the functions).
+@param arr Input array. Must have a single channel.
+@param idx0 The first zero-based component of the element index
+ */
+CVAPI(double) cvGetReal1D( const CvArr* arr, int idx0 );
+/** @overload */
+CVAPI(double) cvGetReal2D( const CvArr* arr, int idx0, int idx1 );
+/** @overload */
+CVAPI(double) cvGetReal3D( const CvArr* arr, int idx0, int idx1, int idx2 );
+/** @overload
+@param arr Input array. Must have a single channel.
+@param idx Array of the element indices
+*/
+CVAPI(double) cvGetRealND( const CvArr* arr, const int* idx );
+
+/** @brief Change the particular array element.
+
+The functions assign the new value to a particular array element. In the case of a sparse array the
+functions create the node if it does not exist yet.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param value The assigned value
+ */
+CVAPI(void) cvSet1D( CvArr* arr, int idx0, CvScalar value );
+/** @overload */
+CVAPI(void) cvSet2D( CvArr* arr, int idx0, int idx1, CvScalar value );
+/** @overload */
+CVAPI(void) cvSet3D( CvArr* arr, int idx0, int idx1, int idx2, CvScalar value );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param value The assigned value
+*/
+CVAPI(void) cvSetND( CvArr* arr, const int* idx, CvScalar value );
+
+/** @brief Change a specific array element.
+
+The functions assign a new value to a specific element of a single-channel array. If the array has
+multiple channels, a runtime error is raised. Note that the Set\*D function can be used safely for
+both single-channel and multiple-channel arrays, though they are a bit slower.
+
+In the case of a sparse array the functions create the node if it does not yet exist.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param value The assigned value
+ */
+CVAPI(void) cvSetReal1D( CvArr* arr, int idx0, double value );
+/** @overload */
+CVAPI(void) cvSetReal2D( CvArr* arr, int idx0, int idx1, double value );
+/** @overload */
+CVAPI(void) cvSetReal3D( CvArr* arr, int idx0,
+                        int idx1, int idx2, double value );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param value The assigned value
+*/
+CVAPI(void) cvSetRealND( CvArr* arr, const int* idx, double value );
+
+/** clears element of ND dense array,
+   in case of sparse arrays it deletes the specified node */
+CVAPI(void) cvClearND( CvArr* arr, const int* idx );
+
+/** @brief Returns matrix header for arbitrary array.
+
+The function returns a matrix header for the input array that can be a matrix - CvMat, an image -
+IplImage, or a multi-dimensional dense array - CvMatND (the third option is allowed only if
+allowND != 0) . In the case of matrix the function simply returns the input pointer. In the case of
+IplImage\* or CvMatND it initializes the header structure with parameters of the current image ROI
+and returns &header. Because COI is not supported by CvMat, it is returned separately.
+
+The function provides an easy way to handle both types of arrays - IplImage and CvMat using the same
+code. Input array must have non-zero data pointer, otherwise the function will report an error.
+
+@note If the input array is IplImage with planar data layout and COI set, the function returns the
+pointer to the selected plane and COI == 0. This feature allows user to process IplImage structures
+with planar data layout, even though OpenCV does not support such images.
+@param arr Input array
+@param header Pointer to CvMat structure used as a temporary buffer
+@param coi Optional output parameter for storing COI
+@param allowND If non-zero, the function accepts multi-dimensional dense arrays (CvMatND\*) and
+returns 2D matrix (if CvMatND has two dimensions) or 1D matrix (when CvMatND has 1 dimension or
+more than 2 dimensions). The CvMatND array must be continuous.
+@sa cvGetImage, cvarrToMat.
+ */
+CVAPI(CvMat*) cvGetMat( const CvArr* arr, CvMat* header,
+                       int* coi CV_DEFAULT(NULL),
+                       int allowND CV_DEFAULT(0));
+
+/** @brief Returns image header for arbitrary array.
+
+The function returns the image header for the input array that can be a matrix (CvMat) or image
+(IplImage). In the case of an image the function simply returns the input pointer. In the case of
+CvMat it initializes an image_header structure with the parameters of the input matrix. Note that
+if we transform IplImage to CvMat using cvGetMat and then transform CvMat back to IplImage using
+this function, we will get different headers if the ROI is set in the original image.
+@param arr Input array
+@param image_header Pointer to IplImage structure used as a temporary buffer
+ */
+CVAPI(IplImage*) cvGetImage( const CvArr* arr, IplImage* image_header );
+
+
+/** @brief Changes the shape of a multi-dimensional array without copying the data.
+
+The function is an advanced version of cvReshape that can work with multi-dimensional arrays as
+well (though it can work with ordinary images and matrices) and change the number of dimensions.
+
+Below are the two samples from the cvReshape description rewritten using cvReshapeMatND:
+@code
+    IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3);
+    IplImage gray_img_hdr, *gray_img;
+    gray_img = (IplImage*)cvReshapeMatND(color_img, sizeof(gray_img_hdr), &gray_img_hdr, 1, 0, 0);
+    ...
+    int size[] = { 2, 2, 2 };
+    CvMatND* mat = cvCreateMatND(3, size, CV_32F);
+    CvMat row_header, *row;
+    row = (CvMat*)cvReshapeMatND(mat, sizeof(row_header), &row_header, 0, 1, 0);
+@endcode
+In C, the header file for this function includes a convenient macro cvReshapeND that does away with
+the sizeof_header parameter. So, the lines containing the call to cvReshapeMatND in the examples
+may be replaced as follow:
+@code
+    gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0);
+    ...
+    row = (CvMat*)cvReshapeND(mat, &row_header, 0, 1, 0);
+@endcode
+@param arr Input array
+@param sizeof_header Size of output header to distinguish between IplImage, CvMat and CvMatND
+output headers
+@param header Output header to be filled
+@param new_cn New number of channels. new_cn = 0 means that the number of channels remains
+unchanged.
+@param new_dims New number of dimensions. new_dims = 0 means that the number of dimensions
+remains the same.
+@param new_sizes Array of new dimension sizes. Only new_dims-1 values are used, because the
+total number of elements must remain the same. Thus, if new_dims = 1, new_sizes array is not
+used.
+ */
+CVAPI(CvArr*) cvReshapeMatND( const CvArr* arr,
+                             int sizeof_header, CvArr* header,
+                             int new_cn, int new_dims, int* new_sizes );
+
+#define cvReshapeND( arr, header, new_cn, new_dims, new_sizes )   \
+      cvReshapeMatND( (arr), sizeof(*(header)), (header),         \
+                      (new_cn), (new_dims), (new_sizes))
+
+/** @brief Changes shape of matrix/image without copying data.
+
+The function initializes the CvMat header so that it points to the same data as the original array
+but has a different shape - different number of channels, different number of rows, or both.
+
+The following example code creates one image buffer and two image headers, the first is for a
+320x240x3 image and the second is for a 960x240x1 image:
+@code
+    IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3);
+    CvMat gray_mat_hdr;
+    IplImage gray_img_hdr, *gray_img;
+    cvReshape(color_img, &gray_mat_hdr, 1);
+    gray_img = cvGetImage(&gray_mat_hdr, &gray_img_hdr);
+@endcode
+And the next example converts a 3x3 matrix to a single 1x9 vector:
+@code
+    CvMat* mat = cvCreateMat(3, 3, CV_32F);
+    CvMat row_header, *row;
+    row = cvReshape(mat, &row_header, 0, 1);
+@endcode
+@param arr Input array
+@param header Output header to be filled
+@param new_cn New number of channels. 'new_cn = 0' means that the number of channels remains
+unchanged.
+@param new_rows New number of rows. 'new_rows = 0' means that the number of rows remains
+unchanged unless it needs to be changed according to new_cn value.
+*/
+CVAPI(CvMat*) cvReshape( const CvArr* arr, CvMat* header,
+                        int new_cn, int new_rows CV_DEFAULT(0) );
+
+/** Repeats source 2d array several times in both horizontal and
+   vertical direction to fill destination array */
+CVAPI(void) cvRepeat( const CvArr* src, CvArr* dst );
+
+/** @brief Allocates array data
+
+The function allocates image, matrix or multi-dimensional dense array data. Note that in the case of
+matrix types OpenCV allocation functions are used. In the case of IplImage they are used unless
+CV_TURN_ON_IPL_COMPATIBILITY() has been called before. In the latter case IPL functions are used
+to allocate the data.
+@param arr Array header
+ */
+CVAPI(void)  cvCreateData( CvArr* arr );
+
+/** @brief Releases array data.
+
+The function releases the array data. In the case of CvMat or CvMatND it simply calls
+cvDecRefData(), that is the function can not deallocate external data. See also the note to
+cvCreateData .
+@param arr Array header
+ */
+CVAPI(void)  cvReleaseData( CvArr* arr );
+
+/** @brief Assigns user data to the array header.
+
+The function assigns user data to the array header. Header should be initialized before using
+cvCreateMatHeader, cvCreateImageHeader, cvCreateMatNDHeader, cvInitMatHeader,
+cvInitImageHeader or cvInitMatNDHeader.
+@param arr Array header
+@param data User data
+@param step Full row length in bytes
+ */
+CVAPI(void)  cvSetData( CvArr* arr, void* data, int step );
+
+/** @brief Retrieves low-level information about the array.
+
+The function fills output variables with low-level information about the array data. All output
+
+parameters are optional, so some of the pointers may be set to NULL. If the array is IplImage with
+ROI set, the parameters of ROI are returned.
+
+The following example shows how to get access to array elements. It computes absolute values of the
+array elements :
+@code
+    float* data;
+    int step;
+    CvSize size;
+
+    cvGetRawData(array, (uchar**)&data, &step, &size);
+    step /= sizeof(data[0]);
+
+    for(int y = 0; y < size.height; y++, data += step )
+        for(int x = 0; x < size.width; x++ )
+            data[x] = (float)fabs(data[x]);
+@endcode
+@param arr Array header
+@param data Output pointer to the whole image origin or ROI origin if ROI is set
+@param step Output full row length in bytes
+@param roi_size Output ROI size
+ */
+CVAPI(void) cvGetRawData( const CvArr* arr, uchar** data,
+                         int* step CV_DEFAULT(NULL),
+                         CvSize* roi_size CV_DEFAULT(NULL));
+
+/** @brief Returns size of matrix or image ROI.
+
+The function returns number of rows (CvSize::height) and number of columns (CvSize::width) of the
+input matrix or image. In the case of image the size of ROI is returned.
+@param arr array header
+ */
+CVAPI(CvSize) cvGetSize( const CvArr* arr );
+
+/** @brief Copies one array to another.
+
+The function copies selected elements from an input array to an output array:
+
+\f[\texttt{dst} (I)= \texttt{src} (I)  \quad \text{if} \quad \texttt{mask} (I)  \ne 0.\f]
+
+If any of the passed arrays is of IplImage type, then its ROI and COI fields are used. Both arrays
+must have the same type, the same number of dimensions, and the same size. The function can also
+copy sparse arrays (mask is not supported in this case).
+@param src The source array
+@param dst The destination array
+@param mask Operation mask, 8-bit single channel array; specifies elements of the destination array
+to be changed
+ */
+CVAPI(void)  cvCopy( const CvArr* src, CvArr* dst,
+                     const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @brief Sets every element of an array to a given value.
+
+The function copies the scalar value to every selected element of the destination array:
+\f[\texttt{arr} (I)= \texttt{value} \quad \text{if} \quad \texttt{mask} (I)  \ne 0\f]
+If array arr is of IplImage type, then is ROI used, but COI must not be set.
+@param arr The destination array
+@param value Fill value
+@param mask Operation mask, 8-bit single channel array; specifies elements of the destination
+array to be changed
+ */
+CVAPI(void)  cvSet( CvArr* arr, CvScalar value,
+                    const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @brief Clears the array.
+
+The function clears the array. In the case of dense arrays (CvMat, CvMatND or IplImage),
+cvZero(array) is equivalent to cvSet(array,cvScalarAll(0),0). In the case of sparse arrays all the
+elements are removed.
+@param arr Array to be cleared
+ */
+CVAPI(void)  cvSetZero( CvArr* arr );
+#define cvZero  cvSetZero
+
+
+/** Splits a multi-channel array into the set of single-channel arrays or
+   extracts particular [color] plane */
+CVAPI(void)  cvSplit( const CvArr* src, CvArr* dst0, CvArr* dst1,
+                      CvArr* dst2, CvArr* dst3 );
+
+/** Merges a set of single-channel arrays into the single multi-channel array
+   or inserts one particular [color] plane to the array */
+CVAPI(void)  cvMerge( const CvArr* src0, const CvArr* src1,
+                      const CvArr* src2, const CvArr* src3,
+                      CvArr* dst );
+
+/** Copies several channels from input arrays to
+   certain channels of output arrays */
+CVAPI(void)  cvMixChannels( const CvArr** src, int src_count,
+                            CvArr** dst, int dst_count,
+                            const int* from_to, int pair_count );
+
+/** @brief Converts one array to another with optional linear transformation.
+
+The function has several different purposes, and thus has several different names. It copies one
+array to another with optional scaling, which is performed first, and/or optional type conversion,
+performed after:
+
+\f[\texttt{dst} (I) =  \texttt{scale} \texttt{src} (I) + ( \texttt{shift} _0, \texttt{shift} _1,...)\f]
+
+All the channels of multi-channel arrays are processed independently.
+
+The type of conversion is done with rounding and saturation, that is if the result of scaling +
+conversion can not be represented exactly by a value of the destination array element type, it is
+set to the nearest representable value on the real axis.
+@param src Source array
+@param dst Destination array
+@param scale Scale factor
+@param shift Value added to the scaled source array elements
+ */
+CVAPI(void)  cvConvertScale( const CvArr* src, CvArr* dst,
+                             double scale CV_DEFAULT(1),
+                             double shift CV_DEFAULT(0) );
+#define cvCvtScale cvConvertScale
+#define cvScale  cvConvertScale
+#define cvConvert( src, dst )  cvConvertScale( (src), (dst), 1, 0 )
+
+
+/** Performs linear transformation on every source array element,
+   stores absolute value of the result:
+   dst(x,y,c) = abs(scale*src(x,y,c)+shift).
+   destination array must have 8u type.
+   In other cases one may use cvConvertScale + cvAbsDiffS */
+CVAPI(void)  cvConvertScaleAbs( const CvArr* src, CvArr* dst,
+                                double scale CV_DEFAULT(1),
+                                double shift CV_DEFAULT(0) );
+#define cvCvtScaleAbs  cvConvertScaleAbs
+
+
+/** checks termination criteria validity and
+   sets eps to default_eps (if it is not set),
+   max_iter to default_max_iters (if it is not set)
+*/
+CVAPI(CvTermCriteria) cvCheckTermCriteria( CvTermCriteria criteria,
+                                           double default_eps,
+                                           int default_max_iters );
+
+/****************************************************************************************\
+*                   Arithmetic, logic and comparison operations                          *
+\****************************************************************************************/
+
+/** dst(mask) = src1(mask) + src2(mask) */
+CVAPI(void)  cvAdd( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                    const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src(mask) + value */
+CVAPI(void)  cvAddS( const CvArr* src, CvScalar value, CvArr* dst,
+                     const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src1(mask) - src2(mask) */
+CVAPI(void)  cvSub( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                    const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src(mask) - value = src(mask) + (-value) */
+CV_INLINE  void  cvSubS( const CvArr* src, CvScalar value, CvArr* dst,
+                         const CvArr* mask CV_DEFAULT(NULL))
+{
+    cvAddS( src, cvScalar( -value.val[0], -value.val[1], -value.val[2], -value.val[3]),
+            dst, mask );
+}
+
+/** dst(mask) = value - src(mask) */
+CVAPI(void)  cvSubRS( const CvArr* src, CvScalar value, CvArr* dst,
+                      const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) * src2(idx) * scale
+   (scaled element-wise multiplication of 2 arrays) */
+CVAPI(void)  cvMul( const CvArr* src1, const CvArr* src2,
+                    CvArr* dst, double scale CV_DEFAULT(1) );
+
+/** element-wise division/inversion with scaling:
+    dst(idx) = src1(idx) * scale / src2(idx)
+    or dst(idx) = scale / src2(idx) if src1 == 0 */
+CVAPI(void)  cvDiv( const CvArr* src1, const CvArr* src2,
+                    CvArr* dst, double scale CV_DEFAULT(1));
+
+/** dst = src1 * scale + src2 */
+CVAPI(void)  cvScaleAdd( const CvArr* src1, CvScalar scale,
+                         const CvArr* src2, CvArr* dst );
+#define cvAXPY( A, real_scalar, B, C ) cvScaleAdd(A, cvRealScalar(real_scalar), B, C)
+
+/** dst = src1 * alpha + src2 * beta + gamma */
+CVAPI(void)  cvAddWeighted( const CvArr* src1, double alpha,
+                            const CvArr* src2, double beta,
+                            double gamma, CvArr* dst );
+
+/** @brief Calculates the dot product of two arrays in Euclidean metrics.
+
+The function calculates and returns the Euclidean dot product of two arrays.
+
+\f[src1  \bullet src2 =  \sum _I ( \texttt{src1} (I)  \texttt{src2} (I))\f]
+
+In the case of multiple channel arrays, the results for all channels are accumulated. In particular,
+cvDotProduct(a,a) where a is a complex vector, will return \f$||\texttt{a}||^2\f$. The function can
+process multi-dimensional arrays, row by row, layer by layer, and so on.
+@param src1 The first source array
+@param src2 The second source array
+ */
+CVAPI(double)  cvDotProduct( const CvArr* src1, const CvArr* src2 );
+
+/** dst(idx) = src1(idx) & src2(idx) */
+CVAPI(void) cvAnd( const CvArr* src1, const CvArr* src2,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) & value */
+CVAPI(void) cvAndS( const CvArr* src, CvScalar value,
+                   CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) | src2(idx) */
+CVAPI(void) cvOr( const CvArr* src1, const CvArr* src2,
+                 CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) | value */
+CVAPI(void) cvOrS( const CvArr* src, CvScalar value,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) ^ src2(idx) */
+CVAPI(void) cvXor( const CvArr* src1, const CvArr* src2,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) ^ value */
+CVAPI(void) cvXorS( const CvArr* src, CvScalar value,
+                   CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = ~src(idx) */
+CVAPI(void) cvNot( const CvArr* src, CvArr* dst );
+
+/** dst(idx) = lower(idx) <= src(idx) < upper(idx) */
+CVAPI(void) cvInRange( const CvArr* src, const CvArr* lower,
+                      const CvArr* upper, CvArr* dst );
+
+/** dst(idx) = lower <= src(idx) < upper */
+CVAPI(void) cvInRangeS( const CvArr* src, CvScalar lower,
+                       CvScalar upper, CvArr* dst );
+
+#define CV_CMP_EQ   0
+#define CV_CMP_GT   1
+#define CV_CMP_GE   2
+#define CV_CMP_LT   3
+#define CV_CMP_LE   4
+#define CV_CMP_NE   5
+
+/** The comparison operation support single-channel arrays only.
+   Destination image should be 8uC1 or 8sC1 */
+
+/** dst(idx) = src1(idx) _cmp_op_ src2(idx) */
+CVAPI(void) cvCmp( const CvArr* src1, const CvArr* src2, CvArr* dst, int cmp_op );
+
+/** dst(idx) = src1(idx) _cmp_op_ value */
+CVAPI(void) cvCmpS( const CvArr* src, double value, CvArr* dst, int cmp_op );
+
+/** dst(idx) = min(src1(idx),src2(idx)) */
+CVAPI(void) cvMin( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(idx) = max(src1(idx),src2(idx)) */
+CVAPI(void) cvMax( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(idx) = min(src(idx),value) */
+CVAPI(void) cvMinS( const CvArr* src, double value, CvArr* dst );
+
+/** dst(idx) = max(src(idx),value) */
+CVAPI(void) cvMaxS( const CvArr* src, double value, CvArr* dst );
+
+/** dst(x,y,c) = abs(src1(x,y,c) - src2(x,y,c)) */
+CVAPI(void) cvAbsDiff( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(x,y,c) = abs(src(x,y,c) - value(c)) */
+CVAPI(void) cvAbsDiffS( const CvArr* src, CvArr* dst, CvScalar value );
+#define cvAbs( src, dst ) cvAbsDiffS( (src), (dst), cvScalarAll(0))
+
+/****************************************************************************************\
+*                                Math operations                                         *
+\****************************************************************************************/
+
+/** Does cartesian->polar coordinates conversion.
+   Either of output components (magnitude or angle) is optional */
+CVAPI(void)  cvCartToPolar( const CvArr* x, const CvArr* y,
+                            CvArr* magnitude, CvArr* angle CV_DEFAULT(NULL),
+                            int angle_in_degrees CV_DEFAULT(0));
+
+/** Does polar->cartesian coordinates conversion.
+   Either of output components (magnitude or angle) is optional.
+   If magnitude is missing it is assumed to be all 1's */
+CVAPI(void)  cvPolarToCart( const CvArr* magnitude, const CvArr* angle,
+                            CvArr* x, CvArr* y,
+                            int angle_in_degrees CV_DEFAULT(0));
+
+/** Does powering: dst(idx) = src(idx)^power */
+CVAPI(void)  cvPow( const CvArr* src, CvArr* dst, double power );
+
+/** Does exponention: dst(idx) = exp(src(idx)).
+   Overflow is not handled yet. Underflow is handled.
+   Maximal relative error is ~7e-6 for single-precision input */
+CVAPI(void)  cvExp( const CvArr* src, CvArr* dst );
+
+/** Calculates natural logarithms: dst(idx) = log(abs(src(idx))).
+   Logarithm of 0 gives large negative number(~-700)
+   Maximal relative error is ~3e-7 for single-precision output
+*/
+CVAPI(void)  cvLog( const CvArr* src, CvArr* dst );
+
+/** Fast arctangent calculation */
+CVAPI(float) cvFastArctan( float y, float x );
+
+/** Fast cubic root calculation */
+CVAPI(float)  cvCbrt( float value );
+
+#define  CV_CHECK_RANGE    1
+#define  CV_CHECK_QUIET    2
+/** Checks array values for NaNs, Infs or simply for too large numbers
+   (if CV_CHECK_RANGE is set). If CV_CHECK_QUIET is set,
+   no runtime errors is raised (function returns zero value in case of "bad" values).
+   Otherwise cvError is called */
+CVAPI(int)  cvCheckArr( const CvArr* arr, int flags CV_DEFAULT(0),
+                        double min_val CV_DEFAULT(0), double max_val CV_DEFAULT(0));
+#define cvCheckArray cvCheckArr
+
+#define CV_RAND_UNI      0
+#define CV_RAND_NORMAL   1
+
+/** @brief Fills an array with random numbers and updates the RNG state.
+
+The function fills the destination array with uniformly or normally distributed random numbers.
+@param rng CvRNG state initialized by cvRNG
+@param arr The destination array
+@param dist_type Distribution type
+> -   **CV_RAND_UNI** uniform distribution
+> -   **CV_RAND_NORMAL** normal or Gaussian distribution
+@param param1 The first parameter of the distribution. In the case of a uniform distribution it is
+the inclusive lower boundary of the random numbers range. In the case of a normal distribution it
+is the mean value of the random numbers.
+@param param2 The second parameter of the distribution. In the case of a uniform distribution it
+is the exclusive upper boundary of the random numbers range. In the case of a normal distribution
+it is the standard deviation of the random numbers.
+@sa randu, randn, RNG::fill.
+ */
+CVAPI(void) cvRandArr( CvRNG* rng, CvArr* arr, int dist_type,
+                      CvScalar param1, CvScalar param2 );
+
+CVAPI(void) cvRandShuffle( CvArr* mat, CvRNG* rng,
+                           double iter_factor CV_DEFAULT(1.));
+
+#define CV_SORT_EVERY_ROW 0
+#define CV_SORT_EVERY_COLUMN 1
+#define CV_SORT_ASCENDING 0
+#define CV_SORT_DESCENDING 16
+
+CVAPI(void) cvSort( const CvArr* src, CvArr* dst CV_DEFAULT(NULL),
+                    CvArr* idxmat CV_DEFAULT(NULL),
+                    int flags CV_DEFAULT(0));
+
+/** Finds real roots of a cubic equation */
+CVAPI(int) cvSolveCubic( const CvMat* coeffs, CvMat* roots );
+
+/** Finds all real and complex roots of a polynomial equation */
+CVAPI(void) cvSolvePoly(const CvMat* coeffs, CvMat *roots2,
+      int maxiter CV_DEFAULT(20), int fig CV_DEFAULT(100));
+
+/****************************************************************************************\
+*                                Matrix operations                                       *
+\****************************************************************************************/
+
+/** @brief Calculates the cross product of two 3D vectors.
+
+The function calculates the cross product of two 3D vectors:
+\f[\texttt{dst} =  \texttt{src1} \times \texttt{src2}\f]
+or:
+\f[\begin{array}{l} \texttt{dst} _1 =  \texttt{src1} _2  \texttt{src2} _3 -  \texttt{src1} _3  \texttt{src2} _2 \\ \texttt{dst} _2 =  \texttt{src1} _3  \texttt{src2} _1 -  \texttt{src1} _1  \texttt{src2} _3 \\ \texttt{dst} _3 =  \texttt{src1} _1  \texttt{src2} _2 -  \texttt{src1} _2  \texttt{src2} _1 \end{array}\f]
+@param src1 The first source vector
+@param src2 The second source vector
+@param dst The destination vector
+ */
+CVAPI(void)  cvCrossProduct( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** Matrix transform: dst = A*B + C, C is optional */
+#define cvMatMulAdd( src1, src2, src3, dst ) cvGEMM( (src1), (src2), 1., (src3), 1., (dst), 0 )
+#define cvMatMul( src1, src2, dst )  cvMatMulAdd( (src1), (src2), NULL, (dst))
+
+#define CV_GEMM_A_T 1
+#define CV_GEMM_B_T 2
+#define CV_GEMM_C_T 4
+/** Extended matrix transform:
+   dst = alpha*op(A)*op(B) + beta*op(C), where op(X) is X or X^T */
+CVAPI(void)  cvGEMM( const CvArr* src1, const CvArr* src2, double alpha,
+                     const CvArr* src3, double beta, CvArr* dst,
+                     int tABC CV_DEFAULT(0));
+#define cvMatMulAddEx cvGEMM
+
+/** Transforms each element of source array and stores
+   resultant vectors in destination array */
+CVAPI(void)  cvTransform( const CvArr* src, CvArr* dst,
+                          const CvMat* transmat,
+                          const CvMat* shiftvec CV_DEFAULT(NULL));
+#define cvMatMulAddS cvTransform
+
+/** Does perspective transform on every element of input array */
+CVAPI(void)  cvPerspectiveTransform( const CvArr* src, CvArr* dst,
+                                     const CvMat* mat );
+
+/** Calculates (A-delta)*(A-delta)^T (order=0) or (A-delta)^T*(A-delta) (order=1) */
+CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order,
+                             const CvArr* delta CV_DEFAULT(NULL),
+                             double scale CV_DEFAULT(1.) );
+
+/** Tranposes matrix. Square matrices can be transposed in-place */
+CVAPI(void)  cvTranspose( const CvArr* src, CvArr* dst );
+#define cvT cvTranspose
+
+/** Completes the symmetric matrix from the lower (LtoR=0) or from the upper (LtoR!=0) part */
+CVAPI(void)  cvCompleteSymm( CvMat* matrix, int LtoR CV_DEFAULT(0) );
+
+/** Mirror array data around horizontal (flip=0),
+   vertical (flip=1) or both(flip=-1) axises:
+   cvFlip(src) flips images vertically and sequences horizontally (inplace) */
+CVAPI(void)  cvFlip( const CvArr* src, CvArr* dst CV_DEFAULT(NULL),
+                     int flip_mode CV_DEFAULT(0));
+#define cvMirror cvFlip
+
+
+#define CV_SVD_MODIFY_A   1
+#define CV_SVD_U_T        2
+#define CV_SVD_V_T        4
+
+/** Performs Singular Value Decomposition of a matrix */
+CVAPI(void)   cvSVD( CvArr* A, CvArr* W, CvArr* U CV_DEFAULT(NULL),
+                     CvArr* V CV_DEFAULT(NULL), int flags CV_DEFAULT(0));
+
+/** Performs Singular Value Back Substitution (solves A*X = B):
+   flags must be the same as in cvSVD */
+CVAPI(void)   cvSVBkSb( const CvArr* W, const CvArr* U,
+                        const CvArr* V, const CvArr* B,
+                        CvArr* X, int flags );
+
+#define CV_LU  0
+#define CV_SVD 1
+#define CV_SVD_SYM 2
+#define CV_CHOLESKY 3
+#define CV_QR  4
+#define CV_NORMAL 16
+
+/** Inverts matrix */
+CVAPI(double)  cvInvert( const CvArr* src, CvArr* dst,
+                         int method CV_DEFAULT(CV_LU));
+#define cvInv cvInvert
+
+/** Solves linear system (src1)*(dst) = (src2)
+   (returns 0 if src1 is a singular and CV_LU method is used) */
+CVAPI(int)  cvSolve( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                     int method CV_DEFAULT(CV_LU));
+
+/** Calculates determinant of input matrix */
+CVAPI(double) cvDet( const CvArr* mat );
+
+/** Calculates trace of the matrix (sum of elements on the main diagonal) */
+CVAPI(CvScalar) cvTrace( const CvArr* mat );
+
+/** Finds eigen values and vectors of a symmetric matrix */
+CVAPI(void)  cvEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+                        double eps CV_DEFAULT(0),
+                        int lowindex CV_DEFAULT(-1),
+                        int highindex CV_DEFAULT(-1));
+
+///* Finds selected eigen values and vectors of a symmetric matrix */
+//CVAPI(void)  cvSelectedEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+//                                int lowindex, int highindex );
+
+/** Makes an identity matrix (mat_ij = i == j) */
+CVAPI(void)  cvSetIdentity( CvArr* mat, CvScalar value CV_DEFAULT(cvRealScalar(1)) );
+
+/** Fills matrix with given range of numbers */
+CVAPI(CvArr*)  cvRange( CvArr* mat, double start, double end );
+
+/**   @anchor core_c_CovarFlags
+@name Flags for cvCalcCovarMatrix
+@see cvCalcCovarMatrix
+  @{
+*/
+
+/** flag for cvCalcCovarMatrix, transpose([v1-avg, v2-avg,...]) * [v1-avg,v2-avg,...] */
+#define CV_COVAR_SCRAMBLED 0
+
+/** flag for cvCalcCovarMatrix, [v1-avg, v2-avg,...] * transpose([v1-avg,v2-avg,...]) */
+#define CV_COVAR_NORMAL    1
+
+/** flag for cvCalcCovarMatrix, do not calc average (i.e. mean vector) - use the input vector instead
+   (useful for calculating covariance matrix by parts) */
+#define CV_COVAR_USE_AVG   2
+
+/** flag for cvCalcCovarMatrix, scale the covariance matrix coefficients by number of the vectors */
+#define CV_COVAR_SCALE     4
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its rows */
+#define CV_COVAR_ROWS      8
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its columns */
+#define CV_COVAR_COLS     16
+
+/** @} */
+
+/** Calculates covariation matrix for a set of vectors
+@see @ref core_c_CovarFlags "flags"
+*/
+CVAPI(void)  cvCalcCovarMatrix( const CvArr** vects, int count,
+                                CvArr* cov_mat, CvArr* avg, int flags );
+
+#define CV_PCA_DATA_AS_ROW 0
+#define CV_PCA_DATA_AS_COL 1
+#define CV_PCA_USE_AVG 2
+CVAPI(void)  cvCalcPCA( const CvArr* data, CvArr* mean,
+                        CvArr* eigenvals, CvArr* eigenvects, int flags );
+
+CVAPI(void)  cvProjectPCA( const CvArr* data, const CvArr* mean,
+                           const CvArr* eigenvects, CvArr* result );
+
+CVAPI(void)  cvBackProjectPCA( const CvArr* proj, const CvArr* mean,
+                               const CvArr* eigenvects, CvArr* result );
+
+/** Calculates Mahalanobis(weighted) distance */
+CVAPI(double)  cvMahalanobis( const CvArr* vec1, const CvArr* vec2, const CvArr* mat );
+#define cvMahalonobis  cvMahalanobis
+
+/****************************************************************************************\
+*                                    Array Statistics                                    *
+\****************************************************************************************/
+
+/** Finds sum of array elements */
+CVAPI(CvScalar)  cvSum( const CvArr* arr );
+
+/** Calculates number of non-zero pixels */
+CVAPI(int)  cvCountNonZero( const CvArr* arr );
+
+/** Calculates mean value of array elements */
+CVAPI(CvScalar)  cvAvg( const CvArr* arr, const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Calculates mean and standard deviation of pixel values */
+CVAPI(void)  cvAvgSdv( const CvArr* arr, CvScalar* mean, CvScalar* std_dev,
+                       const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Finds global minimum, maximum and their positions */
+CVAPI(void)  cvMinMaxLoc( const CvArr* arr, double* min_val, double* max_val,
+                          CvPoint* min_loc CV_DEFAULT(NULL),
+                          CvPoint* max_loc CV_DEFAULT(NULL),
+                          const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @anchor core_c_NormFlags
+  @name Flags for cvNorm and cvNormalize
+  @{
+*/
+#define CV_C            1
+#define CV_L1           2
+#define CV_L2           4
+#define CV_NORM_MASK    7
+#define CV_RELATIVE     8
+#define CV_DIFF         16
+#define CV_MINMAX       32
+
+#define CV_DIFF_C       (CV_DIFF | CV_C)
+#define CV_DIFF_L1      (CV_DIFF | CV_L1)
+#define CV_DIFF_L2      (CV_DIFF | CV_L2)
+#define CV_RELATIVE_C   (CV_RELATIVE | CV_C)
+#define CV_RELATIVE_L1  (CV_RELATIVE | CV_L1)
+#define CV_RELATIVE_L2  (CV_RELATIVE | CV_L2)
+/** @} */
+
+/** Finds norm, difference norm or relative difference norm for an array (or two arrays)
+@see ref core_c_NormFlags "flags"
+*/
+CVAPI(double)  cvNorm( const CvArr* arr1, const CvArr* arr2 CV_DEFAULT(NULL),
+                       int norm_type CV_DEFAULT(CV_L2),
+                       const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @see ref core_c_NormFlags "flags" */
+CVAPI(void)  cvNormalize( const CvArr* src, CvArr* dst,
+                          double a CV_DEFAULT(1.), double b CV_DEFAULT(0.),
+                          int norm_type CV_DEFAULT(CV_L2),
+                          const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @anchor core_c_ReduceFlags
+  @name Flags for cvReduce
+  @{
+*/
+#define CV_REDUCE_SUM 0
+#define CV_REDUCE_AVG 1
+#define CV_REDUCE_MAX 2
+#define CV_REDUCE_MIN 3
+/** @} */
+
+/** @see @ref core_c_ReduceFlags "flags" */
+CVAPI(void)  cvReduce( const CvArr* src, CvArr* dst, int dim CV_DEFAULT(-1),
+                       int op CV_DEFAULT(CV_REDUCE_SUM) );
+
+/****************************************************************************************\
+*                      Discrete Linear Transforms and Related Functions                  *
+\****************************************************************************************/
+
+/** @anchor core_c_DftFlags
+  @name Flags for cvDFT, cvDCT and cvMulSpectrums
+  @{
+  */
+#define CV_DXT_FORWARD  0
+#define CV_DXT_INVERSE  1
+#define CV_DXT_SCALE    2 /**< divide result by size of array */
+#define CV_DXT_INV_SCALE (CV_DXT_INVERSE + CV_DXT_SCALE)
+#define CV_DXT_INVERSE_SCALE CV_DXT_INV_SCALE
+#define CV_DXT_ROWS     4 /**< transform each row individually */
+#define CV_DXT_MUL_CONJ 8 /**< conjugate the second argument of cvMulSpectrums */
+/** @} */
+
+/** Discrete Fourier Transform:
+    complex->complex,
+    real->ccs (forward),
+    ccs->real (inverse)
+@see core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvDFT( const CvArr* src, CvArr* dst, int flags,
+                    int nonzero_rows CV_DEFAULT(0) );
+#define cvFFT cvDFT
+
+/** Multiply results of DFTs: DFT(X)*DFT(Y) or DFT(X)*conj(DFT(Y))
+@see core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvMulSpectrums( const CvArr* src1, const CvArr* src2,
+                             CvArr* dst, int flags );
+
+/** Finds optimal DFT vector size >= size0 */
+CVAPI(int)  cvGetOptimalDFTSize( int size0 );
+
+/** Discrete Cosine Transform
+@see core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvDCT( const CvArr* src, CvArr* dst, int flags );
+
+/****************************************************************************************\
+*                              Dynamic data structures                                   *
+\****************************************************************************************/
+
+/** Calculates length of sequence slice (with support of negative indices). */
+CVAPI(int) cvSliceLength( CvSlice slice, const CvSeq* seq );
+
+
+/** Creates new memory storage.
+   block_size == 0 means that default,
+   somewhat optimal size, is used (currently, it is 64K) */
+CVAPI(CvMemStorage*)  cvCreateMemStorage( int block_size CV_DEFAULT(0));
+
+
+/** Creates a memory storage that will borrow memory blocks from parent storage */
+CVAPI(CvMemStorage*)  cvCreateChildMemStorage( CvMemStorage* parent );
+
+
+/** Releases memory storage. All the children of a parent must be released before
+   the parent. A child storage returns all the blocks to parent when it is released */
+CVAPI(void)  cvReleaseMemStorage( CvMemStorage** storage );
+
+
+/** Clears memory storage. This is the only way(!!!) (besides cvRestoreMemStoragePos)
+   to reuse memory allocated for the storage - cvClearSeq,cvClearSet ...
+   do not free any memory.
+   A child storage returns all the blocks to the parent when it is cleared */
+CVAPI(void)  cvClearMemStorage( CvMemStorage* storage );
+
+/** Remember a storage "free memory" position */
+CVAPI(void)  cvSaveMemStoragePos( const CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Restore a storage "free memory" position */
+CVAPI(void)  cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Allocates continuous buffer of the specified size in the storage */
+CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size );
+
+/** Allocates string in memory storage */
+CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
+                                         int len CV_DEFAULT(-1) );
+
+/** Creates new empty sequence that will reside in the specified storage */
+CVAPI(CvSeq*)  cvCreateSeq( int seq_flags, size_t header_size,
+                            size_t elem_size, CvMemStorage* storage );
+
+/** Changes default size (granularity) of sequence blocks.
+   The default size is ~1Kbyte */
+CVAPI(void)  cvSetSeqBlockSize( CvSeq* seq, int delta_elems );
+
+
+/** Adds new element to the end of sequence. Returns pointer to the element */
+CVAPI(schar*)  cvSeqPush( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Adds new element to the beginning of sequence. Returns pointer to it */
+CVAPI(schar*)  cvSeqPushFront( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Removes the last element from sequence and optionally saves it */
+CVAPI(void)  cvSeqPop( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+/** Removes the first element from sequence and optioanally saves it */
+CVAPI(void)  cvSeqPopFront( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+#define CV_FRONT 1
+#define CV_BACK 0
+/** Adds several new elements to the end of sequence */
+CVAPI(void)  cvSeqPushMulti( CvSeq* seq, const void* elements,
+                             int count, int in_front CV_DEFAULT(0) );
+
+/** Removes several elements from the end of sequence and optionally saves them */
+CVAPI(void)  cvSeqPopMulti( CvSeq* seq, void* elements,
+                            int count, int in_front CV_DEFAULT(0) );
+
+/** Inserts a new element in the middle of sequence.
+   cvSeqInsert(seq,0,elem) == cvSeqPushFront(seq,elem) */
+CVAPI(schar*)  cvSeqInsert( CvSeq* seq, int before_index,
+                            const void* element CV_DEFAULT(NULL));
+
+/** Removes specified sequence element */
+CVAPI(void)  cvSeqRemove( CvSeq* seq, int index );
+
+
+/** Removes all the elements from the sequence. The freed memory
+   can be reused later only by the same sequence unless cvClearMemStorage
+   or cvRestoreMemStoragePos is called */
+CVAPI(void)  cvClearSeq( CvSeq* seq );
+
+
+/** Retrieves pointer to specified sequence element.
+   Negative indices are supported and mean counting from the end
+   (e.g -1 means the last sequence element) */
+CVAPI(schar*)  cvGetSeqElem( const CvSeq* seq, int index );
+
+/** Calculates index of the specified sequence element.
+   Returns -1 if element does not belong to the sequence */
+CVAPI(int)  cvSeqElemIdx( const CvSeq* seq, const void* element,
+                         CvSeqBlock** block CV_DEFAULT(NULL) );
+
+/** Initializes sequence writer. The new elements will be added to the end of sequence */
+CVAPI(void)  cvStartAppendToSeq( CvSeq* seq, CvSeqWriter* writer );
+
+
+/** Combination of cvCreateSeq and cvStartAppendToSeq */
+CVAPI(void)  cvStartWriteSeq( int seq_flags, int header_size,
+                              int elem_size, CvMemStorage* storage,
+                              CvSeqWriter* writer );
+
+/** Closes sequence writer, updates sequence header and returns pointer
+   to the resultant sequence
+   (which may be useful if the sequence was created using cvStartWriteSeq))
+*/
+CVAPI(CvSeq*)  cvEndWriteSeq( CvSeqWriter* writer );
+
+
+/** Updates sequence header. May be useful to get access to some of previously
+   written elements via cvGetSeqElem or sequence reader */
+CVAPI(void)   cvFlushSeqWriter( CvSeqWriter* writer );
+
+
+/** Initializes sequence reader.
+   The sequence can be read in forward or backward direction */
+CVAPI(void) cvStartReadSeq( const CvSeq* seq, CvSeqReader* reader,
+                           int reverse CV_DEFAULT(0) );
+
+
+/** Returns current sequence reader position (currently observed sequence element) */
+CVAPI(int)  cvGetSeqReaderPos( CvSeqReader* reader );
+
+
+/** Changes sequence reader position. It may seek to an absolute or
+   to relative to the current position */
+CVAPI(void)   cvSetSeqReaderPos( CvSeqReader* reader, int index,
+                                 int is_relative CV_DEFAULT(0));
+
+/** Copies sequence content to a continuous piece of memory */
+CVAPI(void*)  cvCvtSeqToArray( const CvSeq* seq, void* elements,
+                               CvSlice slice CV_DEFAULT(CV_WHOLE_SEQ) );
+
+/** Creates sequence header for array.
+   After that all the operations on sequences that do not alter the content
+   can be applied to the resultant sequence */
+CVAPI(CvSeq*) cvMakeSeqHeaderForArray( int seq_type, int header_size,
+                                       int elem_size, void* elements, int total,
+                                       CvSeq* seq, CvSeqBlock* block );
+
+/** Extracts sequence slice (with or without copying sequence elements) */
+CVAPI(CvSeq*) cvSeqSlice( const CvSeq* seq, CvSlice slice,
+                         CvMemStorage* storage CV_DEFAULT(NULL),
+                         int copy_data CV_DEFAULT(0));
+
+CV_INLINE CvSeq* cvCloneSeq( const CvSeq* seq, CvMemStorage* storage CV_DEFAULT(NULL))
+{
+    return cvSeqSlice( seq, CV_WHOLE_SEQ, storage, 1 );
+}
+
+/** Removes sequence slice */
+CVAPI(void)  cvSeqRemoveSlice( CvSeq* seq, CvSlice slice );
+
+/** Inserts a sequence or array into another sequence */
+CVAPI(void)  cvSeqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+/** a < b ? -1 : a > b ? 1 : 0 */
+typedef int (CV_CDECL* CvCmpFunc)(const void* a, const void* b, void* userdata );
+
+/** Sorts sequence in-place given element comparison function */
+CVAPI(void) cvSeqSort( CvSeq* seq, CvCmpFunc func, void* userdata CV_DEFAULT(NULL) );
+
+/** Finds element in a [sorted] sequence */
+CVAPI(schar*) cvSeqSearch( CvSeq* seq, const void* elem, CvCmpFunc func,
+                           int is_sorted, int* elem_idx,
+                           void* userdata CV_DEFAULT(NULL) );
+
+/** Reverses order of sequence elements in-place */
+CVAPI(void) cvSeqInvert( CvSeq* seq );
+
+/** Splits sequence into one or more equivalence classes using the specified criteria */
+CVAPI(int)  cvSeqPartition( const CvSeq* seq, CvMemStorage* storage,
+                            CvSeq** labels, CvCmpFunc is_equal, void* userdata );
+
+/************ Internal sequence functions ************/
+CVAPI(void)  cvChangeSeqBlock( void* reader, int direction );
+CVAPI(void)  cvCreateSeqBlock( CvSeqWriter* writer );
+
+
+/** Creates a new set */
+CVAPI(CvSet*)  cvCreateSet( int set_flags, int header_size,
+                            int elem_size, CvMemStorage* storage );
+
+/** Adds new element to the set and returns pointer to it */
+CVAPI(int)  cvSetAdd( CvSet* set_header, CvSetElem* elem CV_DEFAULT(NULL),
+                      CvSetElem** inserted_elem CV_DEFAULT(NULL) );
+
+/** Fast variant of cvSetAdd */
+CV_INLINE  CvSetElem* cvSetNew( CvSet* set_header )
+{
+    CvSetElem* elem = set_header->free_elems;
+    if( elem )
+    {
+        set_header->free_elems = elem->next_free;
+        elem->flags = elem->flags & CV_SET_ELEM_IDX_MASK;
+        set_header->active_count++;
+    }
+    else
+        cvSetAdd( set_header, NULL, &elem );
+    return elem;
+}
+
+/** Removes set element given its pointer */
+CV_INLINE  void cvSetRemoveByPtr( CvSet* set_header, void* elem )
+{
+    CvSetElem* _elem = (CvSetElem*)elem;
+    assert( _elem->flags >= 0 /*&& (elem->flags & CV_SET_ELEM_IDX_MASK) < set_header->total*/ );
+    _elem->next_free = set_header->free_elems;
+    _elem->flags = (_elem->flags & CV_SET_ELEM_IDX_MASK) | CV_SET_ELEM_FREE_FLAG;
+    set_header->free_elems = _elem;
+    set_header->active_count--;
+}
+
+/** Removes element from the set by its index  */
+CVAPI(void)   cvSetRemove( CvSet* set_header, int index );
+
+/** Returns a set element by index. If the element doesn't belong to the set,
+   NULL is returned */
+CV_INLINE CvSetElem* cvGetSetElem( const CvSet* set_header, int idx )
+{
+    CvSetElem* elem = (CvSetElem*)(void *)cvGetSeqElem( (CvSeq*)set_header, idx );
+    return elem && CV_IS_SET_ELEM( elem ) ? elem : 0;
+}
+
+/** Removes all the elements from the set */
+CVAPI(void)  cvClearSet( CvSet* set_header );
+
+/** Creates new graph */
+CVAPI(CvGraph*)  cvCreateGraph( int graph_flags, int header_size,
+                                int vtx_size, int edge_size,
+                                CvMemStorage* storage );
+
+/** Adds new vertex to the graph */
+CVAPI(int)  cvGraphAddVtx( CvGraph* graph, const CvGraphVtx* vtx CV_DEFAULT(NULL),
+                           CvGraphVtx** inserted_vtx CV_DEFAULT(NULL) );
+
+
+/** Removes vertex from the graph together with all incident edges */
+CVAPI(int)  cvGraphRemoveVtx( CvGraph* graph, int index );
+CVAPI(int)  cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx );
+
+
+/** Link two vertices specifed by indices or pointers if they
+   are not connected or return pointer to already existing edge
+   connecting the vertices.
+   Functions return 1 if a new edge was created, 0 otherwise */
+CVAPI(int)  cvGraphAddEdge( CvGraph* graph,
+                            int start_idx, int end_idx,
+                            const CvGraphEdge* edge CV_DEFAULT(NULL),
+                            CvGraphEdge** inserted_edge CV_DEFAULT(NULL) );
+
+CVAPI(int)  cvGraphAddEdgeByPtr( CvGraph* graph,
+                               CvGraphVtx* start_vtx, CvGraphVtx* end_vtx,
+                               const CvGraphEdge* edge CV_DEFAULT(NULL),
+                               CvGraphEdge** inserted_edge CV_DEFAULT(NULL) );
+
+/** Remove edge connecting two vertices */
+CVAPI(void)  cvGraphRemoveEdge( CvGraph* graph, int start_idx, int end_idx );
+CVAPI(void)  cvGraphRemoveEdgeByPtr( CvGraph* graph, CvGraphVtx* start_vtx,
+                                     CvGraphVtx* end_vtx );
+
+/** Find edge connecting two vertices */
+CVAPI(CvGraphEdge*)  cvFindGraphEdge( const CvGraph* graph, int start_idx, int end_idx );
+CVAPI(CvGraphEdge*)  cvFindGraphEdgeByPtr( const CvGraph* graph,
+                                           const CvGraphVtx* start_vtx,
+                                           const CvGraphVtx* end_vtx );
+#define cvGraphFindEdge cvFindGraphEdge
+#define cvGraphFindEdgeByPtr cvFindGraphEdgeByPtr
+
+/** Remove all vertices and edges from the graph */
+CVAPI(void)  cvClearGraph( CvGraph* graph );
+
+
+/** Count number of edges incident to the vertex */
+CVAPI(int)  cvGraphVtxDegree( const CvGraph* graph, int vtx_idx );
+CVAPI(int)  cvGraphVtxDegreeByPtr( const CvGraph* graph, const CvGraphVtx* vtx );
+
+
+/** Retrieves graph vertex by given index */
+#define cvGetGraphVtx( graph, idx ) (CvGraphVtx*)cvGetSetElem((CvSet*)(graph), (idx))
+
+/** Retrieves index of a graph vertex given its pointer */
+#define cvGraphVtxIdx( graph, vtx ) ((vtx)->flags & CV_SET_ELEM_IDX_MASK)
+
+/** Retrieves index of a graph edge given its pointer */
+#define cvGraphEdgeIdx( graph, edge ) ((edge)->flags & CV_SET_ELEM_IDX_MASK)
+
+#define cvGraphGetVtxCount( graph ) ((graph)->active_count)
+#define cvGraphGetEdgeCount( graph ) ((graph)->edges->active_count)
+
+#define  CV_GRAPH_VERTEX        1
+#define  CV_GRAPH_TREE_EDGE     2
+#define  CV_GRAPH_BACK_EDGE     4
+#define  CV_GRAPH_FORWARD_EDGE  8
+#define  CV_GRAPH_CROSS_EDGE    16
+#define  CV_GRAPH_ANY_EDGE      30
+#define  CV_GRAPH_NEW_TREE      32
+#define  CV_GRAPH_BACKTRACKING  64
+#define  CV_GRAPH_OVER          -1
+
+#define  CV_GRAPH_ALL_ITEMS    -1
+
+/** flags for graph vertices and edges */
+#define  CV_GRAPH_ITEM_VISITED_FLAG  (1 << 30)
+#define  CV_IS_GRAPH_VERTEX_VISITED(vtx) \
+    (((CvGraphVtx*)(vtx))->flags & CV_GRAPH_ITEM_VISITED_FLAG)
+#define  CV_IS_GRAPH_EDGE_VISITED(edge) \
+    (((CvGraphEdge*)(edge))->flags & CV_GRAPH_ITEM_VISITED_FLAG)
+#define  CV_GRAPH_SEARCH_TREE_NODE_FLAG   (1 << 29)
+#define  CV_GRAPH_FORWARD_EDGE_FLAG       (1 << 28)
+
+typedef struct CvGraphScanner
+{
+    CvGraphVtx* vtx;       /* current graph vertex (or current edge origin) */
+    CvGraphVtx* dst;       /* current graph edge destination vertex */
+    CvGraphEdge* edge;     /* current edge */
+
+    CvGraph* graph;        /* the graph */
+    CvSeq*   stack;        /* the graph vertex stack */
+    int      index;        /* the lower bound of certainly visited vertices */
+    int      mask;         /* event mask */
+}
+CvGraphScanner;
+
+/** Creates new graph scanner. */
+CVAPI(CvGraphScanner*)  cvCreateGraphScanner( CvGraph* graph,
+                                             CvGraphVtx* vtx CV_DEFAULT(NULL),
+                                             int mask CV_DEFAULT(CV_GRAPH_ALL_ITEMS));
+
+/** Releases graph scanner. */
+CVAPI(void) cvReleaseGraphScanner( CvGraphScanner** scanner );
+
+/** Get next graph element */
+CVAPI(int)  cvNextGraphItem( CvGraphScanner* scanner );
+
+/** Creates a copy of graph */
+CVAPI(CvGraph*) cvCloneGraph( const CvGraph* graph, CvMemStorage* storage );
+
+
+/** Does look-up transformation. Elements of the source array
+   (that should be 8uC1 or 8sC1) are used as indexes in lutarr 256-element table */
+CVAPI(void) cvLUT( const CvArr* src, CvArr* dst, const CvArr* lut );
+
+
+/******************* Iteration through the sequence tree *****************/
+typedef struct CvTreeNodeIterator
+{
+    const void* node;
+    int level;
+    int max_level;
+}
+CvTreeNodeIterator;
+
+CVAPI(void) cvInitTreeNodeIterator( CvTreeNodeIterator* tree_iterator,
+                                   const void* first, int max_level );
+CVAPI(void*) cvNextTreeNode( CvTreeNodeIterator* tree_iterator );
+CVAPI(void*) cvPrevTreeNode( CvTreeNodeIterator* tree_iterator );
+
+/** Inserts sequence into tree with specified "parent" sequence.
+   If parent is equal to frame (e.g. the most external contour),
+   then added contour will have null pointer to parent. */
+CVAPI(void) cvInsertNodeIntoTree( void* node, void* parent, void* frame );
+
+/** Removes contour from tree (together with the contour children). */
+CVAPI(void) cvRemoveNodeFromTree( void* node, void* frame );
+
+/** Gathers pointers to all the sequences,
+   accessible from the `first`, to the single sequence */
+CVAPI(CvSeq*) cvTreeToNodeSeq( const void* first, int header_size,
+                              CvMemStorage* storage );
+
+/** The function implements the K-means algorithm for clustering an array of sample
+   vectors in a specified number of classes */
+#define CV_KMEANS_USE_INITIAL_LABELS    1
+CVAPI(int) cvKMeans2( const CvArr* samples, int cluster_count, CvArr* labels,
+                      CvTermCriteria termcrit, int attempts CV_DEFAULT(1),
+                      CvRNG* rng CV_DEFAULT(0), int flags CV_DEFAULT(0),
+                      CvArr* _centers CV_DEFAULT(0), double* compactness CV_DEFAULT(0) );
+
+/****************************************************************************************\
+*                                    System functions                                    *
+\****************************************************************************************/
+
+/** Loads optimized functions from IPP, MKL etc. or switches back to pure C code */
+CVAPI(int)  cvUseOptimized( int on_off );
+
+typedef IplImage* (CV_STDCALL* Cv_iplCreateImageHeader)
+                            (int,int,int,char*,char*,int,int,int,int,int,
+                            IplROI*,IplImage*,void*,IplTileInfo*);
+typedef void (CV_STDCALL* Cv_iplAllocateImageData)(IplImage*,int,int);
+typedef void (CV_STDCALL* Cv_iplDeallocate)(IplImage*,int);
+typedef IplROI* (CV_STDCALL* Cv_iplCreateROI)(int,int,int,int,int);
+typedef IplImage* (CV_STDCALL* Cv_iplCloneImage)(const IplImage*);
+
+/** @brief Makes OpenCV use IPL functions for allocating IplImage and IplROI structures.
+
+Normally, the function is not called directly. Instead, a simple macro
+CV_TURN_ON_IPL_COMPATIBILITY() is used that calls cvSetIPLAllocators and passes there pointers
+to IPL allocation functions. :
+@code
+    ...
+    CV_TURN_ON_IPL_COMPATIBILITY()
+    ...
+@endcode
+@param create_header pointer to a function, creating IPL image header.
+@param allocate_data pointer to a function, allocating IPL image data.
+@param deallocate pointer to a function, deallocating IPL image.
+@param create_roi pointer to a function, creating IPL image ROI (i.e. Region of Interest).
+@param clone_image pointer to a function, cloning an IPL image.
+ */
+CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header,
+                               Cv_iplAllocateImageData allocate_data,
+                               Cv_iplDeallocate deallocate,
+                               Cv_iplCreateROI create_roi,
+                               Cv_iplCloneImage clone_image );
+
+#define CV_TURN_ON_IPL_COMPATIBILITY()                                  \
+    cvSetIPLAllocators( iplCreateImageHeader, iplAllocateImage,         \
+                        iplDeallocate, iplCreateROI, iplCloneImage )
+
+/****************************************************************************************\
+*                                    Data Persistence                                    *
+\****************************************************************************************/
+
+/********************************** High-level functions ********************************/
+
+/** @brief Opens file storage for reading or writing data.
+
+The function opens file storage for reading or writing data. In the latter case, a new file is
+created or an existing file is rewritten. The type of the read or written file is determined by the
+filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON.
+
+At the same time, it also supports adding parameters like "example.xml?base64". The three ways
+are the same:
+@snippet samples/cpp/filestorage_base64.cpp suffix_in_file_name
+@snippet samples/cpp/filestorage_base64.cpp flag_write_base64
+@snippet samples/cpp/filestorage_base64.cpp flag_write_and_flag_base64
+
+The function returns a pointer to the CvFileStorage structure.
+If the file cannot be opened then the function returns NULL.
+@param filename Name of the file associated with the storage
+@param memstorage Memory storage used for temporary data and for
+:   storing dynamic structures, such as CvSeq or CvGraph . If it is NULL, a temporary memory
+    storage is created and used.
+@param flags Can be one of the following:
+> -   **CV_STORAGE_READ** the storage is open for reading
+> -   **CV_STORAGE_WRITE** the storage is open for writing
+      (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64)
+@param encoding
+ */
+CVAPI(CvFileStorage*)  cvOpenFileStorage( const char* filename, CvMemStorage* memstorage,
+                                          int flags, const char* encoding CV_DEFAULT(NULL) );
+
+/** @brief Releases file storage.
+
+The function closes the file associated with the storage and releases all the temporary structures.
+It must be called after all I/O operations with the storage are finished.
+@param fs Double pointer to the released file storage
+ */
+CVAPI(void) cvReleaseFileStorage( CvFileStorage** fs );
+
+/** returns attribute value or 0 (NULL) if there is no such attribute */
+CVAPI(const char*) cvAttrValue( const CvAttrList* attr, const char* attr_name );
+
+/** @brief Starts writing a new structure.
+
+The function starts writing a compound structure (collection) that can be a sequence or a map. After
+all the structure fields, which can be scalars or structures, are written, cvEndWriteStruct should
+be called. The function can be used to group some objects or to implement the write function for a
+some user object (see CvTypeInfo).
+@param fs File storage
+@param name Name of the written structure. The structure can be accessed by this name when the
+storage is read.
+@param struct_flags A combination one of the following values:
+-   **CV_NODE_SEQ** the written structure is a sequence (see discussion of CvFileStorage ),
+    that is, its elements do not have a name.
+-   **CV_NODE_MAP** the written structure is a map (see discussion of CvFileStorage ), that
+    is, all its elements have names.
+One and only one of the two above flags must be specified
+-   **CV_NODE_FLOW** the optional flag that makes sense only for YAML streams. It means that
+     the structure is written as a flow (not as a block), which is more compact. It is
+     recommended to use this flag for structures or arrays whose elements are all scalars.
+@param type_name Optional parameter - the object type name. In
+    case of XML it is written as a type_id attribute of the structure opening tag. In the case of
+    YAML it is written after a colon following the structure name (see the example in
+    CvFileStorage description). In case of JSON it is written as a name/value pair.
+    Mainly it is used with user objects. When the storage is read, the
+    encoded type name is used to determine the object type (see CvTypeInfo and cvFindType ).
+@param attributes This parameter is not used in the current implementation
+ */
+CVAPI(void) cvStartWriteStruct( CvFileStorage* fs, const char* name,
+                                int struct_flags, const char* type_name CV_DEFAULT(NULL),
+                                CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Finishes writing to a file node collection.
+@param fs File storage
+@sa cvStartWriteStruct.
+ */
+CVAPI(void) cvEndWriteStruct( CvFileStorage* fs );
+
+/** @brief Writes an integer value.
+
+The function writes a single integer value (with or without a name) to the file storage.
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+ */
+CVAPI(void) cvWriteInt( CvFileStorage* fs, const char* name, int value );
+
+/** @brief Writes a floating-point value.
+
+The function writes a single floating-point value (with or without a name) to file storage. Special
+values are encoded as follows: NaN (Not A Number) as .NaN, infinity as +.Inf or -.Inf.
+
+The following example shows how to use the low-level writing functions to store custom structures,
+such as termination criteria, without registering a new type. :
+@code
+    void write_termcriteria( CvFileStorage* fs, const char* struct_name,
+                             CvTermCriteria* termcrit )
+    {
+        cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0));
+        cvWriteComment( fs, "termination criteria", 1 ); // just a description
+        if( termcrit->type & CV_TERMCRIT_ITER )
+            cvWriteInteger( fs, "max_iterations", termcrit->max_iter );
+        if( termcrit->type & CV_TERMCRIT_EPS )
+            cvWriteReal( fs, "accuracy", termcrit->epsilon );
+        cvEndWriteStruct( fs );
+    }
+@endcode
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+*/
+CVAPI(void) cvWriteReal( CvFileStorage* fs, const char* name, double value );
+
+/** @brief Writes a text string.
+
+The function writes a text string to file storage.
+@param fs File storage
+@param name Name of the written string . Should be NULL if and only if the parent structure is a
+sequence.
+@param str The written text string
+@param quote If non-zero, the written string is put in quotes, regardless of whether they are
+required. Otherwise, if the flag is zero, quotes are used only when they are required (e.g. when
+the string starts with a digit or contains spaces).
+ */
+CVAPI(void) cvWriteString( CvFileStorage* fs, const char* name,
+                           const char* str, int quote CV_DEFAULT(0) );
+
+/** @brief Writes a comment.
+
+The function writes a comment into file storage. The comments are skipped when the storage is read.
+@param fs File storage
+@param comment The written comment, single-line or multi-line
+@param eol_comment If non-zero, the function tries to put the comment at the end of current line.
+If the flag is zero, if the comment is multi-line, or if it does not fit at the end of the current
+line, the comment starts a new line.
+ */
+CVAPI(void) cvWriteComment( CvFileStorage* fs, const char* comment,
+                            int eol_comment );
+
+/** @brief Writes an object to file storage.
+
+The function writes an object to file storage. First, the appropriate type info is found using
+cvTypeOf. Then, the write method associated with the type info is called.
+
+Attributes are used to customize the writing procedure. The standard types support the following
+attributes (all the dt attributes have the same format as in cvWriteRawData):
+
+-# CvSeq
+    -   **header_dt** description of user fields of the sequence header that follow CvSeq, or
+        CvChain (if the sequence is a Freeman chain) or CvContour (if the sequence is a contour or
+        point sequence)
+    -   **dt** description of the sequence elements.
+    -   **recursive** if the attribute is present and is not equal to "0" or "false", the whole
+        tree of sequences (contours) is stored.
+-# CvGraph
+    -   **header_dt** description of user fields of the graph header that follows CvGraph;
+    -   **vertex_dt** description of user fields of graph vertices
+    -   **edge_dt** description of user fields of graph edges (note that the edge weight is
+        always written, so there is no need to specify it explicitly)
+
+Below is the code that creates the YAML file shown in the CvFileStorage description:
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvMat* mat = cvCreateMat( 3, 3, CV_32F );
+        CvFileStorage* fs = cvOpenFileStorage( "example.yml", 0, CV_STORAGE_WRITE );
+
+        cvSetIdentity( mat );
+        cvWrite( fs, "A", mat, cvAttrList(0,0) );
+
+        cvReleaseFileStorage( &fs );
+        cvReleaseMat( &mat );
+        return 0;
+    }
+@endcode
+@param fs File storage
+@param name Name of the written object. Should be NULL if and only if the parent structure is a
+sequence.
+@param ptr Pointer to the object
+@param attributes The attributes of the object. They are specific for each particular type (see
+the discussion below).
+ */
+CVAPI(void) cvWrite( CvFileStorage* fs, const char* name, const void* ptr,
+                         CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Starts the next stream.
+
+The function finishes the currently written stream and starts the next stream. In the case of XML
+the file with multiple streams looks like this:
+@code{.xml}
+    <opencv_storage>
+    <!-- stream #1 data -->
+    </opencv_storage>
+    <opencv_storage>
+    <!-- stream #2 data -->
+    </opencv_storage>
+    ...
+@endcode
+The YAML file will look like this:
+@code{.yaml}
+    %YAML 1.0
+    # stream #1 data
+    ...
+    ---
+    # stream #2 data
+@endcode
+This is useful for concatenating files or for resuming the writing process.
+@param fs File storage
+ */
+CVAPI(void) cvStartNextStream( CvFileStorage* fs );
+
+/** @brief Writes multiple numbers.
+
+The function writes an array, whose elements consist of single or multiple numbers. The function
+call can be replaced with a loop containing a few cvWriteInt and cvWriteReal calls, but a single
+call is more efficient. Note that because none of the elements have a name, they should be written
+to a sequence rather than a map.
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+ */
+CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src,
+                                int len, const char* dt );
+
+/** @brief Writes multiple numbers in Base64.
+
+If either CV_STORAGE_WRITE_BASE64 or cv::FileStorage::WRITE_BASE64 is used,
+this function will be the same as cvWriteRawData. If neither, the main
+difference is that it outputs a sequence in Base64 encoding rather than
+in plain text.
+
+This function can only be used to write a sequence with a type "binary".
+
+Consider the following two examples where their output is the same:
+@snippet samples/cpp/filestorage_base64.cpp without_base64_flag
+and
+@snippet samples/cpp/filestorage_base64.cpp with_write_base64_flag
+
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+*/
+CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src,
+                                 int len, const char* dt );
+
+/** @brief Returns a unique pointer for a given name.
+
+The function returns a unique pointer for each particular file node name. This pointer can be then
+passed to the cvGetFileNode function that is faster than cvGetFileNodeByName because it compares
+text strings by comparing pointers rather than the strings' content.
+
+Consider the following example where an array of points is encoded as a sequence of 2-entry maps:
+@code
+    points:
+      - { x: 10, y: 10 }
+      - { x: 20, y: 20 }
+      - { x: 30, y: 30 }
+      # ...
+@endcode
+Then, it is possible to get hashed "x" and "y" pointers to speed up decoding of the points. :
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvFileStorage* fs = cvOpenFileStorage( "points.yml", 0, CV_STORAGE_READ );
+        CvStringHashNode* x_key = cvGetHashedNode( fs, "x", -1, 1 );
+        CvStringHashNode* y_key = cvGetHashedNode( fs, "y", -1, 1 );
+        CvFileNode* points = cvGetFileNodeByName( fs, 0, "points" );
+
+        if( CV_NODE_IS_SEQ(points->tag) )
+        {
+            CvSeq* seq = points->data.seq;
+            int i, total = seq->total;
+            CvSeqReader reader;
+            cvStartReadSeq( seq, &reader, 0 );
+            for( i = 0; i < total; i++ )
+            {
+                CvFileNode* pt = (CvFileNode*)reader.ptr;
+    #if 1 // faster variant
+                CvFileNode* xnode = cvGetFileNode( fs, pt, x_key, 0 );
+                CvFileNode* ynode = cvGetFileNode( fs, pt, y_key, 0 );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+    #elif 1 // slower variant; does not use x_key & y_key
+                CvFileNode* xnode = cvGetFileNodeByName( fs, pt, "x" );
+                CvFileNode* ynode = cvGetFileNodeByName( fs, pt, "y" );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+    #else // the slowest yet the easiest to use variant
+                int x = cvReadIntByName( fs, pt, "x", 0 );
+                int y = cvReadIntByName( fs, pt, "y", 0 );
+    #endif
+                CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
+                printf("
+            }
+        }
+        cvReleaseFileStorage( &fs );
+        return 0;
+    }
+@endcode
+Please note that whatever method of accessing a map you are using, it is still much slower than
+using plain sequences; for example, in the above example, it is more efficient to encode the points
+as pairs of integers in a single numeric sequence.
+@param fs File storage
+@param name Literal node name
+@param len Length of the name (if it is known apriori), or -1 if it needs to be calculated
+@param create_missing Flag that specifies, whether an absent key should be added into the hash table
+*/
+CVAPI(CvStringHashNode*) cvGetHashedKey( CvFileStorage* fs, const char* name,
+                                        int len CV_DEFAULT(-1),
+                                        int create_missing CV_DEFAULT(0));
+
+/** @brief Retrieves one of the top-level nodes of the file storage.
+
+The function returns one of the top-level file nodes. The top-level nodes do not have a name, they
+correspond to the streams that are stored one after another in the file storage. If the index is out
+of range, the function returns a NULL pointer, so all the top-level nodes can be iterated by
+subsequent calls to the function with stream_index=0,1,..., until the NULL pointer is returned.
+This function can be used as a base for recursive traversal of the file storage.
+@param fs File storage
+@param stream_index Zero-based index of the stream. See cvStartNextStream . In most cases,
+there is only one stream in the file; however, there can be several.
+ */
+CVAPI(CvFileNode*) cvGetRootFileNode( const CvFileStorage* fs,
+                                     int stream_index CV_DEFAULT(0) );
+
+/** @brief Finds a node in a map or file storage.
+
+The function finds a file node. It is a faster version of cvGetFileNodeByName (see
+cvGetHashedKey discussion). Also, the function can insert a new node, if it is not in the map yet.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node. If both map and
+key are NULLs, the function returns the root file node - a map that contains top-level nodes.
+@param key Unique pointer to the node name, retrieved with cvGetHashedKey
+@param create_missing Flag that specifies whether an absent node should be added to the map
+ */
+CVAPI(CvFileNode*) cvGetFileNode( CvFileStorage* fs, CvFileNode* map,
+                                 const CvStringHashNode* key,
+                                 int create_missing CV_DEFAULT(0) );
+
+/** @brief Finds a node in a map or file storage.
+
+The function finds a file node by name. The node is searched either in map or, if the pointer is
+NULL, among the top-level file storage nodes. Using this function for maps and cvGetSeqElem (or
+sequence reader) for sequences, it is possible to navigate through the file storage. To speed up
+multiple queries for a certain key (e.g., in the case of an array of structures) one may use a
+combination of cvGetHashedKey and cvGetFileNode.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches in all the top-level nodes
+(streams), starting with the first one.
+@param name The file node name
+ */
+CVAPI(CvFileNode*) cvGetFileNodeByName( const CvFileStorage* fs,
+                                       const CvFileNode* map,
+                                       const char* name );
+
+/** @brief Retrieves an integer value from a file node.
+
+The function returns an integer that is represented by the file node. If the file node is NULL, the
+default_value is returned (thus, it is convenient to call the function right after cvGetFileNode
+without checking for a NULL pointer). If the file node has type CV_NODE_INT, then node-\>data.i is
+returned. If the file node has type CV_NODE_REAL, then node-\>data.f is converted to an integer
+and returned. Otherwise the error is reported.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE int cvReadInt( const CvFileNode* node, int default_value CV_DEFAULT(0) )
+{
+    return !node ? default_value :
+        CV_NODE_IS_INT(node->tag) ? node->data.i :
+        CV_NODE_IS_REAL(node->tag) ? cvRound(node->data.f) : 0x7fffffff;
+}
+
+/** @brief Finds a file node and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadInt.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE int cvReadIntByName( const CvFileStorage* fs, const CvFileNode* map,
+                         const char* name, int default_value CV_DEFAULT(0) )
+{
+    return cvReadInt( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a floating-point value from a file node.
+
+The function returns a floating-point value that is represented by the file node. If the file node
+is NULL, the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_REAL ,
+then node-\>data.f is returned. If the file node has type CV_NODE_INT , then node-:math:\>data.f
+is converted to floating-point and returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE double cvReadReal( const CvFileNode* node, double default_value CV_DEFAULT(0.) )
+{
+    return !node ? default_value :
+        CV_NODE_IS_INT(node->tag) ? (double)node->data.i :
+        CV_NODE_IS_REAL(node->tag) ? node->data.f : 1e300;
+}
+
+/** @brief Finds a file node and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadReal .
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE double cvReadRealByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, double default_value CV_DEFAULT(0.) )
+{
+    return cvReadReal( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a text string from a file node.
+
+The function returns a text string that is represented by the file node. If the file node is NULL,
+the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_STR , then
+node-:math:\>data.str.ptr is returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE const char* cvReadString( const CvFileNode* node,
+                        const char* default_value CV_DEFAULT(NULL) )
+{
+    return !node ? default_value : CV_NODE_IS_STRING(node->tag) ? node->data.str.ptr : 0;
+}
+
+/** @brief Finds a file node by its name and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadString .
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE const char* cvReadStringByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, const char* default_value CV_DEFAULT(NULL) )
+{
+    return cvReadString( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+
+/** @brief Decodes an object and returns a pointer to it.
+
+The function decodes a user object (creates an object in a native representation from the file
+storage subtree) and returns it. The object to be decoded must be an instance of a registered type
+that supports the read method (see CvTypeInfo). The type of the object is determined by the type
+name that is encoded in the file. If the object is a dynamic structure, it is created either in
+memory storage and passed to cvOpenFileStorage or, if a NULL pointer was passed, in temporary
+memory storage, which is released when cvReleaseFileStorage is called. Otherwise, if the object is
+not a dynamic structure, it is created in a heap and should be released with a specialized function
+or by using the generic cvRelease.
+@param fs File storage
+@param node The root object node
+@param attributes Unused parameter
+ */
+CVAPI(void*) cvRead( CvFileStorage* fs, CvFileNode* node,
+                        CvAttrList* attributes CV_DEFAULT(NULL));
+
+/** @brief Finds an object by name and decodes it.
+
+The function is a simple superposition of cvGetFileNodeByName and cvRead.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param attributes Unused parameter
+ */
+CV_INLINE void* cvReadByName( CvFileStorage* fs, const CvFileNode* map,
+                              const char* name, CvAttrList* attributes CV_DEFAULT(NULL) )
+{
+    return cvRead( fs, cvGetFileNodeByName( fs, map, name ), attributes );
+}
+
+
+/** @brief Initializes the file node sequence reader.
+
+The function initializes the sequence reader to read data from a file node. The initialized reader
+can be then passed to cvReadRawDataSlice.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param reader Pointer to the sequence reader
+ */
+CVAPI(void) cvStartReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                               CvSeqReader* reader );
+
+/** @brief Initializes file node sequence reader.
+
+The function reads one or more elements from the file node, representing a sequence, to a
+user-specified array. The total number of read sequence elements is a product of total and the
+number of components in each array element. For example, if dt=2if, the function will read total\*3
+sequence elements. As with any sequence, some parts of the file node sequence can be skipped or read
+repeatedly by repositioning the reader using cvSetSeqReaderPos.
+@param fs File storage
+@param reader The sequence reader. Initialize it with cvStartReadRawData .
+@param count The number of elements to read
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData .
+ */
+CVAPI(void) cvReadRawDataSlice( const CvFileStorage* fs, CvSeqReader* reader,
+                               int count, void* dst, const char* dt );
+
+/** @brief Reads multiple numbers.
+
+The function reads elements from a file node that represents a sequence of scalars.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData .
+ */
+CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                          void* dst, const char* dt );
+
+/** @brief Writes a file node to another file storage.
+
+The function writes a copy of a file node to file storage. Possible applications of the function are
+merging several file storages into one and conversion between XML, YAML and JSON formats.
+@param fs Destination file storage
+@param new_node_name New name of the file node in the destination file storage. To keep the
+existing name, use cvcvGetFileNodeName
+@param node The written node
+@param embed If the written node is a collection and this parameter is not zero, no extra level of
+hierarchy is created. Instead, all the elements of node are written into the currently written
+structure. Of course, map elements can only be embedded into another map, and sequence elements
+can only be embedded into another sequence.
+ */
+CVAPI(void) cvWriteFileNode( CvFileStorage* fs, const char* new_node_name,
+                            const CvFileNode* node, int embed );
+
+/** @brief Returns the name of a file node.
+
+The function returns the name of a file node or NULL, if the file node does not have a name or if
+node is NULL.
+@param node File node
+ */
+CVAPI(const char*) cvGetFileNodeName( const CvFileNode* node );
+
+/*********************************** Adding own types ***********************************/
+
+/** @brief Registers a new type.
+
+The function registers a new type, which is described by info . The function creates a copy of the
+structure, so the user should delete it after calling the function.
+@param info Type info structure
+ */
+CVAPI(void) cvRegisterType( const CvTypeInfo* info );
+
+/** @brief Unregisters the type.
+
+The function unregisters a type with a specified name. If the name is unknown, it is possible to
+locate the type info by an instance of the type using cvTypeOf or by iterating the type list,
+starting from cvFirstType, and then calling cvUnregisterType(info-\>typeName).
+@param type_name Name of an unregistered type
+ */
+CVAPI(void) cvUnregisterType( const char* type_name );
+
+/** @brief Returns the beginning of a type list.
+
+The function returns the first type in the list of registered types. Navigation through the list can
+be done via the prev and next fields of the CvTypeInfo structure.
+ */
+CVAPI(CvTypeInfo*) cvFirstType(void);
+
+/** @brief Finds a type by its name.
+
+The function finds a registered type by its name. It returns NULL if there is no type with the
+specified name.
+@param type_name Type name
+ */
+CVAPI(CvTypeInfo*) cvFindType( const char* type_name );
+
+/** @brief Returns the type of an object.
+
+The function finds the type of a given object. It iterates through the list of registered types and
+calls the is_instance function/method for every type info structure with that object until one of
+them returns non-zero or until the whole list has been traversed. In the latter case, the function
+returns NULL.
+@param struct_ptr The object pointer
+ */
+CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr );
+
+/** @brief Releases an object.
+
+The function finds the type of a given object and calls release with the double pointer.
+@param struct_ptr Double pointer to the object
+ */
+CVAPI(void) cvRelease( void** struct_ptr );
+
+/** @brief Makes a clone of an object.
+
+The function finds the type of a given object and calls clone with the passed object. Of course, if
+you know the object type, for example, struct_ptr is CvMat\*, it is faster to call the specific
+function, like cvCloneMat.
+@param struct_ptr The object to clone
+ */
+CVAPI(void*) cvClone( const void* struct_ptr );
+
+/** @brief Saves an object to a file.
+
+The function saves an object to a file. It provides a simple interface to cvWrite .
+@param filename File name
+@param struct_ptr Object to save
+@param name Optional object name. If it is NULL, the name will be formed from filename .
+@param comment Optional comment to put in the beginning of the file
+@param attributes Optional attributes passed to cvWrite
+ */
+CVAPI(void) cvSave( const char* filename, const void* struct_ptr,
+                    const char* name CV_DEFAULT(NULL),
+                    const char* comment CV_DEFAULT(NULL),
+                    CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Loads an object from a file.
+
+The function loads an object from a file. It basically reads the specified file, find the first
+top-level node and calls cvRead for that node. If the file node does not have type information or
+the type information can not be found by the type name, the function returns NULL. After the object
+is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a
+dynamic structure, such as a sequence, contour, or graph, one should pass a valid memory storage
+destination to the function.
+@param filename File name
+@param memstorage Memory storage for dynamic structures, such as CvSeq or CvGraph . It is not used
+for matrices or images.
+@param name Optional object name. If it is NULL, the first top-level object in the storage will be
+loaded.
+@param real_name Optional output parameter that will contain the name of the loaded object
+(useful if name=NULL )
+ */
+CVAPI(void*) cvLoad( const char* filename,
+                     CvMemStorage* memstorage CV_DEFAULT(NULL),
+                     const char* name CV_DEFAULT(NULL),
+                     const char** real_name CV_DEFAULT(NULL) );
+
+/*********************************** Measuring Execution Time ***************************/
+
+/** helper functions for RNG initialization and accurate time measurement:
+   uses internal clock counter on x86 */
+CVAPI(int64)  cvGetTickCount( void );
+CVAPI(double) cvGetTickFrequency( void );
+
+/*********************************** CPU capabilities ***********************************/
+
+CVAPI(int) cvCheckHardwareSupport(int feature);
+
+/*********************************** Multi-Threading ************************************/
+
+/** retrieve/set the number of threads used in OpenMP implementations */
+CVAPI(int)  cvGetNumThreads( void );
+CVAPI(void) cvSetNumThreads( int threads CV_DEFAULT(0) );
+/** get index of the thread being executed */
+CVAPI(int)  cvGetThreadNum( void );
+
+
+/********************************** Error Handling **************************************/
+
+/** Get current OpenCV error status */
+CVAPI(int) cvGetErrStatus( void );
+
+/** Sets error status silently */
+CVAPI(void) cvSetErrStatus( int status );
+
+#define CV_ErrModeLeaf     0   /* Print error and exit program */
+#define CV_ErrModeParent   1   /* Print error and continue */
+#define CV_ErrModeSilent   2   /* Don't print and continue */
+
+/** Retrives current error processing mode */
+CVAPI(int)  cvGetErrMode( void );
+
+/** Sets error processing mode, returns previously used mode */
+CVAPI(int) cvSetErrMode( int mode );
+
+/** Sets error status and performs some additonal actions (displaying message box,
+ writing message to stderr, terminating application etc.)
+ depending on the current error mode */
+CVAPI(void) cvError( int status, const char* func_name,
+                    const char* err_msg, const char* file_name, int line );
+
+/** Retrieves textual description of the error given its code */
+CVAPI(const char*) cvErrorStr( int status );
+
+/** Retrieves detailed information about the last error occured */
+CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description,
+                        const char** filename, int* line );
+
+/** Maps IPP error codes to the counterparts from OpenCV */
+CVAPI(int) cvErrorFromIppStatus( int ipp_status );
+
+typedef int (CV_CDECL *CvErrorCallback)( int status, const char* func_name,
+                                        const char* err_msg, const char* file_name, int line, void* userdata );
+
+/** Assigns a new error-handling function */
+CVAPI(CvErrorCallback) cvRedirectError( CvErrorCallback error_handler,
+                                       void* userdata CV_DEFAULT(NULL),
+                                       void** prev_userdata CV_DEFAULT(NULL) );
+
+/** Output nothing */
+CVAPI(int) cvNulDevReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+/** Output to console(fprintf(stderr,...)) */
+CVAPI(int) cvStdErrReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+/** Output to MessageBox(WIN32) */
+CVAPI(int) cvGuiBoxReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+#define OPENCV_ERROR(status,func,context)                           \
+cvError((status),(func),(context),__FILE__,__LINE__)
+
+#define OPENCV_ASSERT(expr,func,context)                            \
+{if (! (expr))                                      \
+{OPENCV_ERROR(CV_StsInternal,(func),(context));}}
+
+#define OPENCV_CALL( Func )                                         \
+{                                                                   \
+Func;                                                           \
+}
+
+
+/** CV_FUNCNAME macro defines icvFuncName constant which is used by CV_ERROR macro */
+#ifdef CV_NO_FUNC_NAMES
+#define CV_FUNCNAME( Name )
+#define cvFuncName ""
+#else
+#define CV_FUNCNAME( Name )  \
+static char cvFuncName[] = Name
+#endif
+
+
+/**
+ CV_ERROR macro unconditionally raises error with passed code and message.
+ After raising error, control will be transferred to the exit label.
+ */
+#define CV_ERROR( Code, Msg )                                       \
+{                                                                   \
+    cvError( (Code), cvFuncName, Msg, __FILE__, __LINE__ );        \
+    __CV_EXIT__;                                                   \
+}
+
+/**
+ CV_CHECK macro checks error status after CV (or IPL)
+ function call. If error detected, control will be transferred to the exit
+ label.
+ */
+#define CV_CHECK()                                                  \
+{                                                                   \
+    if( cvGetErrStatus() < 0 )                                      \
+        CV_ERROR( CV_StsBackTrace, "Inner function failed." );      \
+}
+
+
+/**
+ CV_CALL macro calls CV (or IPL) function, checks error status and
+ signals a error if the function failed. Useful in "parent node"
+ error procesing mode
+ */
+#define CV_CALL( Func )                                             \
+{                                                                   \
+    Func;                                                           \
+    CV_CHECK();                                                     \
+}
+
+
+/** Runtime assertion macro */
+#define CV_ASSERT( Condition )                                          \
+{                                                                       \
+    if( !(Condition) )                                                  \
+        CV_ERROR( CV_StsInternal, "Assertion: " #Condition " failed" ); \
+}
+
+#define __CV_BEGIN__       {
+#define __CV_END__         goto exit; exit: ; }
+#define __CV_EXIT__        goto exit
+
+/** @} core_c */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#ifdef __cplusplus
+
+//! @addtogroup core_c_glue
+//! @{
+
+//! class for automatic module/RTTI data registration/unregistration
+struct CV_EXPORTS CvType
+{
+    CvType( const char* type_name,
+            CvIsInstanceFunc is_instance, CvReleaseFunc release=0,
+            CvReadFunc read=0, CvWriteFunc write=0, CvCloneFunc clone=0 );
+    ~CvType();
+    CvTypeInfo* info;
+
+    static CvTypeInfo* first;
+    static CvTypeInfo* last;
+};
+
+//! @}
+
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_c_glue
+//! @{
+
+/////////////////////////////////////////// glue ///////////////////////////////////////////
+
+//! converts array (CvMat or IplImage) to cv::Mat
+CV_EXPORTS Mat cvarrToMat(const CvArr* arr, bool copyData=false,
+                          bool allowND=true, int coiMode=0,
+                          AutoBuffer<double>* buf=0);
+
+static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0)
+{
+    return cvarrToMat(arr, copyData, true, coiMode);
+}
+
+
+//! extracts Channel of Interest from CvMat or IplImage and makes cv::Mat out of it.
+CV_EXPORTS void extractImageCOI(const CvArr* arr, OutputArray coiimg, int coi=-1);
+//! inserts single-channel cv::Mat into a multi-channel CvMat or IplImage
+CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
+
+
+
+////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
+
+template<> CV_EXPORTS void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const;
+
+////////////// convenient wrappers for operating old-style dynamic structures //////////////
+
+template<typename _Tp> class SeqIterator;
+
+typedef Ptr<CvMemStorage> MemStorage;
+
+/*!
+ Template Sequence Class derived from CvSeq
+
+ The class provides more convenient access to sequence elements,
+ STL-style operations and iterators.
+
+ \note The class is targeted for simple data types,
+    i.e. no constructors or destructors
+    are called for the sequence elements.
+*/
+template<typename _Tp> class Seq
+{
+public:
+    typedef SeqIterator<_Tp> iterator;
+    typedef SeqIterator<_Tp> const_iterator;
+
+    //! the default constructor
+    Seq();
+    //! the constructor for wrapping CvSeq structure. The real element type in CvSeq should match _Tp.
+    Seq(const CvSeq* seq);
+    //! creates the empty sequence that resides in the specified storage
+    Seq(MemStorage& storage, int headerSize = sizeof(CvSeq));
+    //! returns read-write reference to the specified element
+    _Tp& operator [](int idx);
+    //! returns read-only reference to the specified element
+    const _Tp& operator[](int idx) const;
+    //! returns iterator pointing to the beginning of the sequence
+    SeqIterator<_Tp> begin() const;
+    //! returns iterator pointing to the element following the last sequence element
+    SeqIterator<_Tp> end() const;
+    //! returns the number of elements in the sequence
+    size_t size() const;
+    //! returns the type of sequence elements (CV_8UC1 ... CV_64FC(CV_CN_MAX) ...)
+    int type() const;
+    //! returns the depth of sequence elements (CV_8U ... CV_64F)
+    int depth() const;
+    //! returns the number of channels in each sequence element
+    int channels() const;
+    //! returns the size of each sequence element
+    size_t elemSize() const;
+    //! returns index of the specified sequence element
+    size_t index(const _Tp& elem) const;
+    //! appends the specified element to the end of the sequence
+    void push_back(const _Tp& elem);
+    //! appends the specified element to the front of the sequence
+    void push_front(const _Tp& elem);
+    //! appends zero or more elements to the end of the sequence
+    void push_back(const _Tp* elems, size_t count);
+    //! appends zero or more elements to the front of the sequence
+    void push_front(const _Tp* elems, size_t count);
+    //! inserts the specified element to the specified position
+    void insert(int idx, const _Tp& elem);
+    //! inserts zero or more elements to the specified position
+    void insert(int idx, const _Tp* elems, size_t count);
+    //! removes element at the specified position
+    void remove(int idx);
+    //! removes the specified subsequence
+    void remove(const Range& r);
+
+    //! returns reference to the first sequence element
+    _Tp& front();
+    //! returns read-only reference to the first sequence element
+    const _Tp& front() const;
+    //! returns reference to the last sequence element
+    _Tp& back();
+    //! returns read-only reference to the last sequence element
+    const _Tp& back() const;
+    //! returns true iff the sequence contains no elements
+    bool empty() const;
+
+    //! removes all the elements from the sequence
+    void clear();
+    //! removes the first element from the sequence
+    void pop_front();
+    //! removes the last element from the sequence
+    void pop_back();
+    //! removes zero or more elements from the beginning of the sequence
+    void pop_front(_Tp* elems, size_t count);
+    //! removes zero or more elements from the end of the sequence
+    void pop_back(_Tp* elems, size_t count);
+
+    //! copies the whole sequence or the sequence slice to the specified vector
+    void copyTo(std::vector<_Tp>& vec, const Range& range=Range::all()) const;
+    //! returns the vector containing all the sequence elements
+    operator std::vector<_Tp>() const;
+
+    CvSeq* seq;
+};
+
+
+/*!
+ STL-style Sequence Iterator inherited from the CvSeqReader structure
+*/
+template<typename _Tp> class SeqIterator : public CvSeqReader
+{
+public:
+    //! the default constructor
+    SeqIterator();
+    //! the constructor setting the iterator to the beginning or to the end of the sequence
+    SeqIterator(const Seq<_Tp>& seq, bool seekEnd=false);
+    //! positions the iterator within the sequence
+    void seek(size_t pos);
+    //! reports the current iterator position
+    size_t tell() const;
+    //! returns reference to the current sequence element
+    _Tp& operator *();
+    //! returns read-only reference to the current sequence element
+    const _Tp& operator *() const;
+    //! moves iterator to the next sequence element
+    SeqIterator& operator ++();
+    //! moves iterator to the next sequence element
+    SeqIterator operator ++(int) const;
+    //! moves iterator to the previous sequence element
+    SeqIterator& operator --();
+    //! moves iterator to the previous sequence element
+    SeqIterator operator --(int) const;
+
+    //! moves iterator forward by the specified offset (possibly negative)
+    SeqIterator& operator +=(int);
+    //! moves iterator backward by the specified offset (possibly negative)
+    SeqIterator& operator -=(int);
+
+    // this is index of the current element module seq->total*2
+    // (to distinguish between 0 and seq->total)
+    int index;
+};
+
+
+
+// bridge C++ => C Seq API
+CV_EXPORTS schar*  seqPush( CvSeq* seq, const void* element=0);
+CV_EXPORTS schar*  seqPushFront( CvSeq* seq, const void* element=0);
+CV_EXPORTS void  seqPop( CvSeq* seq, void* element=0);
+CV_EXPORTS void  seqPopFront( CvSeq* seq, void* element=0);
+CV_EXPORTS void  seqPopMulti( CvSeq* seq, void* elements,
+                              int count, int in_front=0 );
+CV_EXPORTS void  seqRemove( CvSeq* seq, int index );
+CV_EXPORTS void  clearSeq( CvSeq* seq );
+CV_EXPORTS schar*  getSeqElem( const CvSeq* seq, int index );
+CV_EXPORTS void  seqRemoveSlice( CvSeq* seq, CvSlice slice );
+CV_EXPORTS void  seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+template<typename _Tp> inline Seq<_Tp>::Seq() : seq(0) {}
+template<typename _Tp> inline Seq<_Tp>::Seq( const CvSeq* _seq ) : seq((CvSeq*)_seq)
+{
+    CV_Assert(!_seq || _seq->elem_size == sizeof(_Tp));
+}
+
+template<typename _Tp> inline Seq<_Tp>::Seq( MemStorage& storage,
+                                             int headerSize )
+{
+    CV_Assert(headerSize >= (int)sizeof(CvSeq));
+    seq = cvCreateSeq(DataType<_Tp>::type, headerSize, sizeof(_Tp), storage);
+}
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::operator [](int idx)
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::operator [](int idx) const
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::begin() const
+{ return SeqIterator<_Tp>(*this); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::end() const
+{ return SeqIterator<_Tp>(*this, true); }
+
+template<typename _Tp> inline size_t Seq<_Tp>::size() const
+{ return seq ? seq->total : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::type() const
+{ return seq ? CV_MAT_TYPE(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::depth() const
+{ return seq ? CV_MAT_DEPTH(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::channels() const
+{ return seq ? CV_MAT_CN(seq->flags) : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::elemSize() const
+{ return seq ? seq->elem_size : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::index(const _Tp& elem) const
+{ return cvSeqElemIdx(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp& elem)
+{ cvSeqPush(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp& elem)
+{ cvSeqPushFront(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::back()
+{ return *(_Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::back() const
+{ return *(const _Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::front()
+{ return *(_Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::front() const
+{ return *(const _Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline bool Seq<_Tp>::empty() const
+{ return !seq || seq->total == 0; }
+
+template<typename _Tp> inline void Seq<_Tp>::clear()
+{ if(seq) clearSeq(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back()
+{ seqPop(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front()
+{ seqPopFront(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp& elem)
+{ seqInsert(seq, idx, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp* elems, size_t count)
+{
+    CvMat m = cvMat(1, count, DataType<_Tp>::type, elems);
+    seqInsertSlice(seq, idx, &m);
+}
+
+template<typename _Tp> inline void Seq<_Tp>::remove(int idx)
+{ seqRemove(seq, idx); }
+
+template<typename _Tp> inline void Seq<_Tp>::remove(const Range& r)
+{ seqRemoveSlice(seq, cvSlice(r.start, r.end)); }
+
+template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const Range& range) const
+{
+    size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start;
+    vec.resize(len);
+    if( seq && len )
+        cvCvtSeqToArray(seq, &vec[0], range);
+}
+
+template<typename _Tp> inline Seq<_Tp>::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> vec;
+    copyTo(vec);
+    return vec;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator()
+{ memset(this, 0, sizeof(*this)); }
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator(const Seq<_Tp>& _seq, bool seekEnd)
+{
+    cvStartReadSeq(_seq.seq, this);
+    index = seekEnd ? _seq.seq->total : 0;
+}
+
+template<typename _Tp> inline void SeqIterator<_Tp>::seek(size_t pos)
+{
+    cvSetSeqReaderPos(this, (int)pos, false);
+    index = pos;
+}
+
+template<typename _Tp> inline size_t SeqIterator<_Tp>::tell() const
+{ return index; }
+
+template<typename _Tp> inline _Tp& SeqIterator<_Tp>::operator *()
+{ return *(_Tp*)ptr; }
+
+template<typename _Tp> inline const _Tp& SeqIterator<_Tp>::operator *() const
+{ return *(const _Tp*)ptr; }
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator ++()
+{
+    CV_NEXT_SEQ_ELEM(sizeof(_Tp), *this);
+    if( ++index >= seq->total*2 )
+        index = 0;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator ++(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    ++*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator --()
+{
+    CV_PREV_SEQ_ELEM(sizeof(_Tp), *this);
+    if( --index < 0 )
+        index = seq->total*2-1;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator --(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    --*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator +=(int delta)
+{
+    cvSetSeqReaderPos(this, delta, 1);
+    index += delta;
+    int n = seq->total*2;
+    if( index < 0 )
+        index += n;
+    if( index >= n )
+        index -= n;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator -=(int delta)
+{
+    return (*this += -delta);
+}
+
+template<typename _Tp> inline ptrdiff_t operator - (const SeqIterator<_Tp>& a,
+                                                    const SeqIterator<_Tp>& b)
+{
+    ptrdiff_t delta = a.index - b.index, n = a.seq->total;
+    if( delta > n || delta < -n )
+        delta += delta < 0 ? n : -n;
+    return delta;
+}
+
+template<typename _Tp> inline bool operator == (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return a.seq == b.seq && a.index == b.index;
+}
+
+template<typename _Tp> inline bool operator != (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return !(a == b);
+}
+
+//! @}
+
+} // cv
+
+#endif
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/cuda.hpp b/thirdparty/linux/include/opencv2/core/cuda.hpp
new file mode 100644
index 0000000..c538392
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda.hpp
@@ -0,0 +1,874 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_HPP
+#define OPENCV_CORE_CUDA_HPP
+
+#ifndef __cplusplus
+#  error cuda.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/cuda_types.hpp"
+
+/**
+  @defgroup cuda CUDA-accelerated Computer Vision
+  @{
+    @defgroup cudacore Core part
+    @{
+      @defgroup cudacore_init Initalization and Information
+      @defgroup cudacore_struct Data Structures
+    @}
+  @}
+ */
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+/** @brief Base storage class for GPU memory with reference counting.
+
+Its interface matches the Mat interface with the following limitations:
+
+-   no arbitrary dimensions support (only 2D)
+-   no functions that return references to their data (because references on GPU are not valid for
+    CPU)
+-   no expression templates technique support
+
+Beware that the latter limitation may lead to overloaded matrix operators that cause memory
+allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
+passed directly to the kernel.
+
+@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
+aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
+
+@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
+on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
+release function returns error if the CUDA context has been destroyed before.
+
+@sa Mat
+ */
+class CV_EXPORTS GpuMat
+{
+public:
+    class CV_EXPORTS Allocator
+    {
+    public:
+        virtual ~Allocator() {}
+
+        // allocator must fill data, step and refcount fields
+        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+        virtual void free(GpuMat* mat) = 0;
+    };
+
+    //! default allocator
+    static Allocator* defaultAllocator();
+    static void setDefaultAllocator(Allocator* allocator);
+
+    //! default constructor
+    explicit GpuMat(Allocator* allocator = defaultAllocator());
+
+    //! constructs GpuMat of the specified size and type
+    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
+
+    //! constucts GpuMat and fills it with the specified value _s
+    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
+
+    //! copy constructor
+    GpuMat(const GpuMat& m);
+
+    //! constructor for GpuMat headers pointing to user-allocated data
+    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
+    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
+
+    //! creates a GpuMat header for a part of the bigger matrix
+    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
+    GpuMat(const GpuMat& m, Rect roi);
+
+    //! builds GpuMat from host memory (Blocking call)
+    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
+
+    //! destructor - calls release()
+    ~GpuMat();
+
+    //! assignment operators
+    GpuMat& operator =(const GpuMat& m);
+
+    //! allocates new GpuMat data unless the GpuMat already has specified size and type
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);
+
+    //! decreases reference counter, deallocate the data when reference counter reaches 0
+    void release();
+
+    //! swaps with other smart pointer
+    void swap(GpuMat& mat);
+
+    //! pefroms upload data to GpuMat (Blocking call)
+    void upload(InputArray arr);
+
+    //! pefroms upload data to GpuMat (Non-Blocking call)
+    void upload(InputArray arr, Stream& stream);
+
+    //! pefroms download data from device to host memory (Blocking call)
+    void download(OutputArray dst) const;
+
+    //! pefroms download data from device to host memory (Non-Blocking call)
+    void download(OutputArray dst, Stream& stream) const;
+
+    //! returns deep copy of the GpuMat, i.e. the data is copied
+    GpuMat clone() const;
+
+    //! copies the GpuMat content to device memory (Blocking call)
+    void copyTo(OutputArray dst) const;
+
+    //! copies the GpuMat content to device memory (Non-Blocking call)
+    void copyTo(OutputArray dst, Stream& stream) const;
+
+    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
+    void copyTo(OutputArray dst, InputArray mask) const;
+
+    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
+    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
+
+    //! sets some of the GpuMat elements to s (Blocking call)
+    GpuMat& setTo(Scalar s);
+
+    //! sets some of the GpuMat elements to s (Non-Blocking call)
+    GpuMat& setTo(Scalar s, Stream& stream);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
+    GpuMat& setTo(Scalar s, InputArray mask);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
+    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
+
+    //! converts GpuMat to another datatype (Blocking call)
+    void convertTo(OutputArray dst, int rtype) const;
+
+    //! converts GpuMat to another datatype (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
+
+    void assignTo(GpuMat& m, int type=-1) const;
+
+    //! returns pointer to y-th row
+    uchar* ptr(int y = 0);
+    const uchar* ptr(int y = 0) const;
+
+    //! template version of the above method
+    template<typename _Tp> _Tp* ptr(int y = 0);
+    template<typename _Tp> const _Tp* ptr(int y = 0) const;
+
+    template <typename _Tp> operator PtrStepSz<_Tp>() const;
+    template <typename _Tp> operator PtrStep<_Tp>() const;
+
+    //! returns a new GpuMat header for the specified row
+    GpuMat row(int y) const;
+
+    //! returns a new GpuMat header for the specified column
+    GpuMat col(int x) const;
+
+    //! ... for the specified row span
+    GpuMat rowRange(int startrow, int endrow) const;
+    GpuMat rowRange(Range r) const;
+
+    //! ... for the specified column span
+    GpuMat colRange(int startcol, int endcol) const;
+    GpuMat colRange(Range r) const;
+
+    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
+    GpuMat operator ()(Range rowRange, Range colRange) const;
+    GpuMat operator ()(Rect roi) const;
+
+    //! creates alternative GpuMat header for the same data, with different
+    //! number of channels and/or different number of rows
+    GpuMat reshape(int cn, int rows = 0) const;
+
+    //! locates GpuMat header within a parent GpuMat
+    void locateROI(Size& wholeSize, Point& ofs) const;
+
+    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
+    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
+
+    //! returns true iff the GpuMat data is continuous
+    //! (i.e. when there are no gaps between successive rows)
+    bool isContinuous() const;
+
+    //! returns element size in bytes
+    size_t elemSize() const;
+
+    //! returns the size of element channel in bytes
+    size_t elemSize1() const;
+
+    //! returns element type
+    int type() const;
+
+    //! returns element type
+    int depth() const;
+
+    //! returns number of channels
+    int channels() const;
+
+    //! returns step/elemSize1()
+    size_t step1() const;
+
+    //! returns GpuMat size : width == number of columns, height == number of rows
+    Size size() const;
+
+    //! returns true if GpuMat data is NULL
+    bool empty() const;
+
+    /*! includes several bit-fields:
+    - the magic signature
+    - continuity flag
+    - depth
+    - number of channels
+    */
+    int flags;
+
+    //! the number of rows and columns
+    int rows, cols;
+
+    //! a distance between successive rows in bytes; includes the gap if any
+    size_t step;
+
+    //! pointer to the data
+    uchar* data;
+
+    //! pointer to the reference counter;
+    //! when GpuMat points to user-allocated data, the pointer is NULL
+    int* refcount;
+
+    //! helper fields used in locateROI and adjustROI
+    uchar* datastart;
+    const uchar* dataend;
+
+    //! allocator
+    Allocator* allocator;
+};
+
+/** @brief Creates a continuous matrix.
+
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param arr Destination matrix. This parameter changes only if it has a proper type and area (
+\f$\texttt{rows} \times \texttt{cols}\f$ ).
+
+Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
+end of each row.
+ */
+CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
+
+/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
+
+@param rows Minimum desired number of rows.
+@param cols Minimum desired number of columns.
+@param type Desired matrix type.
+@param arr Destination matrix.
+
+The function does not reallocate memory if the matrix has proper attributes already.
+ */
+CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+
+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
+
+Its interface is also Mat-like but with additional memory type parameters.
+
+-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
+    uploading/downloading data from/to GPU.
+-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
+    address space, if supported.
+-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
+    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
+    utilization.
+
+@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
+Pinned Memory APIs* document or *CUDA C Programming Guide*.
+ */
+class CV_EXPORTS HostMem
+{
+public:
+    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
+
+    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
+
+    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
+
+    HostMem(const HostMem& m);
+
+    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
+    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
+
+    //! creates from host memory with coping data
+    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
+
+    ~HostMem();
+
+    HostMem& operator =(const HostMem& m);
+
+    //! swaps with other smart pointer
+    void swap(HostMem& b);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    HostMem clone() const;
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);
+
+    //! creates alternative HostMem header for the same data, with different
+    //! number of channels and/or different number of rows
+    HostMem reshape(int cn, int rows = 0) const;
+
+    //! decrements reference counter and released memory if needed.
+    void release();
+
+    //! returns matrix header with disabled reference counting for HostMem data.
+    Mat createMatHeader() const;
+
+    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
+    for it.
+
+    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
+    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
+    eliminates an extra copy.
+     */
+    GpuMat createGpuMatHeader() const;
+
+    // Please see cv::Mat for descriptions
+    bool isContinuous() const;
+    size_t elemSize() const;
+    size_t elemSize1() const;
+    int type() const;
+    int depth() const;
+    int channels() const;
+    size_t step1() const;
+    Size size() const;
+    bool empty() const;
+
+    // Please see cv::Mat for descriptions
+    int flags;
+    int rows, cols;
+    size_t step;
+
+    uchar* data;
+    int* refcount;
+
+    uchar* datastart;
+    const uchar* dataend;
+
+    AllocType alloc_type;
+};
+
+/** @brief Page-locks the memory of matrix and maps it for the device(s).
+
+@param m Input matrix.
+ */
+CV_EXPORTS void registerPageLocked(Mat& m);
+
+/** @brief Unmaps the memory of matrix and makes it pageable again.
+
+@param m Input matrix.
+ */
+CV_EXPORTS void unregisterPageLocked(Mat& m);
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+/** @brief This class encapsulates a queue of asynchronous calls.
+
+@note Currently, you may face problems if an operation is enqueued twice with different data. Some
+functions use the constant GPU memory, and next call may update the memory before the previous one
+has been finished. But calling different operations asynchronously is safe because each operation
+has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
+also safe.
+
+@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
+
+@code
+void thread1()
+{
+    cv::cuda::Stream stream1;
+    cv::cuda::func1(..., stream1);
+}
+
+void thread2()
+{
+    cv::cuda::Stream stream2;
+    cv::cuda::func2(..., stream2);
+}
+@endcode
+
+@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
+In multi-threading environment the stream objects must be passed explicitly (see previous note).
+ */
+class CV_EXPORTS Stream
+{
+    typedef void (Stream::*bool_type)() const;
+    void this_type_does_not_support_comparisons() const {}
+
+public:
+    typedef void (*StreamCallback)(int status, void* userData);
+
+    //! creates a new asynchronous stream
+    Stream();
+
+    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
+    */
+    bool queryIfComplete() const;
+
+    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
+    */
+    void waitForCompletion();
+
+    /** @brief Makes a compute stream wait on an event.
+    */
+    void waitEvent(const Event& event);
+
+    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
+    completed.
+
+    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
+    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
+    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
+    serialized.
+     */
+    void enqueueHostCallback(StreamCallback callback, void* userData);
+
+    //! return Stream object for default CUDA stream
+    static Stream& Null();
+
+    //! returns true if stream object is not default (!= 0)
+    operator bool_type() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Stream(const Ptr<Impl>& impl);
+
+    friend struct StreamAccessor;
+    friend class BufferPool;
+    friend class DefaultDeviceInitializer;
+};
+
+class CV_EXPORTS Event
+{
+public:
+    enum CreateFlags
+    {
+        DEFAULT        = 0x00,  /**< Default event flag */
+        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
+        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
+        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
+    };
+
+    explicit Event(CreateFlags flags = DEFAULT);
+
+    //! records an event
+    void record(Stream& stream = Stream::Null());
+
+    //! queries an event's status
+    bool queryIfComplete() const;
+
+    //! waits for an event to complete
+    void waitForCompletion();
+
+    //! computes the elapsed time between events
+    static float elapsedTime(const Event& start, const Event& end);
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Event(const Ptr<Impl>& impl);
+
+    friend struct EventAccessor;
+};
+
+//! @} cudacore_struct
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+//! @addtogroup cudacore_init
+//! @{
+
+/** @brief Returns the number of installed CUDA-enabled devices.
+
+Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
+this function returns 0.
+ */
+CV_EXPORTS int getCudaEnabledDeviceCount();
+
+/** @brief Sets a device and initializes it for the current thread.
+
+@param device System index of a CUDA device starting with 0.
+
+If the call of this function is omitted, a default device is initialized at the fist CUDA usage.
+ */
+CV_EXPORTS void setDevice(int device);
+
+/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
+ */
+CV_EXPORTS int getDevice();
+
+/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
+process.
+
+Any subsequent API call to this device will reinitialize the device.
+ */
+CV_EXPORTS void resetDevice();
+
+/** @brief Enumeration providing CUDA computing features.
+ */
+enum FeatureSet
+{
+    FEATURE_SET_COMPUTE_10 = 10,
+    FEATURE_SET_COMPUTE_11 = 11,
+    FEATURE_SET_COMPUTE_12 = 12,
+    FEATURE_SET_COMPUTE_13 = 13,
+    FEATURE_SET_COMPUTE_20 = 20,
+    FEATURE_SET_COMPUTE_21 = 21,
+    FEATURE_SET_COMPUTE_30 = 30,
+    FEATURE_SET_COMPUTE_32 = 32,
+    FEATURE_SET_COMPUTE_35 = 35,
+    FEATURE_SET_COMPUTE_50 = 50,
+
+    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
+    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
+    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
+    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
+    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
+};
+
+//! checks whether current device supports the given feature
+CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
+
+/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
+built for.
+
+According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
+capability can always be compiled to binary code of greater or equal compute capability".
+ */
+class CV_EXPORTS TargetArchs
+{
+public:
+    /** @brief The following method checks whether the module was built with the support of the given feature:
+
+    @param feature_set Features to be checked. See :ocvcuda::FeatureSet.
+     */
+    static bool builtWith(FeatureSet feature_set);
+
+    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
+    code for the given architecture(s):
+
+    @param major Major compute capability version.
+    @param minor Minor compute capability version.
+     */
+    static bool has(int major, int minor);
+    static bool hasPtx(int major, int minor);
+    static bool hasBin(int major, int minor);
+
+    static bool hasEqualOrLessPtx(int major, int minor);
+    static bool hasEqualOrGreater(int major, int minor);
+    static bool hasEqualOrGreaterPtx(int major, int minor);
+    static bool hasEqualOrGreaterBin(int major, int minor);
+};
+
+/** @brief Class providing functionality for querying the specified GPU properties.
+ */
+class CV_EXPORTS DeviceInfo
+{
+public:
+    //! creates DeviceInfo object for the current GPU
+    DeviceInfo();
+
+    /** @brief The constructors.
+
+    @param device_id System index of the CUDA device starting with 0.
+
+    Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
+    constructs an object for the current device.
+     */
+    DeviceInfo(int device_id);
+
+    /** @brief Returns system index of the CUDA device starting with 0.
+    */
+    int deviceID() const;
+
+    //! ASCII string identifying device
+    const char* name() const;
+
+    //! global memory available on device in bytes
+    size_t totalGlobalMem() const;
+
+    //! shared memory available per block in bytes
+    size_t sharedMemPerBlock() const;
+
+    //! 32-bit registers available per block
+    int regsPerBlock() const;
+
+    //! warp size in threads
+    int warpSize() const;
+
+    //! maximum pitch in bytes allowed by memory copies
+    size_t memPitch() const;
+
+    //! maximum number of threads per block
+    int maxThreadsPerBlock() const;
+
+    //! maximum size of each dimension of a block
+    Vec3i maxThreadsDim() const;
+
+    //! maximum size of each dimension of a grid
+    Vec3i maxGridSize() const;
+
+    //! clock frequency in kilohertz
+    int clockRate() const;
+
+    //! constant memory available on device in bytes
+    size_t totalConstMem() const;
+
+    //! major compute capability
+    int majorVersion() const;
+
+    //! minor compute capability
+    int minorVersion() const;
+
+    //! alignment requirement for textures
+    size_t textureAlignment() const;
+
+    //! pitch alignment requirement for texture references bound to pitched memory
+    size_t texturePitchAlignment() const;
+
+    //! number of multiprocessors on device
+    int multiProcessorCount() const;
+
+    //! specified whether there is a run time limit on kernels
+    bool kernelExecTimeoutEnabled() const;
+
+    //! device is integrated as opposed to discrete
+    bool integrated() const;
+
+    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    bool canMapHostMemory() const;
+
+    enum ComputeMode
+    {
+        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
+        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
+        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
+        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
+    };
+
+    //! compute mode
+    ComputeMode computeMode() const;
+
+    //! maximum 1D texture size
+    int maxTexture1D() const;
+
+    //! maximum 1D mipmapped texture size
+    int maxTexture1DMipmap() const;
+
+    //! maximum size for 1D textures bound to linear memory
+    int maxTexture1DLinear() const;
+
+    //! maximum 2D texture dimensions
+    Vec2i maxTexture2D() const;
+
+    //! maximum 2D mipmapped texture dimensions
+    Vec2i maxTexture2DMipmap() const;
+
+    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+    Vec3i maxTexture2DLinear() const;
+
+    //! maximum 2D texture dimensions if texture gather operations have to be performed
+    Vec2i maxTexture2DGather() const;
+
+    //! maximum 3D texture dimensions
+    Vec3i maxTexture3D() const;
+
+    //! maximum Cubemap texture dimensions
+    int maxTextureCubemap() const;
+
+    //! maximum 1D layered texture dimensions
+    Vec2i maxTexture1DLayered() const;
+
+    //! maximum 2D layered texture dimensions
+    Vec3i maxTexture2DLayered() const;
+
+    //! maximum Cubemap layered texture dimensions
+    Vec2i maxTextureCubemapLayered() const;
+
+    //! maximum 1D surface size
+    int maxSurface1D() const;
+
+    //! maximum 2D surface dimensions
+    Vec2i maxSurface2D() const;
+
+    //! maximum 3D surface dimensions
+    Vec3i maxSurface3D() const;
+
+    //! maximum 1D layered surface dimensions
+    Vec2i maxSurface1DLayered() const;
+
+    //! maximum 2D layered surface dimensions
+    Vec3i maxSurface2DLayered() const;
+
+    //! maximum Cubemap surface dimensions
+    int maxSurfaceCubemap() const;
+
+    //! maximum Cubemap layered surface dimensions
+    Vec2i maxSurfaceCubemapLayered() const;
+
+    //! alignment requirements for surfaces
+    size_t surfaceAlignment() const;
+
+    //! device can possibly execute multiple kernels concurrently
+    bool concurrentKernels() const;
+
+    //! device has ECC support enabled
+    bool ECCEnabled() const;
+
+    //! PCI bus ID of the device
+    int pciBusID() const;
+
+    //! PCI device ID of the device
+    int pciDeviceID() const;
+
+    //! PCI domain ID of the device
+    int pciDomainID() const;
+
+    //! true if device is a Tesla device using TCC driver, false otherwise
+    bool tccDriver() const;
+
+    //! number of asynchronous engines
+    int asyncEngineCount() const;
+
+    //! device shares a unified address space with the host
+    bool unifiedAddressing() const;
+
+    //! peak memory clock frequency in kilohertz
+    int memoryClockRate() const;
+
+    //! global memory bus width in bits
+    int memoryBusWidth() const;
+
+    //! size of L2 cache in bytes
+    int l2CacheSize() const;
+
+    //! maximum resident threads per multiprocessor
+    int maxThreadsPerMultiProcessor() const;
+
+    //! gets free and total device memory
+    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
+    size_t freeMemory() const;
+    size_t totalMemory() const;
+
+    /** @brief Provides information on CUDA feature support.
+
+    @param feature_set Features to be checked. See cuda::FeatureSet.
+
+    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
+     */
+    bool supports(FeatureSet feature_set) const;
+
+    /** @brief Checks the CUDA module and device compatibility.
+
+    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
+    returns false .
+     */
+    bool isCompatible() const;
+
+private:
+    int device_id_;
+};
+
+CV_EXPORTS void printCudaDeviceInfo(int device);
+CV_EXPORTS void printShortCudaDeviceInfo(int device);
+
+/** @brief Converts an array to half precision floating number.
+
+@param _src input array.
+@param _dst output array.
+@param stream Stream for the asynchronous version.
+@sa convertFp16
+*/
+CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
+
+//! @} cudacore_init
+
+}} // namespace cv { namespace cuda {
+
+
+#include "opencv2/core/cuda.inl.hpp"
+
+#endif /* OPENCV_CORE_CUDA_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda.inl.hpp b/thirdparty/linux/include/opencv2/core/cuda.inl.hpp
new file mode 100644
index 0000000..35ae2e4
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda.inl.hpp
@@ -0,0 +1,631 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDAINL_HPP
+#define OPENCV_CORE_CUDAINL_HPP
+
+#include "opencv2/core/cuda.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda {
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+inline
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{}
+
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (rows_ > 0 && cols_ > 0)
+        create(rows_, cols_, type_);
+}
+
+inline
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (size_.height > 0 && size_.width > 0)
+        create(size_.height, size_.width, type_);
+}
+
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (rows_ > 0 && cols_ > 0)
+    {
+        create(rows_, cols_, type_);
+        setTo(s_);
+    }
+}
+
+inline
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (size_.height > 0 && size_.width > 0)
+    {
+        create(size_.height, size_.width, type_);
+        setTo(s_);
+    }
+}
+
+inline
+GpuMat::GpuMat(const GpuMat& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
+{
+    if (refcount)
+        CV_XADD(refcount, 1);
+}
+
+inline
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
+    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    upload(arr);
+}
+
+inline
+GpuMat::~GpuMat()
+{
+    release();
+}
+
+inline
+GpuMat& GpuMat::operator =(const GpuMat& m)
+{
+    if (this != &m)
+    {
+        GpuMat temp(m);
+        swap(temp);
+    }
+
+    return *this;
+}
+
+inline
+void GpuMat::create(Size size_, int type_)
+{
+    create(size_.height, size_.width, type_);
+}
+
+inline
+void GpuMat::swap(GpuMat& b)
+{
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+    std::swap(refcount, b.refcount);
+    std::swap(allocator, b.allocator);
+}
+
+inline
+GpuMat GpuMat::clone() const
+{
+    GpuMat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void GpuMat::copyTo(OutputArray dst, InputArray mask) const
+{
+    copyTo(dst, mask, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s)
+{
+    return setTo(s, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
+{
+    return setTo(s, mask, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype) const
+{
+    convertTo(dst, rtype, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
+{
+    convertTo(dst, rtype, alpha, beta, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
+{
+    convertTo(dst, rtype, alpha, 0.0, stream);
+}
+
+inline
+void GpuMat::assignTo(GpuMat& m, int _type) const
+{
+    if (_type < 0)
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+uchar* GpuMat::ptr(int y)
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step * y;
+}
+
+inline
+const uchar* GpuMat::ptr(int y) const
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step * y;
+}
+
+template<typename _Tp> inline
+_Tp* GpuMat::ptr(int y)
+{
+    return (_Tp*)ptr(y);
+}
+
+template<typename _Tp> inline
+const _Tp* GpuMat::ptr(int y) const
+{
+    return (const _Tp*)ptr(y);
+}
+
+template <class T> inline
+GpuMat::operator PtrStepSz<T>() const
+{
+    return PtrStepSz<T>(rows, cols, (T*)data, step);
+}
+
+template <class T> inline
+GpuMat::operator PtrStep<T>() const
+{
+    return PtrStep<T>((T*)data, step);
+}
+
+inline
+GpuMat GpuMat::row(int y) const
+{
+    return GpuMat(*this, Range(y, y+1), Range::all());
+}
+
+inline
+GpuMat GpuMat::col(int x) const
+{
+    return GpuMat(*this, Range::all(), Range(x, x+1));
+}
+
+inline
+GpuMat GpuMat::rowRange(int startrow, int endrow) const
+{
+    return GpuMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+GpuMat GpuMat::rowRange(Range r) const
+{
+    return GpuMat(*this, r, Range::all());
+}
+
+inline
+GpuMat GpuMat::colRange(int startcol, int endcol) const
+{
+    return GpuMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+GpuMat GpuMat::colRange(Range r) const
+{
+    return GpuMat(*this, Range::all(), r);
+}
+
+inline
+GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
+{
+    return GpuMat(*this, rowRange_, colRange_);
+}
+
+inline
+GpuMat GpuMat::operator ()(Rect roi) const
+{
+    return GpuMat(*this, roi);
+}
+
+inline
+bool GpuMat::isContinuous() const
+{
+    return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+size_t GpuMat::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t GpuMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int GpuMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int GpuMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int GpuMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t GpuMat::step1() const
+{
+    return step / elemSize1();
+}
+
+inline
+Size GpuMat::size() const
+{
+    return Size(cols, rows);
+}
+
+inline
+bool GpuMat::empty() const
+{
+    return data == 0;
+}
+
+static inline
+GpuMat createContinuous(int rows, int cols, int type)
+{
+    GpuMat m;
+    createContinuous(rows, cols, type, m);
+    return m;
+}
+
+static inline
+void createContinuous(Size size, int type, OutputArray arr)
+{
+    createContinuous(size.height, size.width, type, arr);
+}
+
+static inline
+GpuMat createContinuous(Size size, int type)
+{
+    GpuMat m;
+    createContinuous(size, type, m);
+    return m;
+}
+
+static inline
+void ensureSizeIsEnough(Size size, int type, OutputArray arr)
+{
+    ensureSizeIsEnough(size.height, size.width, type, arr);
+}
+
+static inline
+void swap(GpuMat& a, GpuMat& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+inline
+HostMem::HostMem(AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+}
+
+inline
+HostMem::HostMem(const HostMem& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
+{
+    if( refcount )
+        CV_XADD(refcount, 1);
+}
+
+inline
+HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (rows_ > 0 && cols_ > 0)
+        create(rows_, cols_, type_);
+}
+
+inline
+HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (size_.height > 0 && size_.width > 0)
+        create(size_.height, size_.width, type_);
+}
+
+inline
+HostMem::HostMem(InputArray arr, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    arr.getMat().copyTo(*this);
+}
+
+inline
+HostMem::~HostMem()
+{
+    release();
+}
+
+inline
+HostMem& HostMem::operator =(const HostMem& m)
+{
+    if (this != &m)
+    {
+        HostMem temp(m);
+        swap(temp);
+    }
+
+    return *this;
+}
+
+inline
+void HostMem::swap(HostMem& b)
+{
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+    std::swap(refcount, b.refcount);
+    std::swap(alloc_type, b.alloc_type);
+}
+
+inline
+HostMem HostMem::clone() const
+{
+    HostMem m(size(), type(), alloc_type);
+    createMatHeader().copyTo(m);
+    return m;
+}
+
+inline
+void HostMem::create(Size size_, int type_)
+{
+    create(size_.height, size_.width, type_);
+}
+
+inline
+Mat HostMem::createMatHeader() const
+{
+    return Mat(size(), type(), data, step);
+}
+
+inline
+bool HostMem::isContinuous() const
+{
+    return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+size_t HostMem::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t HostMem::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int HostMem::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int HostMem::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int HostMem::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t HostMem::step1() const
+{
+    return step / elemSize1();
+}
+
+inline
+Size HostMem::size() const
+{
+    return Size(cols, rows);
+}
+
+inline
+bool HostMem::empty() const
+{
+    return data == 0;
+}
+
+static inline
+void swap(HostMem& a, HostMem& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+inline
+Stream::Stream(const Ptr<Impl>& impl)
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Event
+//===================================================================================
+
+inline
+Event::Event(const Ptr<Impl>& impl)
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+inline
+bool TargetArchs::has(int major, int minor)
+{
+    return hasPtx(major, minor) || hasBin(major, minor);
+}
+
+inline
+bool TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
+}
+
+inline
+DeviceInfo::DeviceInfo()
+{
+    device_id_ = getDevice();
+}
+
+inline
+DeviceInfo::DeviceInfo(int device_id)
+{
+    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
+    device_id_ = device_id;
+}
+
+inline
+int DeviceInfo::deviceID() const
+{
+    return device_id_;
+}
+
+inline
+size_t DeviceInfo::freeMemory() const
+{
+    size_t _totalMemory = 0, _freeMemory = 0;
+    queryMemory(_totalMemory, _freeMemory);
+    return _freeMemory;
+}
+
+inline
+size_t DeviceInfo::totalMemory() const
+{
+    size_t _totalMemory = 0, _freeMemory = 0;
+    queryMemory(_totalMemory, _freeMemory);
+    return _totalMemory;
+}
+
+inline
+bool DeviceInfo::supports(FeatureSet feature_set) const
+{
+    int version = majorVersion() * 10 + minorVersion();
+    return version >= feature_set;
+}
+
+
+}} // namespace cv { namespace cuda {
+
+//===================================================================================
+// Mat
+//===================================================================================
+
+namespace cv {
+
+inline
+Mat::Mat(const cuda::GpuMat& m)
+    : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
+{
+    m.download(*this);
+}
+
+}
+
+//! @endcond
+
+#endif // OPENCV_CORE_CUDAINL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/block.hpp b/thirdparty/linux/include/opencv2/core/cuda/block.hpp
new file mode 100644
index 0000000..330cf1d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/block.hpp
@@ -0,0 +1,211 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
+#define OPENCV_CUDA_DEVICE_BLOCK_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Block
+    {
+        static __device__ __forceinline__ unsigned int id()
+        {
+            return blockIdx.x;
+        }
+
+        static __device__ __forceinline__ unsigned int stride()
+        {
+            return blockDim.x * blockDim.y * blockDim.z;
+        }
+
+        static __device__ __forceinline__ void sync()
+        {
+            __syncthreads();
+        }
+
+        static __device__ __forceinline__ int flattenedThreadId()
+        {
+            return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
+        }
+
+        template<typename It, typename T>
+        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
+        {
+            int STRIDE = stride();
+            It t = beg + flattenedThreadId();
+
+            for(; t < end; t += STRIDE)
+                *t = value;
+        }
+
+        template<typename OutIt, typename T>
+        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
+        {
+            int STRIDE = stride();
+            int tid = flattenedThreadId();
+            value += tid;
+
+            for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
+                *t = value;
+        }
+
+        template<typename InIt, typename OutIt>
+        static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
+        {
+            int STRIDE = stride();
+            InIt  t = beg + flattenedThreadId();
+            OutIt o = out + (t - beg);
+
+            for(; t < end; t += STRIDE, o += STRIDE)
+                *o = *t;
+        }
+
+        template<typename InIt, typename OutIt, class UnOp>
+        static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
+        {
+            int STRIDE = stride();
+            InIt  t = beg + flattenedThreadId();
+            OutIt o = out + (t - beg);
+
+            for(; t < end; t += STRIDE, o += STRIDE)
+                *o = op(*t);
+        }
+
+        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
+        static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
+        {
+            int STRIDE = stride();
+            InIt1 t1 = beg1 + flattenedThreadId();
+            InIt2 t2 = beg2 + flattenedThreadId();
+            OutIt o  = out + (t1 - beg1);
+
+            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
+                *o = op(*t1, *t2);
+        }
+
+        template<int CTA_SIZE, typename T, class BinOp>
+        static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
+        {
+            int tid = flattenedThreadId();
+            T val =  buffer[tid];
+
+            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
+            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
+            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
+            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }
+
+            if (tid < 32)
+            {
+                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
+                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
+                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
+                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
+                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
+                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
+            }
+        }
+
+        template<int CTA_SIZE, typename T, class BinOp>
+        static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
+        {
+            int tid = flattenedThreadId();
+            T val =  buffer[tid] = init;
+            __syncthreads();
+
+            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
+            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
+            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
+            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }
+
+            if (tid < 32)
+            {
+                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
+                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
+                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
+                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
+                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
+                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
+            }
+            __syncthreads();
+            return buffer[0];
+        }
+
+        template <typename T, class BinOp>
+        static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
+        {
+            int ftid = flattenedThreadId();
+            int sft = stride();
+
+            if (sft < n)
+            {
+                for (unsigned int i = sft + ftid; i < n; i += sft)
+                    data[ftid] = op(data[ftid], data[i]);
+
+                __syncthreads();
+
+                n = sft;
+            }
+
+            while (n > 1)
+            {
+                unsigned int half = n/2;
+
+                if (ftid < half)
+                    data[ftid] = op(data[ftid], data[n - ftid - 1]);
+
+                __syncthreads();
+
+                n = n - half;
+            }
+        }
+    };
+}}}
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/border_interpolate.hpp b/thirdparty/linux/include/opencv2/core/cuda/border_interpolate.hpp
new file mode 100644
index 0000000..874f705
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/border_interpolate.hpp
@@ -0,0 +1,722 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
+#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
+
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "vec_math.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    //////////////////////////////////////////////////////////////
+    // BrdConstant
+
+    template <typename D> struct BrdRowConstant
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
+
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        {
+            return x >= 0 ? saturate_cast<D>(data[x]) : val;
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        {
+            return x < width ? saturate_cast<D>(data[x]) : val;
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        {
+            return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
+        }
+
+        int width;
+        D val;
+    };
+
+    template <typename D> struct BrdColConstant
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
+
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        {
+            return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        {
+            return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        {
+            return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
+        }
+
+        int height;
+        D val;
+    };
+
+    template <typename D> struct BrdConstant
+    {
+        typedef D result_type;
+
+        __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
+        {
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        {
+            return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
+        }
+
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        {
+            return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
+        }
+
+        int height;
+        int width;
+        D val;
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReplicate
+
+    template <typename D> struct BrdRowReplicate
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return ::max(x, 0);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return ::min(x, last_col);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_low(idx_col_high(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_low(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_high(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col(x)]);
+        }
+
+        int last_col;
+    };
+
+    template <typename D> struct BrdColReplicate
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return ::max(y, 0);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return ::min(y, last_row);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_low(idx_row_high(y));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
+        }
+
+        int last_row;
+    };
+
+    template <typename D> struct BrdReplicate
+    {
+        typedef D result_type;
+
+        __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return ::max(y, 0);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return ::min(y, last_row);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_low(idx_row_high(y));
+        }
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return ::max(x, 0);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return ::min(x, last_col);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_low(idx_col_high(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
+        }
+
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        {
+            return saturate_cast<D>(src(idx_row(y), idx_col(x)));
+        }
+
+        int last_row;
+        int last_col;
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReflect101
+
+    template <typename D> struct BrdRowReflect101
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return ::abs(x) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_low(idx_col_high(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_low(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_high(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col(x)]);
+        }
+
+        int last_col;
+    };
+
+    template <typename D> struct BrdColReflect101
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return ::abs(y) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_low(idx_row_high(y));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
+        }
+
+        int last_row;
+    };
+
+    template <typename D> struct BrdReflect101
+    {
+        typedef D result_type;
+
+        __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return ::abs(y) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_low(idx_row_high(y));
+        }
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return ::abs(x) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_low(idx_col_high(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
+        }
+
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        {
+            return saturate_cast<D>(src(idx_row(y), idx_col(x)));
+        }
+
+        int last_row;
+        int last_col;
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReflect
+
+    template <typename D> struct BrdRowReflect
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return (::abs(x) - (x < 0)) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_high(::abs(x) - (x < 0));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_low(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_high(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col(x)]);
+        }
+
+        int last_col;
+    };
+
+    template <typename D> struct BrdColReflect
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return (::abs(y) - (y < 0)) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_high(::abs(y) - (y < 0));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
+        }
+
+        int last_row;
+    };
+
+    template <typename D> struct BrdReflect
+    {
+        typedef D result_type;
+
+        __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return (::abs(y) - (y < 0)) % (last_row + 1);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_low(idx_row_high(y));
+        }
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return (::abs(x) - (x < 0)) % (last_col + 1);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return (last_col - ::abs(last_col - x) + (x > last_col));
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_low(idx_col_high(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
+        }
+
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        {
+            return saturate_cast<D>(src(idx_row(y), idx_col(x)));
+        }
+
+        int last_row;
+        int last_col;
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdWrap
+
+    template <typename D> struct BrdRowWrap
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return (x < width) * x + (x >= width) * (x % width);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_high(idx_col_low(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_low(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col_high(x)]);
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        {
+            return saturate_cast<D>(data[idx_col(x)]);
+        }
+
+        int width;
+    };
+
+    template <typename D> struct BrdColWrap
+    {
+        typedef D result_type;
+
+        explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return (y < height) * y + (y >= height) * (y % height);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_high(idx_row_low(y));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
+        }
+
+        int height;
+    };
+
+    template <typename D> struct BrdWrap
+    {
+        typedef D result_type;
+
+        __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
+            height(height_), width(width_)
+        {
+        }
+        template <typename U>
+        __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
+            height(height_), width(width_)
+        {
+        }
+
+        __device__ __forceinline__ int idx_row_low(int y) const
+        {
+            return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
+        }
+
+        __device__ __forceinline__ int idx_row_high(int y) const
+        {
+            return (y < height) ? y : (y % height);
+        }
+
+        __device__ __forceinline__ int idx_row(int y) const
+        {
+            return idx_row_high(idx_row_low(y));
+        }
+
+        __device__ __forceinline__ int idx_col_low(int x) const
+        {
+            return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
+        }
+
+        __device__ __forceinline__ int idx_col_high(int x) const
+        {
+            return (x < width) ? x : (x % width);
+        }
+
+        __device__ __forceinline__ int idx_col(int x) const
+        {
+            return idx_col_high(idx_col_low(x));
+        }
+
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        {
+            return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
+        }
+
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        {
+            return saturate_cast<D>(src(idx_row(y), idx_col(x)));
+        }
+
+        int height;
+        int width;
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BorderReader
+
+    template <typename Ptr2D, typename B> struct BorderReader
+    {
+        typedef typename B::result_type elem_type;
+        typedef typename Ptr2D::index_type index_type;
+
+        __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
+
+        __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
+        {
+            return b.at(y, x, ptr);
+        }
+
+        Ptr2D ptr;
+        B b;
+    };
+
+    // under win32 there is some bug with templated types that passed as kernel parameters
+    // with this specialization all works fine
+    template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
+    {
+        typedef typename BrdConstant<D>::result_type elem_type;
+        typedef typename Ptr2D::index_type index_type;
+
+        __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
+            src(src_), height(b.height), width(b.width), val(b.val)
+        {
+        }
+
+        __device__ __forceinline__ D operator ()(index_type y, index_type x) const
+        {
+            return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
+        }
+
+        Ptr2D src;
+        int height;
+        int width;
+        D val;
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/color.hpp b/thirdparty/linux/include/opencv2/core/cuda/color.hpp
new file mode 100644
index 0000000..dcce280
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/color.hpp
@@ -0,0 +1,309 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_COLOR_HPP
+#define OPENCV_CUDA_COLOR_HPP
+
+#include "detail/color_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
+    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
+    // {
+    //     typedef ... functor_type;
+    //     static __host__ __device__ functor_type create_functor();
+    // };
+
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
+
+    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
+
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COLOR_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/common.hpp b/thirdparty/linux/include/opencv2/core/cuda/common.hpp
new file mode 100644
index 0000000..14b1f3f
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/common.hpp
@@ -0,0 +1,109 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_COMMON_HPP
+#define OPENCV_CUDA_COMMON_HPP
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda_types.hpp"
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifndef CV_PI_F
+    #ifndef CV_PI
+        #define CV_PI_F 3.14159265f
+    #else
+        #define CV_PI_F ((float)CV_PI)
+    #endif
+#endif
+
+namespace cv { namespace cuda {
+    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
+    {
+        if (cudaSuccess != err)
+            cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
+    }
+}}
+
+#ifndef cudaSafeCall
+    #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
+#endif
+
+namespace cv { namespace cuda
+{
+    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
+    {
+        return reinterpret_cast<size_t>(ptr) % size == 0;
+    }
+
+    static inline bool isAligned(size_t step, size_t size)
+    {
+        return step % size == 0;
+    }
+}}
+
+namespace cv { namespace cuda
+{
+    namespace device
+    {
+        __host__ __device__ __forceinline__ int divUp(int total, int grain)
+        {
+            return (total + grain - 1) / grain;
+        }
+
+        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
+        {
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+        }
+    }
+}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COMMON_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/datamov_utils.hpp b/thirdparty/linux/include/opencv2/core/cuda/datamov_utils.hpp
new file mode 100644
index 0000000..6820d0f
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/datamov_utils.hpp
@@ -0,0 +1,113 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
+#define OPENCV_CUDA_DATAMOV_UTILS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
+
+        // for Fermi memory space is detected automatically
+        template <typename T> struct ForceGlob
+        {
+            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)  { val = ptr[offset];  }
+        };
+
+    #else // __CUDA_ARCH__ >= 200
+
+        #if defined(_WIN64) || defined(__LP64__)
+            // 64-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "l"
+        #else
+            // 32-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "r"
+        #endif
+
+        template<class T> struct ForceGlob;
+
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
+
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
+        #undef OPENCV_CUDA_ASM_PTR
+
+    #endif // __CUDA_ARCH__ >= 200
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/color_detail.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/color_detail.hpp
new file mode 100644
index 0000000..bfb4055
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/color_detail.hpp
@@ -0,0 +1,1980 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_COLOR_DETAIL_HPP
+#define OPENCV_CUDA_COLOR_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+#include "../saturate_cast.hpp"
+#include "../limits.hpp"
+#include "../functional.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    #ifndef CV_DESCALE
+        #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
+    #endif
+
+    namespace color_detail
+    {
+        template<typename T> struct ColorChannel
+        {
+            typedef float worktype_f;
+            static __device__ __forceinline__ T max() { return numeric_limits<T>::max(); }
+            static __device__ __forceinline__ T half() { return (T)(max()/2 + 1); }
+        };
+
+        template<> struct ColorChannel<float>
+        {
+            typedef float worktype_f;
+            static __device__ __forceinline__ float max() { return 1.f; }
+            static __device__ __forceinline__ float half() { return 0.5f; }
+        };
+
+        template <typename T> static __device__ __forceinline__ void setAlpha(typename TypeVec<T, 3>::vec_type& vec, T val)
+        {
+        }
+
+        template <typename T> static __device__ __forceinline__ void setAlpha(typename TypeVec<T, 4>::vec_type& vec, T val)
+        {
+            vec.w = val;
+        }
+
+        template <typename T> static __device__ __forceinline__ T getAlpha(const typename TypeVec<T, 3>::vec_type& vec)
+        {
+            return ColorChannel<T>::max();
+        }
+
+        template <typename T> static __device__ __forceinline__ T getAlpha(const typename TypeVec<T, 4>::vec_type& vec)
+        {
+            return vec.w;
+        }
+
+        enum
+        {
+            yuv_shift  = 14,
+            xyz_shift  = 12,
+            R2Y        = 4899,
+            G2Y        = 9617,
+            B2Y        = 1868,
+            BLOCK_SIZE = 256
+        };
+    }
+
+////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
+
+    namespace color_detail
+    {
+        template <typename T, int scn, int dcn, int bidx> struct RGB2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                dst.x = (&src.x)[bidx];
+                dst.y = src.y;
+                dst.z = (&src.x)[bidx^2];
+                setAlpha(dst, getAlpha<T>(src));
+
+                return dst;
+            }
+
+            __host__ __device__ __forceinline__ RGB2RGB() {}
+            __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
+        };
+
+        template <> struct RGB2RGB<uchar, 4, 4, 2> : unary_function<uint, uint>
+        {
+            __device__ uint operator()(uint src) const
+            {
+                uint dst = 0;
+
+                dst |= (0xffu & (src >> 16));
+                dst |= (0xffu & (src >> 8)) << 8;
+                dst |= (0xffu & (src)) << 16;
+                dst |= (0xffu & (src >> 24)) << 24;
+
+                return dst;
+            }
+
+            __host__ __device__ __forceinline__ RGB2RGB() {}
+            __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
+
+    namespace color_detail
+    {
+        template <int green_bits, int bidx> struct RGB2RGB5x5Converter;
+        template<int bidx> struct RGB2RGB5x5Converter<6, bidx>
+        {
+            static __device__ __forceinline__ ushort cvt(const uchar3& src)
+            {
+                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
+            }
+
+            static __device__ __forceinline__ ushort cvt(uint src)
+            {
+                uint b = 0xffu & (src >> (bidx * 8));
+                uint g = 0xffu & (src >> 8);
+                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+                return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8));
+            }
+        };
+
+        template<int bidx> struct RGB2RGB5x5Converter<5, bidx>
+        {
+            static __device__ __forceinline__ ushort cvt(const uchar3& src)
+            {
+                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
+            }
+
+            static __device__ __forceinline__ ushort cvt(uint src)
+            {
+                uint b = 0xffu & (src >> (bidx * 8));
+                uint g = 0xffu & (src >> 8);
+                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+                uint a = 0xffu & (src >> 24);
+                return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000));
+            }
+        };
+
+        template<int scn, int bidx, int green_bits> struct RGB2RGB5x5;
+
+        template<int bidx, int green_bits> struct RGB2RGB5x5<3, bidx,green_bits> : unary_function<uchar3, ushort>
+        {
+            __device__ __forceinline__ ushort operator()(const uchar3& src) const
+            {
+                return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
+            }
+
+            __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+            __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
+        };
+
+        template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
+        {
+            __device__ __forceinline__ ushort operator()(uint src) const
+            {
+                return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
+            }
+
+            __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+            __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \
+    struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        template <int green_bits, int bidx> struct RGB5x52RGBConverter;
+
+        template <int bidx> struct RGB5x52RGBConverter<5, bidx>
+        {
+            static __device__ __forceinline__ void cvt(uint src, uchar3& dst)
+            {
+                (&dst.x)[bidx] = src << 3;
+                dst.y = (src >> 2) & ~7;
+                (&dst.x)[bidx ^ 2] = (src >> 7) & ~7;
+            }
+
+            static __device__ __forceinline__ void cvt(uint src, uint& dst)
+            {
+                dst = 0;
+
+                dst |= (0xffu & (src << 3)) << (bidx * 8);
+                dst |= (0xffu & ((src >> 2) & ~7)) << 8;
+                dst |= (0xffu & ((src >> 7) & ~7)) << ((bidx ^ 2) * 8);
+                dst |= ((src & 0x8000) * 0xffu) << 24;
+            }
+        };
+
+        template <int bidx> struct RGB5x52RGBConverter<6, bidx>
+        {
+            static __device__ __forceinline__ void cvt(uint src, uchar3& dst)
+            {
+                (&dst.x)[bidx] = src << 3;
+                dst.y = (src >> 3) & ~3;
+                (&dst.x)[bidx ^ 2] = (src >> 8) & ~7;
+            }
+
+            static __device__ __forceinline__ void cvt(uint src, uint& dst)
+            {
+                dst = 0xffu << 24;
+
+                dst |= (0xffu & (src << 3)) << (bidx * 8);
+                dst |= (0xffu &((src >> 3) & ~3)) << 8;
+                dst |= (0xffu & ((src >> 8) & ~7)) << ((bidx ^ 2) * 8);
+            }
+        };
+
+        template <int dcn, int bidx, int green_bits> struct RGB5x52RGB;
+
+        template <int bidx, int green_bits> struct RGB5x52RGB<3, bidx, green_bits> : unary_function<ushort, uchar3>
+        {
+            __device__ __forceinline__ uchar3 operator()(ushort src) const
+            {
+                uchar3 dst;
+                RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB5x52RGB() {}
+            __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
+
+        };
+
+        template <int bidx, int green_bits> struct RGB5x52RGB<4, bidx, green_bits> : unary_function<ushort, uint>
+        {
+            __device__ __forceinline__ uint operator()(ushort src) const
+            {
+                uint dst;
+                RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB5x52RGB() {}
+            __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \
+    struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////// Grayscale to Color ////////////////////////////////
+
+    namespace color_detail
+    {
+        template <typename T, int dcn> struct Gray2RGB : unary_function<T, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(T src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                dst.z = dst.y = dst.x = src;
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Gray2RGB() {}
+            __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
+        };
+
+        template <> struct Gray2RGB<uchar, 4> : unary_function<uchar, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                uint dst = 0xffu << 24;
+
+                dst |= src;
+                dst |= src << 8;
+                dst |= src << 16;
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Gray2RGB() {}
+            __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Gray2RGB<T, dcn> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        template <int green_bits> struct Gray2RGB5x5Converter;
+        template<> struct Gray2RGB5x5Converter<6>
+        {
+            static __device__ __forceinline__ ushort cvt(uint t)
+            {
+                return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
+            }
+        };
+
+        template<> struct Gray2RGB5x5Converter<5>
+        {
+            static __device__ __forceinline__ ushort cvt(uint t)
+            {
+                t >>= 3;
+                return (ushort)(t | (t << 5) | (t << 10));
+            }
+        };
+
+        template<int green_bits> struct Gray2RGB5x5 : unary_function<uchar, ushort>
+        {
+            __device__ __forceinline__ ushort operator()(uint src) const
+            {
+                return Gray2RGB5x5Converter<green_bits>::cvt(src);
+            }
+
+            __host__ __device__ __forceinline__ Gray2RGB5x5() {}
+            __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \
+    struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Gray2RGB5x5<green_bits> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////// Color to Grayscale ////////////////////////////////
+
+    namespace color_detail
+    {
+        template <int green_bits> struct RGB5x52GrayConverter;
+        template <> struct RGB5x52GrayConverter<6>
+        {
+            static __device__ __forceinline__ uchar cvt(uint t)
+            {
+                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
+            }
+        };
+
+        template <> struct RGB5x52GrayConverter<5>
+        {
+            static __device__ __forceinline__ uchar cvt(uint t)
+            {
+                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
+            }
+        };
+
+        template<int green_bits> struct RGB5x52Gray : unary_function<ushort, uchar>
+        {
+            __device__ __forceinline__ uchar operator()(uint src) const
+            {
+                return RGB5x52GrayConverter<green_bits>::cvt(src);
+            }
+            __host__ __device__ __forceinline__ RGB5x52Gray() {}
+            __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \
+    struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB5x52Gray<green_bits> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        template <int bidx, typename T> static __device__ __forceinline__ T RGB2GrayConvert(const T* src)
+        {
+            return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
+        }
+
+        template <int bidx> static __device__ __forceinline__ uchar RGB2GrayConvert(uint src)
+        {
+            uint b = 0xffu & (src >> (bidx * 8));
+            uint g = 0xffu & (src >> 8);
+            uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+            return CV_DESCALE((uint)(b * B2Y + g * G2Y + r * R2Y), yuv_shift);
+        }
+
+        template <int bidx> static __device__ __forceinline__ float RGB2GrayConvert(const float* src)
+        {
+            return src[bidx] * 0.114f + src[1] * 0.587f + src[bidx^2] * 0.299f;
+        }
+
+        template <typename T, int scn, int bidx> struct RGB2Gray : unary_function<typename TypeVec<T, scn>::vec_type, T>
+        {
+            __device__ __forceinline__ T operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                return RGB2GrayConvert<bidx>(&src.x);
+            }
+            __host__ __device__ __forceinline__ RGB2Gray() {}
+            __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
+        };
+
+        template <int bidx> struct RGB2Gray<uchar, 4, bidx> : unary_function<uint, uchar>
+        {
+            __device__ __forceinline__ uchar operator()(uint src) const
+            {
+                return RGB2GrayConvert<bidx>(src);
+            }
+            __host__ __device__ __forceinline__ RGB2Gray() {}
+            __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
+        __constant__ int   c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
+
+        template <int bidx, typename T, typename D> static __device__ void RGB2YUVConvert(const T* src, D& dst)
+        {
+            const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
+
+            const int Y = CV_DESCALE(src[0] * c_RGB2YUVCoeffs_i[bidx^2] + src[1] * c_RGB2YUVCoeffs_i[1] + src[2] * c_RGB2YUVCoeffs_i[bidx], yuv_shift);
+            const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YUVCoeffs_i[3] + delta, yuv_shift);
+            const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YUVCoeffs_i[4] + delta, yuv_shift);
+
+            dst.x = saturate_cast<T>(Y);
+            dst.y = saturate_cast<T>(Cr);
+            dst.z = saturate_cast<T>(Cb);
+        }
+
+        template <int bidx, typename D> static __device__ __forceinline__ void RGB2YUVConvert(const float* src, D& dst)
+        {
+            dst.x = src[0] * c_RGB2YUVCoeffs_f[bidx^2] + src[1] * c_RGB2YUVCoeffs_f[1] + src[2] * c_RGB2YUVCoeffs_f[bidx];
+            dst.y = (src[bidx^2] - dst.x) * c_RGB2YUVCoeffs_f[3] + ColorChannel<float>::half();
+            dst.z = (src[bidx] - dst.x) * c_RGB2YUVCoeffs_f[4] + ColorChannel<float>::half();
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct RGB2YUV
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator ()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+                RGB2YUVConvert<bidx>(&src.x, dst);
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2YUV() {}
+            __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
+        __constant__ int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
+
+        template <int bidx, typename T, typename D> static __device__ void YUV2RGBConvert(const T& src, D* dst)
+        {
+            const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift);
+
+            const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * c_YUV2RGBCoeffs_i[2]
+                                             + (src.y - ColorChannel<D>::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift);
+
+            const int r = src.x + CV_DESCALE((src.y - ColorChannel<D>::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift);
+
+            dst[bidx] = saturate_cast<D>(b);
+            dst[1] = saturate_cast<D>(g);
+            dst[bidx^2] = saturate_cast<D>(r);
+        }
+
+        template <int bidx> static __device__ uint YUV2RGBConvert(uint src)
+        {
+            const int x = 0xff & (src);
+            const int y = 0xff & (src >> 8);
+            const int z = 0xff & (src >> 16);
+
+            const int b = x + CV_DESCALE((z - ColorChannel<uchar>::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift);
+
+            const int g = x + CV_DESCALE((z - ColorChannel<uchar>::half()) * c_YUV2RGBCoeffs_i[2]
+                                         + (y - ColorChannel<uchar>::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift);
+
+            const int r = x + CV_DESCALE((y - ColorChannel<uchar>::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift);
+
+            uint dst = 0xffu << 24;
+
+            dst |= saturate_cast<uchar>(b) << (bidx * 8);
+            dst |= saturate_cast<uchar>(g) << 8;
+            dst |= saturate_cast<uchar>(r) << ((bidx ^ 2) * 8);
+
+            return dst;
+        }
+
+        template <int bidx, typename T> static __device__ __forceinline__ void YUV2RGBConvert(const T& src, float* dst)
+        {
+            dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[3];
+
+            dst[1] = src.x + (src.z - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[2]
+                     + (src.y - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[1];
+
+            dst[bidx^2] = src.x + (src.y - ColorChannel<float>::half()) * c_YUV2RGBCoeffs_f[0];
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct YUV2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator ()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                YUV2RGBConvert<bidx>(src, &dst.x);
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ YUV2RGB() {}
+            __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
+        };
+
+        template <int bidx> struct YUV2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator ()(uint src) const
+            {
+                return YUV2RGBConvert<bidx>(src);
+            }
+            __host__ __device__ __forceinline__ YUV2RGB() {}
+            __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
+        __constant__ int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
+
+        template <int bidx, typename T, typename D> static __device__ void RGB2YCrCbConvert(const T* src, D& dst)
+        {
+            const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
+
+            const int Y = CV_DESCALE(src[0] * c_RGB2YCrCbCoeffs_i[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_i[1] + src[2] * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift);
+            const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift);
+            const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift);
+
+            dst.x = saturate_cast<T>(Y);
+            dst.y = saturate_cast<T>(Cr);
+            dst.z = saturate_cast<T>(Cb);
+        }
+
+        template <int bidx> static __device__ uint RGB2YCrCbConvert(uint src)
+        {
+            const int delta = ColorChannel<uchar>::half() * (1 << yuv_shift);
+
+            const int Y = CV_DESCALE((0xffu & src) * c_RGB2YCrCbCoeffs_i[bidx^2] + (0xffu & (src >> 8)) * c_RGB2YCrCbCoeffs_i[1] + (0xffu & (src >> 16)) * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift);
+            const int Cr = CV_DESCALE(((0xffu & (src >> ((bidx ^ 2) * 8))) - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift);
+            const int Cb = CV_DESCALE(((0xffu & (src >> (bidx * 8))) - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift);
+
+            uint dst = 0;
+
+            dst |= saturate_cast<uchar>(Y);
+            dst |= saturate_cast<uchar>(Cr) << 8;
+            dst |= saturate_cast<uchar>(Cb) << 16;
+
+            return dst;
+        }
+
+        template <int bidx, typename D> static __device__ __forceinline__ void RGB2YCrCbConvert(const float* src, D& dst)
+        {
+            dst.x = src[0] * c_RGB2YCrCbCoeffs_f[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_f[1] + src[2] * c_RGB2YCrCbCoeffs_f[bidx];
+            dst.y = (src[bidx^2] - dst.x) * c_RGB2YCrCbCoeffs_f[3] + ColorChannel<float>::half();
+            dst.z = (src[bidx] - dst.x) * c_RGB2YCrCbCoeffs_f[4] + ColorChannel<float>::half();
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct RGB2YCrCb
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator ()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+                RGB2YCrCbConvert<bidx>(&src.x, dst);
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2YCrCb() {}
+            __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
+        };
+
+        template <int bidx> struct RGB2YCrCb<uchar, 4, 4, bidx> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator ()(uint src) const
+            {
+                return RGB2YCrCbConvert<bidx>(src);
+            }
+
+            __host__ __device__ __forceinline__ RGB2YCrCb() {}
+            __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f};
+        __constant__ int   c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049};
+
+        template <int bidx, typename T, typename D> static __device__ void YCrCb2RGBConvert(const T& src, D* dst)
+        {
+            const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift);
+            const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * c_YCrCb2RGBCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift);
+            const int r = src.x + CV_DESCALE((src.y - ColorChannel<D>::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift);
+
+            dst[bidx] = saturate_cast<D>(b);
+            dst[1] = saturate_cast<D>(g);
+            dst[bidx^2] = saturate_cast<D>(r);
+        }
+
+        template <int bidx> static __device__ uint YCrCb2RGBConvert(uint src)
+        {
+            const int x = 0xff & (src);
+            const int y = 0xff & (src >> 8);
+            const int z = 0xff & (src >> 16);
+
+            const int b = x + CV_DESCALE((z - ColorChannel<uchar>::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift);
+            const int g = x + CV_DESCALE((z - ColorChannel<uchar>::half()) * c_YCrCb2RGBCoeffs_i[2] + (y - ColorChannel<uchar>::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift);
+            const int r = x + CV_DESCALE((y - ColorChannel<uchar>::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift);
+
+            uint dst = 0xffu << 24;
+
+            dst |= saturate_cast<uchar>(b) << (bidx * 8);
+            dst |= saturate_cast<uchar>(g) << 8;
+            dst |= saturate_cast<uchar>(r) << ((bidx ^ 2) * 8);
+
+            return dst;
+        }
+
+        template <int bidx, typename T> __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, float* dst)
+        {
+            dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[3];
+            dst[1] = src.x + (src.z - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[1];
+            dst[bidx^2] = src.x + (src.y - ColorChannel<float>::half()) * c_YCrCb2RGBCoeffs_f[0];
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct YCrCb2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator ()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                YCrCb2RGBConvert<bidx>(src, &dst.x);
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ YCrCb2RGB() {}
+            __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
+        };
+
+        template <int bidx> struct YCrCb2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator ()(uint src) const
+            {
+                return YCrCb2RGBConvert<bidx>(src);
+            }
+            __host__ __device__ __forceinline__ YCrCb2RGB() {}
+            __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f };
+        __constant__ int   c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 };
+
+        template <int bidx, typename T, typename D> static __device__ __forceinline__ void RGB2XYZConvert(const T* src, D& dst)
+        {
+            dst.z = saturate_cast<T>(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[6] + src[1] * c_RGB2XYZ_D65i[7] + src[bidx] * c_RGB2XYZ_D65i[8], xyz_shift));
+            dst.x = saturate_cast<T>(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[0] + src[1] * c_RGB2XYZ_D65i[1] + src[bidx] * c_RGB2XYZ_D65i[2], xyz_shift));
+            dst.y = saturate_cast<T>(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[3] + src[1] * c_RGB2XYZ_D65i[4] + src[bidx] * c_RGB2XYZ_D65i[5], xyz_shift));
+        }
+
+        template <int bidx> static __device__ __forceinline__ uint RGB2XYZConvert(uint src)
+        {
+            const uint b = 0xffu & (src >> (bidx * 8));
+            const uint g = 0xffu & (src >> 8);
+            const uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+
+            const uint x = saturate_cast<uchar>(CV_DESCALE(r * c_RGB2XYZ_D65i[0] + g * c_RGB2XYZ_D65i[1] + b * c_RGB2XYZ_D65i[2], xyz_shift));
+            const uint y = saturate_cast<uchar>(CV_DESCALE(r * c_RGB2XYZ_D65i[3] + g * c_RGB2XYZ_D65i[4] + b * c_RGB2XYZ_D65i[5], xyz_shift));
+            const uint z = saturate_cast<uchar>(CV_DESCALE(r * c_RGB2XYZ_D65i[6] + g * c_RGB2XYZ_D65i[7] + b * c_RGB2XYZ_D65i[8], xyz_shift));
+
+            uint dst = 0;
+
+            dst |= x;
+            dst |= y << 8;
+            dst |= z << 16;
+
+            return dst;
+        }
+
+        template <int bidx, typename D> static __device__ __forceinline__ void RGB2XYZConvert(const float* src, D& dst)
+        {
+            dst.x = src[bidx^2] * c_RGB2XYZ_D65f[0] + src[1] * c_RGB2XYZ_D65f[1] + src[bidx] * c_RGB2XYZ_D65f[2];
+            dst.y = src[bidx^2] * c_RGB2XYZ_D65f[3] + src[1] * c_RGB2XYZ_D65f[4] + src[bidx] * c_RGB2XYZ_D65f[5];
+            dst.z = src[bidx^2] * c_RGB2XYZ_D65f[6] + src[1] * c_RGB2XYZ_D65f[7] + src[bidx] * c_RGB2XYZ_D65f[8];
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct RGB2XYZ
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                RGB2XYZConvert<bidx>(&src.x, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2XYZ() {}
+            __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
+        };
+
+        template <int bidx> struct RGB2XYZ<uchar, 4, 4, bidx> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return RGB2XYZConvert<bidx>(src);
+            }
+            __host__ __device__ __forceinline__ RGB2XYZ() {}
+            __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f };
+        __constant__ int   c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 };
+
+        template <int bidx, typename T, typename D> static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst)
+        {
+            dst[bidx^2] = saturate_cast<D>(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[0] + src.y * c_XYZ2sRGB_D65i[1] + src.z * c_XYZ2sRGB_D65i[2], xyz_shift));
+            dst[1]      = saturate_cast<D>(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[3] + src.y * c_XYZ2sRGB_D65i[4] + src.z * c_XYZ2sRGB_D65i[5], xyz_shift));
+            dst[bidx]   = saturate_cast<D>(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[6] + src.y * c_XYZ2sRGB_D65i[7] + src.z * c_XYZ2sRGB_D65i[8], xyz_shift));
+        }
+
+        template <int bidx> static __device__ __forceinline__ uint XYZ2RGBConvert(uint src)
+        {
+            const int x = 0xff & src;
+            const int y = 0xff & (src >> 8);
+            const int z = 0xff & (src >> 16);
+
+            const uint r = saturate_cast<uchar>(CV_DESCALE(x * c_XYZ2sRGB_D65i[0] + y * c_XYZ2sRGB_D65i[1] + z * c_XYZ2sRGB_D65i[2], xyz_shift));
+            const uint g = saturate_cast<uchar>(CV_DESCALE(x * c_XYZ2sRGB_D65i[3] + y * c_XYZ2sRGB_D65i[4] + z * c_XYZ2sRGB_D65i[5], xyz_shift));
+            const uint b = saturate_cast<uchar>(CV_DESCALE(x * c_XYZ2sRGB_D65i[6] + y * c_XYZ2sRGB_D65i[7] + z * c_XYZ2sRGB_D65i[8], xyz_shift));
+
+            uint dst = 0xffu << 24;
+
+            dst |= b << (bidx * 8);
+            dst |= g << 8;
+            dst |= r << ((bidx ^ 2) * 8);
+
+            return dst;
+        }
+
+        template <int bidx, typename T> static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst)
+        {
+            dst[bidx^2] = src.x * c_XYZ2sRGB_D65f[0] + src.y * c_XYZ2sRGB_D65f[1] + src.z * c_XYZ2sRGB_D65f[2];
+            dst[1]      = src.x * c_XYZ2sRGB_D65f[3] + src.y * c_XYZ2sRGB_D65f[4] + src.z * c_XYZ2sRGB_D65f[5];
+            dst[bidx]   = src.x * c_XYZ2sRGB_D65f[6] + src.y * c_XYZ2sRGB_D65f[7] + src.z * c_XYZ2sRGB_D65f[8];
+        }
+
+        template <typename T, int scn, int dcn, int bidx> struct XYZ2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                XYZ2RGBConvert<bidx>(src, &dst.x);
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ XYZ2RGB() {}
+            __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
+        };
+
+        template <int bidx> struct XYZ2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return XYZ2RGBConvert<bidx>(src);
+            }
+            __host__ __device__ __forceinline__ XYZ2RGB() {}
+            __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ int c_HsvDivTable   [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096};
+        __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482};
+        __constant__ int c_HsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685};
+
+        template <int bidx, int hr, typename D> static __device__ void RGB2HSVConvert(const uchar* src, D& dst)
+        {
+            const int hsv_shift = 12;
+            const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256;
+
+            int b = src[bidx], g = src[1], r = src[bidx^2];
+            int h, s, v = b;
+            int vmin = b, diff;
+            int vr, vg;
+
+            v = ::max(v, g);
+            v = ::max(v, r);
+            vmin = ::min(vmin, g);
+            vmin = ::min(vmin, r);
+
+            diff = v - vmin;
+            vr = (v == r) * -1;
+            vg = (v == g) * -1;
+
+            s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
+            h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h += (h < 0) * hr;
+
+            dst.x = saturate_cast<uchar>(h);
+            dst.y = (uchar)s;
+            dst.z = (uchar)v;
+        }
+
+        template <int bidx, int hr> static __device__ uint RGB2HSVConvert(uint src)
+        {
+            const int hsv_shift = 12;
+            const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256;
+
+            const int b = 0xff & (src >> (bidx * 8));
+            const int g = 0xff & (src >> 8);
+            const int r = 0xff & (src >> ((bidx ^ 2) * 8));
+
+            int h, s, v = b;
+            int vmin = b, diff;
+            int vr, vg;
+
+            v = ::max(v, g);
+            v = ::max(v, r);
+            vmin = ::min(vmin, g);
+            vmin = ::min(vmin, r);
+
+            diff = v - vmin;
+            vr = (v == r) * -1;
+            vg = (v == g) * -1;
+
+            s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
+            h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+            h += (h < 0) * hr;
+
+            uint dst = 0;
+
+            dst |= saturate_cast<uchar>(h);
+            dst |= (0xffu & s) << 8;
+            dst |= (0xffu & v) << 16;
+
+            return dst;
+        }
+
+        template <int bidx, int hr, typename D> static __device__ void RGB2HSVConvert(const float* src, D& dst)
+        {
+            const float hscale = hr * (1.f / 360.f);
+
+            float b = src[bidx], g = src[1], r = src[bidx^2];
+            float h, s, v;
+
+            float vmin, diff;
+
+            v = vmin = r;
+            v = fmax(v, g);
+            v = fmax(v, b);
+            vmin = fmin(vmin, g);
+            vmin = fmin(vmin, b);
+
+            diff = v - vmin;
+            s = diff / (float)(::fabs(v) + numeric_limits<float>::epsilon());
+            diff = (float)(60. / (diff + numeric_limits<float>::epsilon()));
+
+            h  = (v == r) * (g - b) * diff;
+            h += (v != r && v == g) * ((b - r) * diff + 120.f);
+            h += (v != r && v != g) * ((r - g) * diff + 240.f);
+            h += (h < 0) * 360.f;
+
+            dst.x = h * hscale;
+            dst.y = s;
+            dst.z = v;
+        }
+
+        template <typename T, int scn, int dcn, int bidx, int hr> struct RGB2HSV
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                RGB2HSVConvert<bidx, hr>(&src.x, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2HSV() {}
+            __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
+        };
+
+        template <int bidx, int hr> struct RGB2HSV<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return RGB2HSVConvert<bidx, hr>(src);
+            }
+            __host__ __device__ __forceinline__ RGB2HSV() {}
+            __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <typename T> struct name ## _full_traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _full_traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };
+
+        template <int bidx, int hr, typename T> static __device__ void HSV2RGBConvert(const T& src, float* dst)
+        {
+            const float hscale = 6.f / hr;
+
+            float h = src.x, s = src.y, v = src.z;
+            float b = v, g = v, r = v;
+
+            if (s != 0)
+            {
+                h *= hscale;
+
+                if( h < 0 )
+                    do h += 6; while( h < 0 );
+                else if( h >= 6 )
+                    do h -= 6; while( h >= 6 );
+
+                int sector = __float2int_rd(h);
+                h -= sector;
+
+                if ( (unsigned)sector >= 6u )
+                {
+                    sector = 0;
+                    h = 0.f;
+                }
+
+                float tab[4];
+                tab[0] = v;
+                tab[1] = v * (1.f - s);
+                tab[2] = v * (1.f - s * h);
+                tab[3] = v * (1.f - s * (1.f - h));
+
+                b = tab[c_HsvSectorData[sector][0]];
+                g = tab[c_HsvSectorData[sector][1]];
+                r = tab[c_HsvSectorData[sector][2]];
+            }
+
+            dst[bidx] = b;
+            dst[1] = g;
+            dst[bidx^2] = r;
+        }
+
+        template <int bidx, int HR, typename T> static __device__ void HSV2RGBConvert(const T& src, uchar* dst)
+        {
+            float3 buf;
+
+            buf.x = src.x;
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            HSV2RGBConvert<bidx, HR>(buf, &buf.x);
+
+            dst[0] = saturate_cast<uchar>(buf.x * 255.f);
+            dst[1] = saturate_cast<uchar>(buf.y * 255.f);
+            dst[2] = saturate_cast<uchar>(buf.z * 255.f);
+        }
+
+        template <int bidx, int hr> static __device__ uint HSV2RGBConvert(uint src)
+        {
+            float3 buf;
+
+            buf.x = src & 0xff;
+            buf.y = ((src >> 8) & 0xff) * (1.f/255.f);
+            buf.z = ((src >> 16) & 0xff) * (1.f/255.f);
+
+            HSV2RGBConvert<bidx, hr>(buf, &buf.x);
+
+            uint dst = 0xffu << 24;
+
+            dst |= saturate_cast<uchar>(buf.x * 255.f);
+            dst |= saturate_cast<uchar>(buf.y * 255.f) << 8;
+            dst |= saturate_cast<uchar>(buf.z * 255.f) << 16;
+
+            return dst;
+        }
+
+        template <typename T, int scn, int dcn, int bidx, int hr> struct HSV2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                HSV2RGBConvert<bidx, hr>(src, &dst.x);
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ HSV2RGB() {}
+            __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
+        };
+
+        template <int bidx, int hr> struct HSV2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return HSV2RGBConvert<bidx, hr>(src);
+            }
+            __host__ __device__ __forceinline__ HSV2RGB() {}
+            __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <typename T> struct name ## _full_traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _full_traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
+
+    namespace color_detail
+    {
+        template <int bidx, int hr, typename D> static __device__ void RGB2HLSConvert(const float* src, D& dst)
+        {
+            const float hscale = hr * (1.f / 360.f);
+
+            float b = src[bidx], g = src[1], r = src[bidx^2];
+            float h = 0.f, s = 0.f, l;
+            float vmin, vmax, diff;
+
+            vmax = vmin = r;
+            vmax = fmax(vmax, g);
+            vmax = fmax(vmax, b);
+            vmin = fmin(vmin, g);
+            vmin = fmin(vmin, b);
+
+            diff = vmax - vmin;
+            l = (vmax + vmin) * 0.5f;
+
+            if (diff > numeric_limits<float>::epsilon())
+            {
+                s = (l < 0.5f) * diff / (vmax + vmin);
+                s += (l >= 0.5f) * diff / (2.0f - vmax - vmin);
+
+                diff = 60.f / diff;
+
+                h  = (vmax == r) * (g - b) * diff;
+                h += (vmax != r && vmax == g) * ((b - r) * diff + 120.f);
+                h += (vmax != r && vmax != g) * ((r - g) * diff + 240.f);
+                h += (h < 0.f) * 360.f;
+            }
+
+            dst.x = h * hscale;
+            dst.y = l;
+            dst.z = s;
+        }
+
+        template <int bidx, int hr, typename D> static __device__ void RGB2HLSConvert(const uchar* src, D& dst)
+        {
+            float3 buf;
+
+            buf.x = src[0] * (1.f / 255.f);
+            buf.y = src[1] * (1.f / 255.f);
+            buf.z = src[2] * (1.f / 255.f);
+
+            RGB2HLSConvert<bidx, hr>(&buf.x, buf);
+
+            dst.x = saturate_cast<uchar>(buf.x);
+            dst.y = saturate_cast<uchar>(buf.y*255.f);
+            dst.z = saturate_cast<uchar>(buf.z*255.f);
+        }
+
+        template <int bidx, int hr> static __device__ uint RGB2HLSConvert(uint src)
+        {
+            float3 buf;
+
+            buf.x = (0xff & src) * (1.f / 255.f);
+            buf.y = (0xff & (src >> 8)) * (1.f / 255.f);
+            buf.z = (0xff & (src >> 16)) * (1.f / 255.f);
+
+            RGB2HLSConvert<bidx, hr>(&buf.x, buf);
+
+            uint dst = 0xffu << 24;
+
+            dst |= saturate_cast<uchar>(buf.x);
+            dst |= saturate_cast<uchar>(buf.y * 255.f) << 8;
+            dst |= saturate_cast<uchar>(buf.z * 255.f) << 16;
+
+            return dst;
+        }
+
+        template <typename T, int scn, int dcn, int bidx, int hr> struct RGB2HLS
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                RGB2HLSConvert<bidx, hr>(&src.x, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2HLS() {}
+            __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
+        };
+
+        template <int bidx, int hr> struct RGB2HLS<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return RGB2HLSConvert<bidx, hr>(src);
+            }
+            __host__ __device__ __forceinline__ RGB2HLS() {}
+            __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <typename T> struct name ## _full_traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _full_traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };
+
+        template <int bidx, int hr, typename T> static __device__ void HLS2RGBConvert(const T& src, float* dst)
+        {
+            const float hscale = 6.0f / hr;
+
+            float h = src.x, l = src.y, s = src.z;
+            float b = l, g = l, r = l;
+
+            if (s != 0)
+            {
+                float p2  = (l <= 0.5f) * l * (1 + s);
+                      p2 += (l > 0.5f) * (l + s - l * s);
+                float p1 = 2 * l - p2;
+
+                h *= hscale;
+
+                if( h < 0 )
+                    do h += 6; while( h < 0 );
+                else if( h >= 6 )
+                    do h -= 6; while( h >= 6 );
+
+                int sector;
+                sector = __float2int_rd(h);
+
+                h -= sector;
+
+                float tab[4];
+                tab[0] = p2;
+                tab[1] = p1;
+                tab[2] = p1 + (p2 - p1) * (1 - h);
+                tab[3] = p1 + (p2 - p1) * h;
+
+                b = tab[c_HlsSectorData[sector][0]];
+                g = tab[c_HlsSectorData[sector][1]];
+                r = tab[c_HlsSectorData[sector][2]];
+            }
+
+            dst[bidx] = b;
+            dst[1] = g;
+            dst[bidx^2] = r;
+        }
+
+        template <int bidx, int hr, typename T> static __device__ void HLS2RGBConvert(const T& src, uchar* dst)
+        {
+            float3 buf;
+
+            buf.x = src.x;
+            buf.y = src.y * (1.f / 255.f);
+            buf.z = src.z * (1.f / 255.f);
+
+            HLS2RGBConvert<bidx, hr>(buf, &buf.x);
+
+            dst[0] = saturate_cast<uchar>(buf.x * 255.f);
+            dst[1] = saturate_cast<uchar>(buf.y * 255.f);
+            dst[2] = saturate_cast<uchar>(buf.z * 255.f);
+        }
+
+        template <int bidx, int hr> static __device__ uint HLS2RGBConvert(uint src)
+        {
+            float3 buf;
+
+            buf.x = 0xff & src;
+            buf.y = (0xff & (src >> 8)) * (1.f / 255.f);
+            buf.z = (0xff & (src >> 16)) * (1.f / 255.f);
+
+            HLS2RGBConvert<bidx, hr>(buf, &buf.x);
+
+            uint dst = 0xffu << 24;
+
+            dst |= saturate_cast<uchar>(buf.x * 255.f);
+            dst |= saturate_cast<uchar>(buf.y * 255.f) << 8;
+            dst |= saturate_cast<uchar>(buf.z * 255.f) << 16;
+
+            return dst;
+        }
+
+        template <typename T, int scn, int dcn, int bidx, int hr> struct HLS2RGB
+            : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<T, dcn>::vec_type operator()(const typename TypeVec<T, scn>::vec_type& src) const
+            {
+                typename TypeVec<T, dcn>::vec_type dst;
+
+                HLS2RGBConvert<bidx, hr>(src, &dst.x);
+                setAlpha(dst, ColorChannel<T>::max());
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ HLS2RGB() {}
+            __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
+        };
+
+        template <int bidx, int hr> struct HLS2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
+        {
+            __device__ __forceinline__ uint operator()(uint src) const
+            {
+                return HLS2RGBConvert<bidx, hr>(src);
+            }
+            __host__ __device__ __forceinline__ HLS2RGB() {}
+            __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <typename T> struct name ## _full_traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    }; \
+    template <> struct name ## _full_traits<float> \
+    { \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////////// RGB <-> Lab /////////////////////////////////////
+
+    namespace color_detail
+    {
+        enum
+        {
+            LAB_CBRT_TAB_SIZE = 1024,
+            GAMMA_TAB_SIZE = 1024,
+            lab_shift = xyz_shift,
+            gamma_shift = 3,
+            lab_shift2 = (lab_shift + gamma_shift),
+            LAB_CBRT_TAB_SIZE_B = (256 * 3 / 2 * (1 << gamma_shift))
+        };
+
+        __constant__ ushort c_sRGBGammaTab_b[] = {0,1,1,2,2,3,4,4,5,6,6,7,8,8,9,10,11,11,12,13,14,15,16,17,19,20,21,22,24,25,26,28,29,31,33,34,36,38,40,41,43,45,47,49,51,54,56,58,60,63,65,68,70,73,75,78,81,83,86,89,92,95,98,101,105,108,111,115,118,121,125,129,132,136,140,144,147,151,155,160,164,168,172,176,181,185,190,194,199,204,209,213,218,223,228,233,239,244,249,255,260,265,271,277,282,288,294,300,306,312,318,324,331,337,343,350,356,363,370,376,383,390,397,404,411,418,426,433,440,448,455,463,471,478,486,494,502,510,518,527,535,543,552,560,569,578,586,595,604,613,622,631,641,650,659,669,678,688,698,707,717,727,737,747,757,768,778,788,799,809,820,831,842,852,863,875,886,897,908,920,931,943,954,966,978,990,1002,1014,1026,1038,1050,1063,1075,1088,1101,1113,1126,1139,1152,1165,1178,1192,1205,1218,1232,1245,1259,1273,1287,1301,1315,1329,1343,1357,1372,1386,1401,1415,1430,1445,1460,1475,1490,1505,1521,1536,1551,1567,1583,1598,1614,1630,1646,1662,1678,1695,1711,1728,1744,1761,1778,1794,1811,1828,1846,1863,1880,1897,1915,1933,1950,1968,1986,2004,2022,2040};
+
+        __device__ __forceinline__ int LabCbrt_b(int i)
+        {
+            float x = i * (1.f / (255.f * (1 << gamma_shift)));
+            return (1 << lab_shift2) * (x < 0.008856f ? x * 7.787f + 0.13793103448275862f : ::cbrtf(x));
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LabConvert_b(const T& src, D& dst)
+        {
+            const int Lscale = (116 * 255 + 50) / 100;
+            const int Lshift = -((16 * 255 * (1 << lab_shift2) + 50) / 100);
+
+            int B = blueIdx == 0 ? src.x : src.z;
+            int G = src.y;
+            int R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = c_sRGBGammaTab_b[B];
+                G = c_sRGBGammaTab_b[G];
+                R = c_sRGBGammaTab_b[R];
+            }
+            else
+            {
+                B <<= 3;
+                G <<= 3;
+                R <<= 3;
+            }
+
+            int fX = LabCbrt_b(CV_DESCALE(B * 778 + G * 1541 + R * 1777, lab_shift));
+            int fY = LabCbrt_b(CV_DESCALE(B * 296 + G * 2929 + R * 871, lab_shift));
+            int fZ = LabCbrt_b(CV_DESCALE(B * 3575 + G * 448 + R * 73, lab_shift));
+
+            int L = CV_DESCALE(Lscale * fY + Lshift, lab_shift2);
+            int a = CV_DESCALE(500 * (fX - fY) + 128 * (1 << lab_shift2), lab_shift2);
+            int b = CV_DESCALE(200 * (fY - fZ) + 128 * (1 << lab_shift2), lab_shift2);
+
+            dst.x = saturate_cast<uchar>(L);
+            dst.y = saturate_cast<uchar>(a);
+            dst.z = saturate_cast<uchar>(b);
+        }
+
+        __device__ __forceinline__ float splineInterpolate(float x, const float* tab, int n)
+        {
+            int ix = ::min(::max(int(x), 0), n-1);
+            x -= ix;
+            tab += ix * 4;
+            return ((tab[3] * x + tab[2]) * x + tab[1]) * x + tab[0];
+        }
+
+        __constant__ float c_sRGBGammaTab[] = {0,7.55853e-05,0.,-7.51331e-13,7.55853e-05,7.55853e-05,-2.25399e-12,3.75665e-12,0.000151171,7.55853e-05,9.01597e-12,-6.99932e-12,0.000226756,7.55853e-05,-1.1982e-11,2.41277e-12,0.000302341,7.55853e-05,-4.74369e-12,1.19001e-11,0.000377927,7.55853e-05,3.09568e-11,-2.09095e-11,0.000453512,7.55853e-05,-3.17718e-11,1.35303e-11,0.000529097,7.55853e-05,8.81905e-12,-4.10782e-12,0.000604683,7.55853e-05,-3.50439e-12,2.90097e-12,0.000680268,7.55853e-05,5.19852e-12,-7.49607e-12,0.000755853,7.55853e-05,-1.72897e-11,2.70833e-11,0.000831439,7.55854e-05,6.39602e-11,-4.26295e-11,0.000907024,7.55854e-05,-6.39282e-11,2.70193e-11,0.000982609,7.55853e-05,1.71298e-11,-7.24017e-12,0.00105819,7.55853e-05,-4.59077e-12,1.94137e-12,0.00113378,7.55853e-05,1.23333e-12,-5.25291e-13,0.00120937,7.55853e-05,-3.42545e-13,1.59799e-13,0.00128495,7.55853e-05,1.36852e-13,-1.13904e-13,0.00136054,7.55853e-05,-2.04861e-13,2.95818e-13,0.00143612,7.55853e-05,6.82594e-13,-1.06937e-12,0.00151171,7.55853e-05,-2.52551e-12,3.98166e-12,0.00158729,7.55853e-05,9.41946e-12,-1.48573e-11,0.00166288,7.55853e-05,-3.51523e-11,5.54474e-11,0.00173846,7.55854e-05,1.3119e-10,-9.0517e-11,0.00181405,7.55854e-05,-1.40361e-10,7.37899e-11,0.00188963,7.55853e-05,8.10085e-11,-8.82272e-11,0.00196522,7.55852e-05,-1.83673e-10,1.62704e-10,0.0020408,7.55853e-05,3.04438e-10,-2.13341e-10,0.00211639,7.55853e-05,-3.35586e-10,2.25e-10,0.00219197,7.55853e-05,3.39414e-10,-2.20997e-10,0.00226756,7.55853e-05,-3.23576e-10,1.93326e-10,0.00234315,7.55853e-05,2.564e-10,-8.66446e-11,0.00241873,7.55855e-05,-3.53328e-12,-7.9578e-11,0.00249432,7.55853e-05,-2.42267e-10,1.72126e-10,0.0025699,7.55853e-05,2.74111e-10,-1.43265e-10,0.00264549,7.55854e-05,-1.55683e-10,-6.47292e-11,0.00272107,7.55849e-05,-3.4987e-10,8.67842e-10,0.00279666,7.55868e-05,2.25366e-09,-3.8723e-09,0.00287224,7.55797e-05,-9.36325e-09,1.5087e-08,0.00294783,7.56063e-05,3.58978e-08,-5.69415e-08,0.00302341,7.55072e-05,-1.34927e-07,2.13144e-07,0.003099,7.58768e-05,5.04507e-07,1.38713e-07,0.00317552,7.7302e-05,9.20646e-07,-1.55186e-07,0.00325359,7.86777e-05,4.55087e-07,4.26813e-08,0.00333276,7.97159e-05,5.83131e-07,-1.06495e-08,0.00341305,8.08502e-05,5.51182e-07,3.87467e-09,0.00349446,8.19642e-05,5.62806e-07,-1.92586e-10,0.00357698,8.30892e-05,5.62228e-07,1.0866e-09,0.00366063,8.4217e-05,5.65488e-07,5.02818e-10,0.00374542,8.53494e-05,5.66997e-07,8.60211e-10,0.00383133,8.6486e-05,5.69577e-07,7.13044e-10,0.00391839,8.76273e-05,5.71716e-07,4.78527e-10,0.00400659,8.87722e-05,5.73152e-07,1.09818e-09,0.00409594,8.99218e-05,5.76447e-07,2.50964e-10,0.00418644,9.10754e-05,5.772e-07,1.15762e-09,0.00427809,9.22333e-05,5.80672e-07,2.40865e-10,0.0043709,9.33954e-05,5.81395e-07,1.13854e-09,0.00446488,9.45616e-05,5.84811e-07,3.27267e-10,0.00456003,9.57322e-05,5.85792e-07,8.1197e-10,0.00465635,9.69062e-05,5.88228e-07,6.15823e-10,0.00475384,9.80845e-05,5.90076e-07,9.15747e-10,0.00485252,9.92674e-05,5.92823e-07,3.778e-10,0.00495238,0.000100454,5.93956e-07,8.32623e-10,0.00505343,0.000101645,5.96454e-07,4.82695e-10,0.00515567,0.000102839,5.97902e-07,9.61904e-10,0.00525911,0.000104038,6.00788e-07,3.26281e-10,0.00536375,0.00010524,6.01767e-07,9.926e-10,0.00546959,0.000106447,6.04745e-07,3.59933e-10,0.00557664,0.000107657,6.05824e-07,8.2728e-10,0.0056849,0.000108871,6.08306e-07,5.21898e-10,0.00579438,0.00011009,6.09872e-07,8.10492e-10,0.00590508,0.000111312,6.12303e-07,4.27046e-10,0.00601701,0.000112538,6.13585e-07,7.40878e-10,0.00613016,0.000113767,6.15807e-07,8.00469e-10,0.00624454,0.000115001,6.18209e-07,2.48178e-10,0.00636016,0.000116238,6.18953e-07,1.00073e-09,0.00647702,0.000117479,6.21955e-07,4.05654e-10,0.00659512,0.000118724,6.23172e-07,6.36192e-10,0.00671447,0.000119973,6.25081e-07,7.74927e-10,0.00683507,0.000121225,6.27406e-07,4.54975e-10,0.00695692,0.000122481,6.28771e-07,6.64841e-10,0.00708003,0.000123741,6.30765e-07,6.10972e-10,0.00720441,0.000125004,6.32598e-07,6.16543e-10,0.00733004,0.000126271,6.34448e-07,6.48204e-10,0.00745695,0.000127542,6.36392e-07,5.15835e-10,0.00758513,0.000128816,6.3794e-07,5.48103e-10,0.00771458,0.000130094,6.39584e-07,1.01706e-09,0.00784532,0.000131376,6.42635e-07,4.0283e-11,0.00797734,0.000132661,6.42756e-07,6.84471e-10,0.00811064,0.000133949,6.4481e-07,9.47144e-10,0.00824524,0.000135241,6.47651e-07,1.83472e-10,0.00838112,0.000136537,6.48201e-07,1.11296e-09,0.00851831,0.000137837,6.5154e-07,2.13163e-11,0.0086568,0.00013914,6.51604e-07,6.64462e-10,0.00879659,0.000140445,6.53598e-07,1.04613e-09,0.00893769,0.000141756,6.56736e-07,-1.92377e-10,0.0090801,0.000143069,6.56159e-07,1.58601e-09,0.00922383,0.000144386,6.60917e-07,-5.63754e-10,0.00936888,0.000145706,6.59226e-07,1.60033e-09,0.00951524,0.000147029,6.64027e-07,-2.49543e-10,0.00966294,0.000148356,6.63278e-07,1.26043e-09,0.00981196,0.000149687,6.67059e-07,-1.35572e-10,0.00996231,0.00015102,6.66653e-07,1.14458e-09,0.010114,0.000152357,6.70086e-07,2.13864e-10,0.010267,0.000153698,6.70728e-07,7.93856e-10,0.0104214,0.000155042,6.73109e-07,3.36077e-10,0.0105771,0.000156389,6.74118e-07,6.55765e-10,0.0107342,0.000157739,6.76085e-07,7.66211e-10,0.0108926,0.000159094,6.78384e-07,4.66116e-12,0.0110524,0.000160451,6.78398e-07,1.07775e-09,0.0112135,0.000161811,6.81631e-07,3.41023e-10,0.011376,0.000163175,6.82654e-07,3.5205e-10,0.0115398,0.000164541,6.8371e-07,1.04473e-09,0.0117051,0.000165912,6.86844e-07,1.25757e-10,0.0118717,0.000167286,6.87222e-07,3.14818e-10,0.0120396,0.000168661,6.88166e-07,1.40886e-09,0.012209,0.000170042,6.92393e-07,-3.62244e-10,0.0123797,0.000171425,6.91306e-07,9.71397e-10,0.0125518,0.000172811,6.9422e-07,2.02003e-10,0.0127253,0.0001742,6.94826e-07,1.01448e-09,0.0129002,0.000175593,6.97869e-07,3.96653e-10,0.0130765,0.00017699,6.99059e-07,1.92927e-10,0.0132542,0.000178388,6.99638e-07,6.94305e-10,0.0134333,0.00017979,7.01721e-07,7.55108e-10,0.0136138,0.000181195,7.03986e-07,1.05918e-11,0.0137957,0.000182603,7.04018e-07,1.06513e-09,0.013979,0.000184015,7.07214e-07,3.85512e-10,0.0141637,0.00018543,7.0837e-07,1.86769e-10,0.0143499,0.000186848,7.0893e-07,7.30116e-10,0.0145374,0.000188268,7.11121e-07,6.17983e-10,0.0147264,0.000189692,7.12975e-07,5.23282e-10,0.0149168,0.000191119,7.14545e-07,8.28398e-11,0.0151087,0.000192549,7.14793e-07,1.0081e-09,0.0153019,0.000193981,7.17817e-07,5.41244e-10,0.0154966,0.000195418,7.19441e-07,-3.7907e-10,0.0156928,0.000196856,7.18304e-07,1.90641e-09,0.0158903,0.000198298,7.24023e-07,-7.27387e-10,0.0160893,0.000199744,7.21841e-07,1.00317e-09,0.0162898,0.000201191,7.24851e-07,4.39949e-10,0.0164917,0.000202642,7.2617e-07,9.6234e-10,0.0166951,0.000204097,7.29057e-07,-5.64019e-10,0.0168999,0.000205554,7.27365e-07,1.29374e-09,0.0171062,0.000207012,7.31247e-07,9.77025e-10,0.017314,0.000208478,7.34178e-07,-1.47651e-09,0.0175232,0.000209942,7.29748e-07,3.06636e-09,0.0177338,0.00021141,7.38947e-07,-1.47573e-09,0.017946,0.000212884,7.3452e-07,9.7386e-10,0.0181596,0.000214356,7.37442e-07,1.30562e-09,0.0183747,0.000215835,7.41358e-07,-6.08376e-10,0.0185913,0.000217315,7.39533e-07,1.12785e-09,0.0188093,0.000218798,7.42917e-07,-1.77711e-10,0.0190289,0.000220283,7.42384e-07,1.44562e-09,0.0192499,0.000221772,7.46721e-07,-1.68825e-11,0.0194724,0.000223266,7.4667e-07,4.84533e-10,0.0196964,0.000224761,7.48124e-07,-5.85298e-11,0.0199219,0.000226257,7.47948e-07,1.61217e-09,0.0201489,0.000227757,7.52785e-07,-8.02136e-10,0.0203775,0.00022926,7.50378e-07,1.59637e-09,0.0206075,0.000230766,7.55167e-07,4.47168e-12,0.020839,0.000232276,7.55181e-07,2.48387e-10,0.021072,0.000233787,7.55926e-07,8.6474e-10,0.0213066,0.000235302,7.5852e-07,1.78299e-11,0.0215426,0.000236819,7.58573e-07,9.26567e-10,0.0217802,0.000238339,7.61353e-07,1.34529e-12,0.0220193,0.000239862,7.61357e-07,9.30659e-10,0.0222599,0.000241387,7.64149e-07,1.34529e-12,0.0225021,0.000242915,7.64153e-07,9.26567e-10,0.0227458,0.000244447,7.66933e-07,1.76215e-11,0.022991,0.00024598,7.66986e-07,8.65536e-10,0.0232377,0.000247517,7.69582e-07,2.45677e-10,0.023486,0.000249057,7.70319e-07,1.44193e-11,0.0237358,0.000250598,7.70363e-07,1.55918e-09,0.0239872,0.000252143,7.7504e-07,-6.63173e-10,0.0242401,0.000253691,7.73051e-07,1.09357e-09,0.0244946,0.000255241,7.76331e-07,1.41919e-11,0.0247506,0.000256793,7.76374e-07,7.12248e-10,0.0250082,0.000258348,7.78511e-07,8.62049e-10,0.0252673,0.000259908,7.81097e-07,-4.35061e-10,0.025528,0.000261469,7.79792e-07,8.7825e-10,0.0257902,0.000263031,7.82426e-07,6.47181e-10,0.0260541,0.000264598,7.84368e-07,2.58448e-10,0.0263194,0.000266167,7.85143e-07,1.81558e-10,0.0265864,0.000267738,7.85688e-07,8.78041e-10,0.0268549,0.000269312,7.88322e-07,3.15102e-11,0.027125,0.000270889,7.88417e-07,8.58525e-10,0.0273967,0.000272468,7.90992e-07,2.59812e-10,0.02767,0.000274051,7.91772e-07,-3.5224e-11,0.0279448,0.000275634,7.91666e-07,1.74377e-09,0.0282212,0.000277223,7.96897e-07,-1.35196e-09,0.0284992,0.000278813,7.92841e-07,1.80141e-09,0.0287788,0.000280404,7.98246e-07,-2.65629e-10,0.0290601,0.000281999,7.97449e-07,1.12374e-09,0.0293428,0.000283598,8.0082e-07,-5.04106e-10,0.0296272,0.000285198,7.99308e-07,8.92764e-10,0.0299132,0.000286799,8.01986e-07,6.58379e-10,0.0302008,0.000288405,8.03961e-07,1.98971e-10,0.0304901,0.000290014,8.04558e-07,4.08382e-10,0.0307809,0.000291624,8.05783e-07,3.01839e-11,0.0310733,0.000293236,8.05874e-07,1.33343e-09,0.0313673,0.000294851,8.09874e-07,2.2419e-10,0.031663,0.000296472,8.10547e-07,-3.67606e-10,0.0319603,0.000298092,8.09444e-07,1.24624e-09,0.0322592,0.000299714,8.13182e-07,-8.92025e-10,0.0325597,0.000301338,8.10506e-07,2.32183e-09,0.0328619,0.000302966,8.17472e-07,-9.44719e-10,0.0331657,0.000304598,8.14638e-07,1.45703e-09,0.0334711,0.000306232,8.19009e-07,-1.15805e-09,0.0337781,0.000307866,8.15535e-07,3.17507e-09,0.0340868,0.000309507,8.2506e-07,-4.09161e-09,0.0343971,0.000311145,8.12785e-07,5.74079e-09,0.0347091,0.000312788,8.30007e-07,-3.97034e-09,0.0350227,0.000314436,8.18096e-07,2.68985e-09,0.035338,0.00031608,8.26166e-07,6.61676e-10,0.0356549,0.000317734,8.28151e-07,-1.61123e-09,0.0359734,0.000319386,8.23317e-07,2.05786e-09,0.0362936,0.000321038,8.29491e-07,8.30388e-10,0.0366155,0.0003227,8.31982e-07,-1.65424e-09,0.036939,0.000324359,8.27019e-07,2.06129e-09,0.0372642,0.000326019,8.33203e-07,8.59719e-10,0.0375911,0.000327688,8.35782e-07,-1.77488e-09,0.0379196,0.000329354,8.30458e-07,2.51464e-09,0.0382498,0.000331023,8.38002e-07,-8.33135e-10,0.0385817,0.000332696,8.35502e-07,8.17825e-10,0.0389152,0.00033437,8.37956e-07,1.28718e-09,0.0392504,0.00033605,8.41817e-07,-2.2413e-09,0.0395873,0.000337727,8.35093e-07,3.95265e-09,0.0399258,0.000339409,8.46951e-07,-2.39332e-09,0.0402661,0.000341095,8.39771e-07,1.89533e-09,0.040608,0.000342781,8.45457e-07,-1.46271e-09,0.0409517,0.000344467,8.41069e-07,3.95554e-09,0.041297,0.000346161,8.52936e-07,-3.18369e-09,0.041644,0.000347857,8.43385e-07,1.32873e-09,0.0419927,0.000349548,8.47371e-07,1.59402e-09,0.0423431,0.000351248,8.52153e-07,-2.54336e-10,0.0426952,0.000352951,8.5139e-07,-5.76676e-10,0.043049,0.000354652,8.4966e-07,2.56114e-09,0.0434045,0.000356359,8.57343e-07,-2.21744e-09,0.0437617,0.000358067,8.50691e-07,2.58344e-09,0.0441206,0.000359776,8.58441e-07,-6.65826e-10,0.0444813,0.000361491,8.56444e-07,7.99218e-11,0.0448436,0.000363204,8.56684e-07,3.46063e-10,0.0452077,0.000364919,8.57722e-07,2.26116e-09,0.0455734,0.000366641,8.64505e-07,-1.94005e-09,0.045941,0.000368364,8.58685e-07,1.77384e-09,0.0463102,0.000370087,8.64007e-07,-1.43005e-09,0.0466811,0.000371811,8.59717e-07,3.94634e-09,0.0470538,0.000373542,8.71556e-07,-3.17946e-09,0.0474282,0.000375276,8.62017e-07,1.32104e-09,0.0478043,0.000377003,8.6598e-07,1.62045e-09,0.0481822,0.00037874,8.70842e-07,-3.52297e-10,0.0485618,0.000380481,8.69785e-07,-2.11211e-10,0.0489432,0.00038222,8.69151e-07,1.19716e-09,0.0493263,0.000383962,8.72743e-07,-8.52026e-10,0.0497111,0.000385705,8.70187e-07,2.21092e-09,0.0500977,0.000387452,8.76819e-07,-5.41339e-10,0.050486,0.000389204,8.75195e-07,-4.5361e-11,0.0508761,0.000390954,8.75059e-07,7.22669e-10,0.0512679,0.000392706,8.77227e-07,8.79936e-10,0.0516615,0.000394463,8.79867e-07,-5.17048e-10,0.0520568,0.000396222,8.78316e-07,1.18833e-09,0.0524539,0.000397982,8.81881e-07,-5.11022e-10,0.0528528,0.000399744,8.80348e-07,8.55683e-10,0.0532534,0.000401507,8.82915e-07,8.13562e-10,0.0536558,0.000403276,8.85356e-07,-3.84603e-10,0.05406,0.000405045,8.84202e-07,7.24962e-10,0.0544659,0.000406816,8.86377e-07,1.20986e-09,0.0548736,0.000408592,8.90006e-07,-1.83896e-09,0.0552831,0.000410367,8.84489e-07,2.42071e-09,0.0556944,0.000412143,8.91751e-07,-3.93413e-10,0.0561074,0.000413925,8.90571e-07,-8.46967e-10,0.0565222,0.000415704,8.8803e-07,3.78122e-09,0.0569388,0.000417491,8.99374e-07,-3.1021e-09,0.0573572,0.000419281,8.90068e-07,1.17658e-09,0.0577774,0.000421064,8.93597e-07,2.12117e-09,0.0581993,0.000422858,8.99961e-07,-2.21068e-09,0.0586231,0.000424651,8.93329e-07,2.9961e-09,0.0590486,0.000426447,9.02317e-07,-2.32311e-09,0.059476,0.000428244,8.95348e-07,2.57122e-09,0.0599051,0.000430043,9.03062e-07,-5.11098e-10,0.0603361,0.000431847,9.01528e-07,-5.27166e-10,0.0607688,0.000433649,8.99947e-07,2.61984e-09,0.0612034,0.000435457,9.07806e-07,-2.50141e-09,0.0616397,0.000437265,9.00302e-07,3.66045e-09,0.0620779,0.000439076,9.11283e-07,-4.68977e-09,0.0625179,0.000440885,8.97214e-07,7.64783e-09,0.0629597,0.000442702,9.20158e-07,-7.27499e-09,0.0634033,0.000444521,8.98333e-07,6.55113e-09,0.0638487,0.000446337,9.17986e-07,-4.02844e-09,0.0642959,0.000448161,9.05901e-07,2.11196e-09,0.064745,0.000449979,9.12236e-07,3.03125e-09,0.0651959,0.000451813,9.2133e-07,-6.78648e-09,0.0656486,0.000453635,9.00971e-07,9.21375e-09,0.0661032,0.000455464,9.28612e-07,-7.71684e-09,0.0665596,0.000457299,9.05462e-07,6.7522e-09,0.0670178,0.00045913,9.25718e-07,-4.3907e-09,0.0674778,0.000460968,9.12546e-07,3.36e-09,0.0679397,0.000462803,9.22626e-07,-1.59876e-09,0.0684034,0.000464644,9.1783e-07,3.0351e-09,0.068869,0.000466488,9.26935e-07,-3.09101e-09,0.0693364,0.000468333,9.17662e-07,1.8785e-09,0.0698057,0.000470174,9.23298e-07,3.02733e-09,0.0702768,0.00047203,9.3238e-07,-6.53722e-09,0.0707497,0.000473875,9.12768e-07,8.22054e-09,0.0712245,0.000475725,9.37429e-07,-3.99325e-09,0.0717012,0.000477588,9.2545e-07,3.01839e-10,0.0721797,0.00047944,9.26355e-07,2.78597e-09,0.0726601,0.000481301,9.34713e-07,-3.99507e-09,0.0731423,0.000483158,9.22728e-07,5.7435e-09,0.0736264,0.000485021,9.39958e-07,-4.07776e-09,0.0741123,0.000486888,9.27725e-07,3.11695e-09,0.0746002,0.000488753,9.37076e-07,-9.39394e-10,0.0750898,0.000490625,9.34258e-07,6.4055e-10,0.0755814,0.000492495,9.3618e-07,-1.62265e-09,0.0760748,0.000494363,9.31312e-07,5.84995e-09,0.0765701,0.000496243,9.48861e-07,-6.87601e-09,0.0770673,0.00049812,9.28233e-07,6.75296e-09,0.0775664,0.000499997,9.48492e-07,-5.23467e-09,0.0780673,0.000501878,9.32788e-07,6.73523e-09,0.0785701,0.000503764,9.52994e-07,-6.80514e-09,0.0790748,0.000505649,9.32578e-07,5.5842e-09,0.0795814,0.000507531,9.49331e-07,-6.30583e-10,0.0800899,0.000509428,9.47439e-07,-3.0618e-09,0.0806003,0.000511314,9.38254e-07,5.4273e-09,0.0811125,0.000513206,9.54536e-07,-3.74627e-09,0.0816267,0.000515104,9.43297e-07,2.10713e-09,0.0821427,0.000516997,9.49618e-07,2.76839e-09,0.0826607,0.000518905,9.57924e-07,-5.73006e-09,0.0831805,0.000520803,9.40733e-07,5.25072e-09,0.0837023,0.0005227,9.56486e-07,-3.71718e-10,0.084226,0.000524612,9.5537e-07,-3.76404e-09,0.0847515,0.000526512,9.44078e-07,7.97735e-09,0.085279,0.000528424,9.6801e-07,-5.79367e-09,0.0858084,0.000530343,9.50629e-07,2.96268e-10,0.0863397,0.000532245,9.51518e-07,4.6086e-09,0.0868729,0.000534162,9.65344e-07,-3.82947e-09,0.087408,0.000536081,9.53856e-07,3.25861e-09,0.087945,0.000537998,9.63631e-07,-1.7543e-09,0.088484,0.00053992,9.58368e-07,3.75849e-09,0.0890249,0.000541848,9.69644e-07,-5.82891e-09,0.0895677,0.00054377,9.52157e-07,4.65593e-09,0.0901124,0.000545688,9.66125e-07,2.10643e-09,0.0906591,0.000547627,9.72444e-07,-5.63099e-09,0.0912077,0.000549555,9.55551e-07,5.51627e-09,0.0917582,0.000551483,9.721e-07,-1.53292e-09,0.0923106,0.000553422,9.67501e-07,6.15311e-10,0.092865,0.000555359,9.69347e-07,-9.28291e-10,0.0934213,0.000557295,9.66562e-07,3.09774e-09,0.0939796,0.000559237,9.75856e-07,-4.01186e-09,0.0945398,0.000561177,9.6382e-07,5.49892e-09,0.095102,0.000563121,9.80317e-07,-3.08258e-09,0.0956661,0.000565073,9.71069e-07,-6.19176e-10,0.0962321,0.000567013,9.69212e-07,5.55932e-09,0.0968001,0.000568968,9.8589e-07,-6.71704e-09,0.09737,0.00057092,9.65738e-07,6.40762e-09,0.0979419,0.00057287,9.84961e-07,-4.0122e-09,0.0985158,0.000574828,9.72925e-07,2.19059e-09,0.0990916,0.000576781,9.79496e-07,2.70048e-09,0.0996693,0.000578748,9.87598e-07,-5.54193e-09,0.100249,0.000580706,9.70972e-07,4.56597e-09,0.100831,0.000582662,9.8467e-07,2.17923e-09,0.101414,0.000584638,9.91208e-07,-5.83232e-09,0.102,0.000586603,9.73711e-07,6.24884e-09,0.102588,0.000588569,9.92457e-07,-4.26178e-09,0.103177,0.000590541,9.79672e-07,3.34781e-09,0.103769,0.00059251,9.89715e-07,-1.67904e-09,0.104362,0.000594485,9.84678e-07,3.36839e-09,0.104958,0.000596464,9.94783e-07,-4.34397e-09,0.105555,0.000598441,9.81751e-07,6.55696e-09,0.106155,0.000600424,1.00142e-06,-6.98272e-09,0.106756,0.000602406,9.80474e-07,6.4728e-09,0.107359,0.000604386,9.99893e-07,-4.00742e-09,0.107965,0.000606374,9.8787e-07,2.10654e-09,0.108572,0.000608356,9.9419e-07,3.0318e-09,0.109181,0.000610353,1.00329e-06,-6.7832e-09,0.109793,0.00061234,9.82936e-07,9.1998e-09,0.110406,0.000614333,1.01054e-06,-7.6642e-09,0.111021,0.000616331,9.87543e-07,6.55579e-09,0.111639,0.000618326,1.00721e-06,-3.65791e-09,0.112258,0.000620329,9.96236e-07,6.25467e-10,0.112879,0.000622324,9.98113e-07,1.15593e-09,0.113503,0.000624323,1.00158e-06,2.20158e-09,0.114128,0.000626333,1.00819e-06,-2.51191e-09,0.114755,0.000628342,1.00065e-06,3.95517e-10,0.115385,0.000630345,1.00184e-06,9.29807e-10,0.116016,0.000632351,1.00463e-06,3.33599e-09,0.116649,0.00063437,1.01463e-06,-6.82329e-09,0.117285,0.000636379,9.94163e-07,9.05595e-09,0.117922,0.000638395,1.02133e-06,-7.04862e-09,0.118562,0.000640416,1.00019e-06,4.23737e-09,0.119203,0.000642429,1.0129e-06,-2.45033e-09,0.119847,0.000644448,1.00555e-06,5.56395e-09,0.120492,0.000646475,1.02224e-06,-4.9043e-09,0.121139,0.000648505,1.00753e-06,-8.47952e-10,0.121789,0.000650518,1.00498e-06,8.29622e-09,0.122441,0.000652553,1.02987e-06,-9.98538e-09,0.123094,0.000654582,9.99914e-07,9.2936e-09,0.12375,0.00065661,1.02779e-06,-4.83707e-09,0.124407,0.000658651,1.01328e-06,2.60411e-09,0.125067,0.000660685,1.0211e-06,-5.57945e-09,0.125729,0.000662711,1.00436e-06,1.22631e-08,0.126392,0.000664756,1.04115e-06,-1.36704e-08,0.127058,0.000666798,1.00014e-06,1.26161e-08,0.127726,0.000668836,1.03798e-06,-6.99155e-09,0.128396,0.000670891,1.01701e-06,4.48836e-10,0.129068,0.000672926,1.01836e-06,5.19606e-09,0.129742,0.000674978,1.03394e-06,-6.3319e-09,0.130418,0.000677027,1.01495e-06,5.2305e-09,0.131096,0.000679073,1.03064e-06,3.11123e-10,0.131776,0.000681135,1.03157e-06,-6.47511e-09,0.132458,0.000683179,1.01215e-06,1.06882e-08,0.133142,0.000685235,1.04421e-06,-6.47519e-09,0.133829,0.000687304,1.02479e-06,3.11237e-10,0.134517,0.000689355,1.02572e-06,5.23035e-09,0.135207,0.000691422,1.04141e-06,-6.3316e-09,0.1359,0.000693486,1.02242e-06,5.19484e-09,0.136594,0.000695546,1.038e-06,4.53497e-10,0.137291,0.000697623,1.03936e-06,-7.00891e-09,0.137989,0.000699681,1.01834e-06,1.2681e-08,0.13869,0.000701756,1.05638e-06,-1.39128e-08,0.139393,0.000703827,1.01464e-06,1.31679e-08,0.140098,0.000705896,1.05414e-06,-8.95659e-09,0.140805,0.000707977,1.02727e-06,7.75742e-09,0.141514,0.000710055,1.05055e-06,-7.17182e-09,0.142225,0.000712135,1.02903e-06,6.02862e-09,0.142938,0.000714211,1.04712e-06,-2.04163e-09,0.143653,0.000716299,1.04099e-06,2.13792e-09,0.144371,0.000718387,1.04741e-06,-6.51009e-09,0.14509,0.000720462,1.02787e-06,9.00123e-09,0.145812,0.000722545,1.05488e-06,3.07523e-10,0.146535,0.000724656,1.0558e-06,-1.02312e-08,0.147261,0.000726737,1.02511e-06,1.0815e-08,0.147989,0.000728819,1.05755e-06,-3.22681e-09,0.148719,0.000730925,1.04787e-06,2.09244e-09,0.14945,0.000733027,1.05415e-06,-5.143e-09,0.150185,0.00073512,1.03872e-06,3.57844e-09,0.150921,0.000737208,1.04946e-06,5.73027e-09,0.151659,0.000739324,1.06665e-06,-1.15983e-08,0.152399,0.000741423,1.03185e-06,1.08605e-08,0.153142,0.000743519,1.06443e-06,-2.04106e-09,0.153886,0.000745642,1.05831e-06,-2.69642e-09,0.154633,0.00074775,1.05022e-06,-2.07425e-09,0.155382,0.000749844,1.044e-06,1.09934e-08,0.156133,0.000751965,1.07698e-06,-1.20972e-08,0.156886,0.000754083,1.04069e-06,7.59288e-09,0.157641,0.000756187,1.06347e-06,-3.37305e-09,0.158398,0.000758304,1.05335e-06,5.89921e-09,0.159158,0.000760428,1.07104e-06,-5.32248e-09,0.159919,0.000762554,1.05508e-06,4.8927e-10,0.160683,0.000764666,1.05654e-06,3.36547e-09,0.161448,0.000766789,1.06664e-06,9.50081e-10,0.162216,0.000768925,1.06949e-06,-7.16568e-09,0.162986,0.000771043,1.04799e-06,1.28114e-08,0.163758,0.000773177,1.08643e-06,-1.42774e-08,0.164533,0.000775307,1.0436e-06,1.44956e-08,0.165309,0.000777438,1.08708e-06,-1.39025e-08,0.166087,0.00077957,1.04538e-06,1.13118e-08,0.166868,0.000781695,1.07931e-06,-1.54224e-09,0.167651,0.000783849,1.07468e-06,-5.14312e-09,0.168436,0.000785983,1.05925e-06,7.21381e-09,0.169223,0.000788123,1.0809e-06,-8.81096e-09,0.170012,0.000790259,1.05446e-06,1.31289e-08,0.170803,0.000792407,1.09385e-06,-1.39022e-08,0.171597,0.000794553,1.05214e-06,1.26775e-08,0.172392,0.000796695,1.09018e-06,-7.00557e-09,0.17319,0.000798855,1.06916e-06,4.43796e-10,0.17399,0.000800994,1.07049e-06,5.23031e-09,0.174792,0.000803151,1.08618e-06,-6.46397e-09,0.175596,0.000805304,1.06679e-06,5.72444e-09,0.176403,0.000807455,1.08396e-06,-1.53254e-09,0.177211,0.000809618,1.07937e-06,4.05673e-10,0.178022,0.000811778,1.08058e-06,-9.01916e-11,0.178835,0.000813939,1.08031e-06,-4.49821e-11,0.17965,0.000816099,1.08018e-06,2.70234e-10,0.180467,0.00081826,1.08099e-06,-1.03603e-09,0.181286,0.000820419,1.07788e-06,3.87392e-09,0.182108,0.000822587,1.0895e-06,4.41522e-10,0.182932,0.000824767,1.09083e-06,-5.63997e-09,0.183758,0.000826932,1.07391e-06,7.21707e-09,0.184586,0.000829101,1.09556e-06,-8.32718e-09,0.185416,0.000831267,1.07058e-06,1.11907e-08,0.186248,0.000833442,1.10415e-06,-6.63336e-09,0.187083,0.00083563,1.08425e-06,4.41484e-10,0.187919,0.0008378,1.08557e-06,4.86754e-09,0.188758,0.000839986,1.10017e-06,-5.01041e-09,0.189599,0.000842171,1.08514e-06,2.72811e-10,0.190443,0.000844342,1.08596e-06,3.91916e-09,0.191288,0.000846526,1.09772e-06,-1.04819e-09,0.192136,0.000848718,1.09457e-06,2.73531e-10,0.192985,0.000850908,1.0954e-06,-4.58916e-11,0.193837,0.000853099,1.09526e-06,-9.01158e-11,0.194692,0.000855289,1.09499e-06,4.06506e-10,0.195548,0.00085748,1.09621e-06,-1.53595e-09,0.196407,0.000859668,1.0916e-06,5.73717e-09,0.197267,0.000861869,1.10881e-06,-6.51164e-09,0.19813,0.000864067,1.08928e-06,5.40831e-09,0.198995,0.000866261,1.1055e-06,-2.20401e-10,0.199863,0.000868472,1.10484e-06,-4.52652e-09,0.200732,0.000870668,1.09126e-06,3.42508e-09,0.201604,0.000872861,1.10153e-06,5.72762e-09,0.202478,0.000875081,1.11872e-06,-1.14344e-08,0.203354,0.000877284,1.08441e-06,1.02076e-08,0.204233,0.000879484,1.11504e-06,4.06355e-10,0.205113,0.000881715,1.11626e-06,-1.18329e-08,0.205996,0.000883912,1.08076e-06,1.71227e-08,0.206881,0.000886125,1.13213e-06,-1.19546e-08,0.207768,0.000888353,1.09626e-06,8.93465e-10,0.208658,0.000890548,1.09894e-06,8.38062e-09,0.209549,0.000892771,1.12408e-06,-4.61353e-09,0.210443,0.000895006,1.11024e-06,-4.82756e-09,0.211339,0.000897212,1.09576e-06,9.02245e-09,0.212238,0.00089943,1.12283e-06,-1.45997e-09,0.213138,0.000901672,1.11845e-06,-3.18255e-09,0.214041,0.000903899,1.1089e-06,-7.11073e-10,0.214946,0.000906115,1.10677e-06,6.02692e-09,0.215853,0.000908346,1.12485e-06,-8.49548e-09,0.216763,0.00091057,1.09936e-06,1.30537e-08,0.217675,0.000912808,1.13852e-06,-1.3917e-08,0.218588,0.000915044,1.09677e-06,1.28121e-08,0.219505,0.000917276,1.13521e-06,-7.5288e-09,0.220423,0.000919523,1.11262e-06,2.40205e-09,0.221344,0.000921756,1.11983e-06,-2.07941e-09,0.222267,0.000923989,1.11359e-06,5.91551e-09,0.223192,0.000926234,1.13134e-06,-6.68149e-09,0.224119,0.000928477,1.11129e-06,5.90929e-09,0.225049,0.000930717,1.12902e-06,-2.05436e-09,0.22598,0.000932969,1.12286e-06,2.30807e-09,0.226915,0.000935222,1.12978e-06,-7.17796e-09,0.227851,0.00093746,1.10825e-06,1.15028e-08,0.228789,0.000939711,1.14276e-06,-9.03083e-09,0.22973,0.000941969,1.11566e-06,9.71932e-09,0.230673,0.00094423,1.14482e-06,-1.49452e-08,0.231619,0.000946474,1.09998e-06,2.02591e-08,0.232566,0.000948735,1.16076e-06,-2.13879e-08,0.233516,0.000950993,1.0966e-06,2.05888e-08,0.234468,0.000953247,1.15837e-06,-1.62642e-08,0.235423,0.000955515,1.10957e-06,1.46658e-08,0.236379,0.000957779,1.15357e-06,-1.25966e-08,0.237338,0.000960048,1.11578e-06,5.91793e-09,0.238299,0.000962297,1.13353e-06,3.82602e-09,0.239263,0.000964576,1.14501e-06,-6.3208e-09,0.240229,0.000966847,1.12605e-06,6.55613e-09,0.241197,0.000969119,1.14572e-06,-5.00268e-09,0.242167,0.000971395,1.13071e-06,-1.44659e-09,0.243139,0.000973652,1.12637e-06,1.07891e-08,0.244114,0.000975937,1.15874e-06,-1.19073e-08,0.245091,0.000978219,1.12302e-06,7.03782e-09,0.246071,0.000980486,1.14413e-06,-1.34276e-09,0.247052,0.00098277,1.1401e-06,-1.66669e-09,0.248036,0.000985046,1.1351e-06,8.00935e-09,0.249022,0.00098734,1.15913e-06,-1.54694e-08,0.250011,0.000989612,1.11272e-06,2.4066e-08,0.251002,0.000991909,1.18492e-06,-2.11901e-08,0.251995,0.000994215,1.12135e-06,1.08973e-09,0.25299,0.000996461,1.12462e-06,1.68311e-08,0.253988,0.000998761,1.17511e-06,-8.8094e-09,0.254987,0.00100109,1.14868e-06,-1.13958e-08,0.25599,0.00100335,1.1145e-06,2.45902e-08,0.256994,0.00100565,1.18827e-06,-2.73603e-08,0.258001,0.00100795,1.10618e-06,2.52464e-08,0.25901,0.00101023,1.18192e-06,-1.40207e-08,0.260021,0.00101256,1.13986e-06,1.03387e-09,0.261035,0.00101484,1.14296e-06,9.8853e-09,0.262051,0.00101715,1.17262e-06,-1.07726e-08,0.263069,0.00101947,1.1403e-06,3.40272e-09,0.26409,0.00102176,1.15051e-06,-2.83827e-09,0.265113,0.00102405,1.142e-06,7.95039e-09,0.266138,0.00102636,1.16585e-06,8.39047e-10,0.267166,0.00102869,1.16836e-06,-1.13066e-08,0.268196,0.00103099,1.13444e-06,1.4585e-08,0.269228,0.00103331,1.1782e-06,-1.72314e-08,0.270262,0.00103561,1.1265e-06,2.45382e-08,0.271299,0.00103794,1.20012e-06,-2.13166e-08,0.272338,0.00104028,1.13617e-06,1.12364e-09,0.273379,0.00104255,1.13954e-06,1.68221e-08,0.274423,0.00104488,1.19001e-06,-8.80736e-09,0.275469,0.00104723,1.16358e-06,-1.13948e-08,0.276518,0.00104953,1.1294e-06,2.45839e-08,0.277568,0.00105186,1.20315e-06,-2.73361e-08,0.278621,0.00105418,1.12114e-06,2.51559e-08,0.279677,0.0010565,1.19661e-06,-1.36832e-08,0.280734,0.00105885,1.15556e-06,-2.25706e-10,0.281794,0.00106116,1.15488e-06,1.45862e-08,0.282857,0.00106352,1.19864e-06,-2.83167e-08,0.283921,0.00106583,1.11369e-06,3.90759e-08,0.284988,0.00106817,1.23092e-06,-3.85801e-08,0.286058,0.00107052,1.11518e-06,2.58375e-08,0.287129,0.00107283,1.19269e-06,-5.16498e-09,0.288203,0.0010752,1.1772e-06,-5.17768e-09,0.28928,0.00107754,1.16167e-06,-3.92671e-09,0.290358,0.00107985,1.14988e-06,2.08846e-08,0.29144,0.00108221,1.21254e-06,-2.00072e-08,0.292523,0.00108458,1.15252e-06,-4.60659e-10,0.293609,0.00108688,1.15114e-06,2.18499e-08,0.294697,0.00108925,1.21669e-06,-2.73343e-08,0.295787,0.0010916,1.13468e-06,2.78826e-08,0.29688,0.00109395,1.21833e-06,-2.45915e-08,0.297975,0.00109632,1.14456e-06,1.08787e-08,0.299073,0.00109864,1.17719e-06,1.08788e-08,0.300172,0.00110102,1.20983e-06,-2.45915e-08,0.301275,0.00110337,1.13605e-06,2.78828e-08,0.302379,0.00110573,1.2197e-06,-2.73348e-08,0.303486,0.00110808,1.1377e-06,2.18518e-08,0.304595,0.00111042,1.20325e-06,-4.67556e-10,0.305707,0.00111283,1.20185e-06,-1.99816e-08,0.306821,0.00111517,1.14191e-06,2.07891e-08,0.307937,0.00111752,1.20427e-06,-3.57026e-09,0.309056,0.00111992,1.19356e-06,-6.50797e-09,0.310177,0.00112228,1.17404e-06,-2.00165e-10,0.3113,0.00112463,1.17344e-06,7.30874e-09,0.312426,0.001127,1.19536e-06,7.67424e-10,0.313554,0.00112939,1.19767e-06,-1.03784e-08,0.314685,0.00113176,1.16653e-06,1.09437e-08,0.315818,0.00113412,1.19936e-06,-3.59406e-09,0.316953,0.00113651,1.18858e-06,3.43251e-09,0.318091,0.0011389,1.19888e-06,-1.0136e-08,0.319231,0.00114127,1.16847e-06,7.30915e-09,0.320374,0.00114363,1.1904e-06,1.07018e-08,0.321518,0.00114604,1.2225e-06,-2.03137e-08,0.322666,0.00114842,1.16156e-06,1.09484e-08,0.323815,0.00115078,1.19441e-06,6.32224e-09,0.324967,0.00115319,1.21337e-06,-6.43509e-09,0.326122,0.00115559,1.19407e-06,-1.03842e-08,0.327278,0.00115795,1.16291e-06,1.81697e-08,0.328438,0.00116033,1.21742e-06,-2.6901e-09,0.329599,0.00116276,1.20935e-06,-7.40939e-09,0.330763,0.00116515,1.18713e-06,2.52533e-09,0.331929,0.00116754,1.1947e-06,-2.69191e-09,0.333098,0.00116992,1.18663e-06,8.24218e-09,0.334269,0.00117232,1.21135e-06,-4.74377e-10,0.335443,0.00117474,1.20993e-06,-6.34471e-09,0.336619,0.00117714,1.1909e-06,-3.94922e-09,0.337797,0.00117951,1.17905e-06,2.21417e-08,0.338978,0.00118193,1.24547e-06,-2.50128e-08,0.340161,0.00118435,1.17043e-06,1.8305e-08,0.341346,0.00118674,1.22535e-06,-1.84048e-08,0.342534,0.00118914,1.17013e-06,2.55121e-08,0.343725,0.00119156,1.24667e-06,-2.40389e-08,0.344917,0.00119398,1.17455e-06,1.10389e-08,0.346113,0.00119636,1.20767e-06,9.68574e-09,0.34731,0.0011988,1.23673e-06,-1.99797e-08,0.34851,0.00120122,1.17679e-06,1.06284e-08,0.349713,0.0012036,1.20867e-06,7.26868e-09,0.350917,0.00120604,1.23048e-06,-9.90072e-09,0.352125,0.00120847,1.20078e-06,2.53177e-09,0.353334,0.00121088,1.20837e-06,-2.26199e-10,0.354546,0.0012133,1.20769e-06,-1.62705e-09,0.355761,0.00121571,1.20281e-06,6.73435e-09,0.356978,0.00121813,1.22302e-06,4.49207e-09,0.358197,0.00122059,1.23649e-06,-2.47027e-08,0.359419,0.00122299,1.16238e-06,3.47142e-08,0.360643,0.00122542,1.26653e-06,-2.47472e-08,0.36187,0.00122788,1.19229e-06,4.66965e-09,0.363099,0.00123028,1.20629e-06,6.06872e-09,0.36433,0.00123271,1.2245e-06,8.57729e-10,0.365564,0.00123516,1.22707e-06,-9.49952e-09,0.366801,0.00123759,1.19858e-06,7.33792e-09,0.36804,0.00124001,1.22059e-06,9.95025e-09,0.369281,0.00124248,1.25044e-06,-1.73366e-08,0.370525,0.00124493,1.19843e-06,-2.08464e-10,0.371771,0.00124732,1.1978e-06,1.81704e-08,0.373019,0.00124977,1.25232e-06,-1.28683e-08,0.37427,0.00125224,1.21371e-06,3.50042e-09,0.375524,0.00125468,1.22421e-06,-1.1335e-09,0.37678,0.00125712,1.22081e-06,1.03345e-09,0.378038,0.00125957,1.22391e-06,-3.00023e-09,0.379299,0.00126201,1.21491e-06,1.09676e-08,0.380562,0.00126447,1.24781e-06,-1.10676e-08,0.381828,0.00126693,1.21461e-06,3.50042e-09,0.383096,0.00126937,1.22511e-06,-2.93403e-09,0.384366,0.00127181,1.21631e-06,8.23574e-09,0.385639,0.00127427,1.24102e-06,-2.06607e-10,0.386915,0.00127675,1.2404e-06,-7.40935e-09,0.388193,0.00127921,1.21817e-06,4.1761e-11,0.389473,0.00128165,1.21829e-06,7.24223e-09,0.390756,0.0012841,1.24002e-06,7.91564e-10,0.392042,0.00128659,1.2424e-06,-1.04086e-08,0.393329,0.00128904,1.21117e-06,1.10405e-08,0.39462,0.0012915,1.24429e-06,-3.951e-09,0.395912,0.00129397,1.23244e-06,4.7634e-09,0.397208,0.00129645,1.24673e-06,-1.51025e-08,0.398505,0.0012989,1.20142e-06,2.58443e-08,0.399805,0.00130138,1.27895e-06,-2.86702e-08,0.401108,0.00130385,1.19294e-06,2.92318e-08,0.402413,0.00130632,1.28064e-06,-2.86524e-08,0.403721,0.0013088,1.19468e-06,2.57731e-08,0.405031,0.00131127,1.272e-06,-1.48355e-08,0.406343,0.00131377,1.2275e-06,3.76652e-09,0.407658,0.00131623,1.23879e-06,-2.30784e-10,0.408976,0.00131871,1.2381e-06,-2.84331e-09,0.410296,0.00132118,1.22957e-06,1.16041e-08,0.411618,0.00132367,1.26438e-06,-1.37708e-08,0.412943,0.00132616,1.22307e-06,1.36768e-08,0.41427,0.00132865,1.2641e-06,-1.1134e-08,0.4156,0.00133114,1.2307e-06,1.05714e-09,0.416933,0.00133361,1.23387e-06,6.90538e-09,0.418267,0.00133609,1.25459e-06,1.12372e-09,0.419605,0.00133861,1.25796e-06,-1.14002e-08,0.420945,0.00134109,1.22376e-06,1.46747e-08,0.422287,0.00134358,1.26778e-06,-1.7496e-08,0.423632,0.00134606,1.21529e-06,2.5507e-08,0.424979,0.00134857,1.29182e-06,-2.49272e-08,0.426329,0.00135108,1.21703e-06,1.45972e-08,0.427681,0.00135356,1.26083e-06,-3.65935e-09,0.429036,0.00135607,1.24985e-06,4.00178e-11,0.430393,0.00135857,1.24997e-06,3.49917e-09,0.431753,0.00136108,1.26047e-06,-1.40366e-08,0.433116,0.00136356,1.21836e-06,2.28448e-08,0.43448,0.00136606,1.28689e-06,-1.77378e-08,0.435848,0.00136858,1.23368e-06,1.83043e-08,0.437218,0.0013711,1.28859e-06,-2.56769e-08,0.43859,0.0013736,1.21156e-06,2.47987e-08,0.439965,0.0013761,1.28595e-06,-1.39133e-08,0.441342,0.00137863,1.24421e-06,1.05202e-09,0.442722,0.00138112,1.24737e-06,9.70507e-09,0.444104,0.00138365,1.27649e-06,-1.00698e-08,0.445489,0.00138617,1.24628e-06,7.72123e-10,0.446877,0.00138867,1.24859e-06,6.98132e-09,0.448267,0.00139118,1.26954e-06,1.10477e-09,0.449659,0.00139373,1.27285e-06,-1.14003e-08,0.451054,0.00139624,1.23865e-06,1.4694e-08,0.452452,0.00139876,1.28273e-06,-1.75734e-08,0.453852,0.00140127,1.23001e-06,2.5797e-08,0.455254,0.00140381,1.3074e-06,-2.60097e-08,0.456659,0.00140635,1.22937e-06,1.86371e-08,0.458067,0.00140886,1.28529e-06,-1.8736e-08,0.459477,0.00141137,1.22908e-06,2.65048e-08,0.46089,0.00141391,1.30859e-06,-2.76784e-08,0.462305,0.00141645,1.22556e-06,2.46043e-08,0.463722,0.00141897,1.29937e-06,-1.11341e-08,0.465143,0.00142154,1.26597e-06,-9.87033e-09,0.466565,0.00142404,1.23636e-06,2.08131e-08,0.467991,0.00142657,1.2988e-06,-1.37773e-08,0.469419,0.00142913,1.25746e-06,4.49378e-09,0.470849,0.00143166,1.27094e-06,-4.19781e-09,0.472282,0.00143419,1.25835e-06,1.22975e-08,0.473717,0.00143674,1.29524e-06,-1.51902e-08,0.475155,0.00143929,1.24967e-06,1.86608e-08,0.476596,0.00144184,1.30566e-06,-2.96506e-08,0.478039,0.00144436,1.2167e-06,4.03368e-08,0.479485,0.00144692,1.33771e-06,-4.22896e-08,0.480933,0.00144947,1.21085e-06,3.94148e-08,0.482384,0.00145201,1.32909e-06,-2.59626e-08,0.483837,0.00145459,1.2512e-06,4.83124e-09,0.485293,0.0014571,1.2657e-06,6.63757e-09,0.486751,0.00145966,1.28561e-06,-1.57911e-09,0.488212,0.00146222,1.28087e-06,-3.21468e-10,0.489676,0.00146478,1.27991e-06,2.86517e-09,0.491142,0.00146735,1.2885e-06,-1.11392e-08,0.49261,0.00146989,1.25508e-06,1.18893e-08,0.494081,0.00147244,1.29075e-06,-6.61574e-09,0.495555,0.001475,1.27091e-06,1.45736e-08,0.497031,0.00147759,1.31463e-06,-2.18759e-08,0.49851,0.00148015,1.249e-06,1.33252e-08,0.499992,0.00148269,1.28897e-06,-1.62277e-09,0.501476,0.00148526,1.28411e-06,-6.83421e-09,0.502962,0.00148781,1.2636e-06,2.89596e-08,0.504451,0.00149042,1.35048e-06,-4.93997e-08,0.505943,0.00149298,1.20228e-06,4.94299e-08,0.507437,0.00149553,1.35057e-06,-2.91107e-08,0.508934,0.00149814,1.26324e-06,7.40848e-09,0.510434,0.00150069,1.28547e-06,-5.23187e-10,0.511936,0.00150326,1.2839e-06,-5.31585e-09,0.51344,0.00150581,1.26795e-06,2.17866e-08,0.514947,0.00150841,1.33331e-06,-2.22257e-08,0.516457,0.00151101,1.26663e-06,7.51178e-09,0.517969,0.00151357,1.28917e-06,-7.82128e-09,0.519484,0.00151613,1.2657e-06,2.37733e-08,0.521002,0.00151873,1.33702e-06,-2.76674e-08,0.522522,0.00152132,1.25402e-06,2.72917e-08,0.524044,0.00152391,1.3359e-06,-2.18949e-08,0.525569,0.00152652,1.27021e-06,6.83372e-10,0.527097,0.00152906,1.27226e-06,1.91613e-08,0.528628,0.00153166,1.32974e-06,-1.77241e-08,0.53016,0.00153427,1.27657e-06,-7.86963e-09,0.531696,0.0015368,1.25296e-06,4.92027e-08,0.533234,0.00153945,1.40057e-06,-6.9732e-08,0.534775,0.00154204,1.19138e-06,5.09114e-08,0.536318,0.00154458,1.34411e-06,-1.4704e-08,0.537864,0.00154722,1.3e-06,7.9048e-09,0.539413,0.00154984,1.32371e-06,-1.69152e-08,0.540964,0.00155244,1.27297e-06,1.51355e-10,0.542517,0.00155499,1.27342e-06,1.63099e-08,0.544074,0.00155758,1.32235e-06,-5.78647e-09,0.545633,0.00156021,1.30499e-06,6.83599e-09,0.547194,0.00156284,1.3255e-06,-2.15575e-08,0.548758,0.00156543,1.26083e-06,1.97892e-08,0.550325,0.00156801,1.32019e-06,2.00525e-09,0.551894,0.00157065,1.32621e-06,-2.78103e-08,0.553466,0.00157322,1.24278e-06,4.96314e-08,0.555041,0.00157586,1.39167e-06,-5.1506e-08,0.556618,0.00157849,1.23716e-06,3.71835e-08,0.558198,0.00158107,1.34871e-06,-3.76233e-08,0.55978,0.00158366,1.23584e-06,5.37052e-08,0.561365,0.00158629,1.39695e-06,-5.79884e-08,0.562953,0.00158891,1.22299e-06,5.90392e-08,0.564543,0.00159153,1.4001e-06,-5.89592e-08,0.566136,0.00159416,1.22323e-06,5.7588e-08,0.567731,0.00159678,1.39599e-06,-5.21835e-08,0.569329,0.00159941,1.23944e-06,3.19369e-08,0.57093,0.00160199,1.33525e-06,-1.59594e-08,0.572533,0.00160461,1.28737e-06,3.19006e-08,0.574139,0.00160728,1.38307e-06,-5.20383e-08,0.575748,0.00160989,1.22696e-06,5.70431e-08,0.577359,0.00161251,1.39809e-06,-5.69247e-08,0.578973,0.00161514,1.22731e-06,5.14463e-08,0.580589,0.00161775,1.38165e-06,-2.9651e-08,0.582208,0.00162042,1.2927e-06,7.55339e-09,0.58383,0.00162303,1.31536e-06,-5.62636e-10,0.585455,0.00162566,1.31367e-06,-5.30281e-09,0.587081,0.00162827,1.29776e-06,2.17738e-08,0.588711,0.00163093,1.36309e-06,-2.21875e-08,0.590343,0.00163359,1.29652e-06,7.37164e-09,0.591978,0.00163621,1.31864e-06,-7.29907e-09,0.593616,0.00163882,1.29674e-06,2.18247e-08,0.595256,0.00164148,1.36221e-06,-2.03952e-08,0.596899,0.00164414,1.30103e-06,1.51241e-10,0.598544,0.00164675,1.30148e-06,1.97902e-08,0.600192,0.00164941,1.36085e-06,-1.97074e-08,0.601843,0.00165207,1.30173e-06,-5.65175e-10,0.603496,0.00165467,1.30004e-06,2.1968e-08,0.605152,0.00165734,1.36594e-06,-2.77024e-08,0.606811,0.00165999,1.28283e-06,2.92369e-08,0.608472,0.00166264,1.37054e-06,-2.96407e-08,0.610136,0.00166529,1.28162e-06,2.97215e-08,0.611803,0.00166795,1.37079e-06,-2.96408e-08,0.613472,0.0016706,1.28186e-06,2.92371e-08,0.615144,0.00167325,1.36957e-06,-2.77031e-08,0.616819,0.00167591,1.28647e-06,2.19708e-08,0.618496,0.00167855,1.35238e-06,-5.75407e-10,0.620176,0.00168125,1.35065e-06,-1.9669e-08,0.621858,0.00168389,1.29164e-06,1.96468e-08,0.623544,0.00168653,1.35058e-06,6.86403e-10,0.625232,0.00168924,1.35264e-06,-2.23924e-08,0.626922,0.00169187,1.28547e-06,2.92788e-08,0.628615,0.00169453,1.3733e-06,-3.51181e-08,0.630311,0.00169717,1.26795e-06,5.15889e-08,0.63201,0.00169987,1.42272e-06,-5.2028e-08,0.633711,0.00170255,1.26663e-06,3.73139e-08,0.635415,0.0017052,1.37857e-06,-3.76227e-08,0.637121,0.00170784,1.2657e-06,5.35722e-08,0.63883,0.00171054,1.42642e-06,-5.74567e-08,0.640542,0.00171322,1.25405e-06,5.70456e-08,0.642257,0.0017159,1.42519e-06,-5.15163e-08,0.643974,0.00171859,1.27064e-06,2.98103e-08,0.645694,0.00172122,1.36007e-06,-8.12016e-09,0.647417,0.00172392,1.33571e-06,2.67039e-09,0.649142,0.0017266,1.34372e-06,-2.56152e-09,0.65087,0.00172928,1.33604e-06,7.57571e-09,0.6526,0.00173197,1.35876e-06,-2.77413e-08,0.654334,0.00173461,1.27554e-06,4.3785e-08,0.65607,0.00173729,1.40689e-06,-2.81896e-08,0.657808,0.00174002,1.32233e-06,9.36893e-09,0.65955,0.00174269,1.35043e-06,-9.28617e-09,0.661294,0.00174536,1.32257e-06,2.77757e-08,0.66304,0.00174809,1.4059e-06,-4.2212e-08,0.66479,0.00175078,1.27926e-06,2.1863e-08,0.666542,0.0017534,1.34485e-06,1.43648e-08,0.668297,0.00175613,1.38795e-06,-1.97177e-08,0.670054,0.00175885,1.3288e-06,4.90115e-09,0.671814,0.00176152,1.3435e-06,1.13232e-10,0.673577,0.00176421,1.34384e-06,-5.3542e-09,0.675343,0.00176688,1.32778e-06,2.13035e-08,0.677111,0.0017696,1.39169e-06,-2.02553e-08,0.678882,0.00177232,1.33092e-06,1.13005e-10,0.680656,0.00177499,1.33126e-06,1.98031e-08,0.682432,0.00177771,1.39067e-06,-1.97211e-08,0.684211,0.00178043,1.33151e-06,-5.2349e-10,0.685993,0.00178309,1.32994e-06,2.18151e-08,0.687777,0.00178582,1.39538e-06,-2.71325e-08,0.689564,0.00178853,1.31398e-06,2.71101e-08,0.691354,0.00179124,1.39531e-06,-2.17035e-08,0.693147,0.00179396,1.3302e-06,9.92865e-11,0.694942,0.00179662,1.3305e-06,2.13063e-08,0.69674,0.00179935,1.39442e-06,-2.57198e-08,0.698541,0.00180206,1.31726e-06,2.19682e-08,0.700344,0.00180476,1.38317e-06,-2.54852e-09,0.70215,0.00180752,1.37552e-06,-1.17741e-08,0.703959,0.00181023,1.3402e-06,-9.95999e-09,0.705771,0.00181288,1.31032e-06,5.16141e-08,0.707585,0.00181566,1.46516e-06,-7.72869e-08,0.709402,0.00181836,1.2333e-06,7.87197e-08,0.711222,0.00182106,1.46946e-06,-5.87781e-08,0.713044,0.00182382,1.29312e-06,3.71834e-08,0.714869,0.00182652,1.40467e-06,-3.03511e-08,0.716697,0.00182924,1.31362e-06,2.46161e-08,0.718528,0.00183194,1.38747e-06,-8.5087e-09,0.720361,0.00183469,1.36194e-06,9.41892e-09,0.722197,0.00183744,1.3902e-06,-2.91671e-08,0.724036,0.00184014,1.3027e-06,4.76448e-08,0.725878,0.00184288,1.44563e-06,-4.22028e-08,0.727722,0.00184565,1.31902e-06,1.95682e-09,0.729569,0.00184829,1.3249e-06,3.43754e-08,0.731419,0.00185104,1.42802e-06,-2.0249e-08,0.733271,0.00185384,1.36727e-06,-1.29838e-08,0.735126,0.00185654,1.32832e-06,1.25794e-08,0.736984,0.00185923,1.36606e-06,2.22711e-08,0.738845,0.00186203,1.43287e-06,-4.20594e-08,0.740708,0.00186477,1.3067e-06,2.67571e-08,0.742574,0.00186746,1.38697e-06,-5.36424e-09,0.744443,0.00187022,1.37087e-06,-5.30023e-09,0.746315,0.00187295,1.35497e-06,2.65653e-08,0.748189,0.00187574,1.43467e-06,-4.13564e-08,0.750066,0.00187848,1.3106e-06,1.9651e-08,0.751946,0.00188116,1.36955e-06,2.23572e-08,0.753828,0.00188397,1.43663e-06,-4.9475e-08,0.755714,0.00188669,1.2882e-06,5.63335e-08,0.757602,0.00188944,1.4572e-06,-5.66499e-08,0.759493,0.00189218,1.28725e-06,5.10567e-08,0.761386,0.00189491,1.44042e-06,-2.83677e-08,0.763283,0.00189771,1.35532e-06,2.80962e-09,0.765182,0.00190042,1.36375e-06,1.71293e-08,0.767083,0.0019032,1.41513e-06,-1.17221e-08,0.768988,0.001906,1.37997e-06,-2.98453e-08,0.770895,0.00190867,1.29043e-06,7.14987e-08,0.772805,0.00191146,1.50493e-06,-7.73354e-08,0.774718,0.00191424,1.27292e-06,5.90292e-08,0.776634,0.00191697,1.45001e-06,-3.9572e-08,0.778552,0.00191975,1.33129e-06,3.9654e-08,0.780473,0.00192253,1.45026e-06,-5.94395e-08,0.782397,0.00192525,1.27194e-06,7.88945e-08,0.784324,0.00192803,1.50862e-06,-7.73249e-08,0.786253,0.00193082,1.27665e-06,5.15913e-08,0.788185,0.00193352,1.43142e-06,-9.83099e-09,0.79012,0.00193636,1.40193e-06,-1.22672e-08,0.792058,0.00193912,1.36513e-06,-7.05275e-10,0.793999,0.00194185,1.36301e-06,1.50883e-08,0.795942,0.00194462,1.40828e-06,-4.33147e-11,0.797888,0.00194744,1.40815e-06,-1.49151e-08,0.799837,0.00195021,1.3634e-06,9.93244e-11,0.801788,0.00195294,1.3637e-06,1.45179e-08,0.803743,0.00195571,1.40725e-06,1.43363e-09,0.8057,0.00195853,1.41155e-06,-2.02525e-08,0.80766,0.00196129,1.35079e-06,1.99718e-08,0.809622,0.00196405,1.41071e-06,-3.01649e-11,0.811588,0.00196687,1.41062e-06,-1.9851e-08,0.813556,0.00196964,1.35107e-06,1.98296e-08,0.815527,0.0019724,1.41056e-06,1.37485e-10,0.817501,0.00197522,1.41097e-06,-2.03796e-08,0.819477,0.00197798,1.34983e-06,2.17763e-08,0.821457,0.00198074,1.41516e-06,-7.12085e-09,0.823439,0.00198355,1.3938e-06,6.70707e-09,0.825424,0.00198636,1.41392e-06,-1.97074e-08,0.827412,0.00198913,1.35479e-06,1.25179e-08,0.829402,0.00199188,1.39235e-06,2.92405e-08,0.831396,0.00199475,1.48007e-06,-6.98755e-08,0.833392,0.0019975,1.27044e-06,7.14477e-08,0.835391,0.00200026,1.48479e-06,-3.71014e-08,0.837392,0.00200311,1.37348e-06,1.73533e-08,0.839397,0.00200591,1.42554e-06,-3.23118e-08,0.841404,0.00200867,1.32861e-06,5.2289e-08,0.843414,0.00201148,1.48547e-06,-5.76348e-08,0.845427,0.00201428,1.31257e-06,5.9041e-08,0.847443,0.00201708,1.48969e-06,-5.93197e-08,0.849461,0.00201988,1.31173e-06,5.90289e-08,0.851482,0.00202268,1.48882e-06,-5.75864e-08,0.853507,0.00202549,1.31606e-06,5.21075e-08,0.855533,0.00202828,1.47238e-06,-3.16344e-08,0.857563,0.00203113,1.37748e-06,1.48257e-08,0.859596,0.00203393,1.42196e-06,-2.76684e-08,0.861631,0.00203669,1.33895e-06,3.62433e-08,0.863669,0.00203947,1.44768e-06,1.90463e-09,0.86571,0.00204237,1.45339e-06,-4.38617e-08,0.867754,0.00204515,1.32181e-06,5.43328e-08,0.8698,0.00204796,1.48481e-06,-5.42603e-08,0.87185,0.00205076,1.32203e-06,4.34989e-08,0.873902,0.00205354,1.45252e-06,-5.26029e-10,0.875957,0.00205644,1.45095e-06,-4.13949e-08,0.878015,0.00205922,1.32676e-06,4.68962e-08,0.880075,0.00206201,1.46745e-06,-2.69807e-08,0.882139,0.00206487,1.38651e-06,1.42181e-09,0.884205,0.00206764,1.39077e-06,2.12935e-08,0.886274,0.00207049,1.45465e-06,-2.69912e-08,0.888346,0.00207332,1.37368e-06,2.70664e-08,0.890421,0.00207615,1.45488e-06,-2.16698e-08,0.892498,0.00207899,1.38987e-06,8.14756e-12,0.894579,0.00208177,1.38989e-06,2.16371e-08,0.896662,0.00208462,1.45481e-06,-2.6952e-08,0.898748,0.00208744,1.37395e-06,2.65663e-08,0.900837,0.00209027,1.45365e-06,-1.97084e-08,0.902928,0.00209312,1.39452e-06,-7.33731e-09,0.905023,0.00209589,1.37251e-06,4.90578e-08,0.90712,0.00209878,1.51968e-06,-6.96845e-08,0.90922,0.00210161,1.31063e-06,5.08664e-08,0.911323,0.00210438,1.46323e-06,-1.45717e-08,0.913429,0.00210727,1.41952e-06,7.42038e-09,0.915538,0.00211013,1.44178e-06,-1.51097e-08,0.917649,0.00211297,1.39645e-06,-6.58618e-09,0.919764,0.00211574,1.37669e-06,4.14545e-08,0.921881,0.00211862,1.50105e-06,-4.00222e-08,0.924001,0.0021215,1.38099e-06,-5.7518e-10,0.926124,0.00212426,1.37926e-06,4.23229e-08,0.92825,0.00212714,1.50623e-06,-4.9507e-08,0.930378,0.00213001,1.35771e-06,3.64958e-08,0.93251,0.00213283,1.4672e-06,-3.68713e-08,0.934644,0.00213566,1.35658e-06,5.13848e-08,0.936781,0.00213852,1.51074e-06,-4.94585e-08,0.938921,0.0021414,1.36236e-06,2.72399e-08,0.941064,0.0021442,1.44408e-06,1.0372e-10,0.943209,0.00214709,1.44439e-06,-2.76547e-08,0.945358,0.0021499,1.36143e-06,5.09106e-08,0.947509,0.00215277,1.51416e-06,-5.67784e-08,0.949663,0.00215563,1.34382e-06,5.69935e-08,0.95182,0.00215849,1.5148e-06,-5.19861e-08,0.95398,0.00216136,1.35885e-06,3.17417e-08,0.956143,0.00216418,1.45407e-06,-1.53758e-08,0.958309,0.00216704,1.40794e-06,2.97615e-08,0.960477,0.00216994,1.49723e-06,-4.40657e-08,0.962649,0.00217281,1.36503e-06,2.72919e-08,0.964823,0.00217562,1.44691e-06,-5.49729e-09,0.967,0.0021785,1.43041e-06,-5.30273e-09,0.96918,0.00218134,1.41451e-06,2.67084e-08,0.971363,0.00218425,1.49463e-06,-4.19265e-08,0.973548,0.00218711,1.36885e-06,2.17881e-08,0.975737,0.00218992,1.43422e-06,1.43789e-08,0.977928,0.00219283,1.47735e-06,-1.96989e-08,0.980122,0.00219572,1.41826e-06,4.81221e-09,0.98232,0.00219857,1.43269e-06,4.50048e-10,0.98452,0.00220144,1.43404e-06,-6.61237e-09,0.986722,0.00220429,1.41421e-06,2.59993e-08,0.988928,0.0022072,1.4922e-06,-3.77803e-08,0.991137,0.00221007,1.37886e-06,5.9127e-09,0.993348,0.00221284,1.3966e-06,1.33339e-07,0.995563,0.00221604,1.79662e-06,-5.98872e-07,0.99778,0.00222015,0.,0.};
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LabConvert_f(const T& src, D& dst)
+        {
+            const float _1_3 = 1.0f / 3.0f;
+            const float _a = 16.0f / 116.0f;
+
+            float B = blueIdx == 0 ? src.x : src.z;
+            float G = src.y;
+            float R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            float X = B * 0.189828f + G * 0.376219f + R * 0.433953f;
+            float Y = B * 0.072169f + G * 0.715160f + R * 0.212671f;
+            float Z = B * 0.872766f + G * 0.109477f + R * 0.017758f;
+
+            float FX = X > 0.008856f ? ::powf(X, _1_3) : (7.787f * X + _a);
+            float FY = Y > 0.008856f ? ::powf(Y, _1_3) : (7.787f * Y + _a);
+            float FZ = Z > 0.008856f ? ::powf(Z, _1_3) : (7.787f * Z + _a);
+
+            float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
+            float a = 500.f * (FX - FY);
+            float b = 200.f * (FY - FZ);
+
+            dst.x = L;
+            dst.y = a;
+            dst.z = b;
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Lab;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Lab<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                RGB2LabConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Lab() {}
+            __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                RGB2LabConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Lab() {}
+            __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        __constant__ float c_sRGBInvGammaTab[] = {0,0.0126255,0.,-8.33961e-06,0.0126172,0.0126005,-2.50188e-05,4.1698e-05,0.0252344,0.0126756,0.000100075,-0.000158451,0.0378516,0.0124004,-0.000375277,-0.000207393,0.0496693,0.0110276,-0.000997456,0.00016837,0.0598678,0.00953783,-0.000492346,2.07235e-05,0.068934,0.00861531,-0.000430176,3.62876e-05,0.0771554,0.00786382,-0.000321313,1.87625e-05,0.0847167,0.00727748,-0.000265025,1.53594e-05,0.0917445,0.00679351,-0.000218947,1.10545e-05,0.0983301,0.00638877,-0.000185784,8.66984e-06,0.104542,0.00604322,-0.000159774,6.82996e-06,0.110432,0.00574416,-0.000139284,5.51008e-06,0.116042,0.00548212,-0.000122754,4.52322e-06,0.121406,0.00525018,-0.000109184,3.75557e-06,0.126551,0.00504308,-9.79177e-05,3.17134e-06,0.131499,0.00485676,-8.84037e-05,2.68469e-06,0.13627,0.004688,-8.03496e-05,2.31725e-06,0.14088,0.00453426,-7.33978e-05,2.00868e-06,0.145343,0.00439349,-6.73718e-05,1.74775e-06,0.149671,0.00426399,-6.21286e-05,1.53547e-06,0.153875,0.00414434,-5.75222e-05,1.364e-06,0.157963,0.00403338,-5.34301e-05,1.20416e-06,0.161944,0.00393014,-4.98177e-05,1.09114e-06,0.165825,0.00383377,-4.65443e-05,9.57987e-07,0.169613,0.00374356,-4.36703e-05,8.88359e-07,0.173314,0.00365888,-4.10052e-05,7.7849e-07,0.176933,0.00357921,-3.86697e-05,7.36254e-07,0.180474,0.00350408,-3.6461e-05,6.42534e-07,0.183942,0.00343308,-3.45334e-05,6.12614e-07,0.187342,0.00336586,-3.26955e-05,5.42894e-07,0.190675,0.00330209,-3.10669e-05,5.08967e-07,0.193947,0.00324149,-2.954e-05,4.75977e-07,0.197159,0.00318383,-2.8112e-05,4.18343e-07,0.200315,0.00312887,-2.6857e-05,4.13651e-07,0.203418,0.00307639,-2.5616e-05,3.70847e-07,0.206469,0.00302627,-2.45035e-05,3.3813e-07,0.209471,0.00297828,-2.34891e-05,3.32999e-07,0.212426,0.0029323,-2.24901e-05,2.96826e-07,0.215336,0.00288821,-2.15996e-05,2.82736e-07,0.218203,0.00284586,-2.07514e-05,2.70961e-07,0.221029,0.00280517,-1.99385e-05,2.42744e-07,0.223814,0.00276602,-1.92103e-05,2.33277e-07,0.226561,0.0027283,-1.85105e-05,2.2486e-07,0.229271,0.00269195,-1.78359e-05,2.08383e-07,0.231945,0.00265691,-1.72108e-05,1.93305e-07,0.234585,0.00262307,-1.66308e-05,1.80687e-07,0.237192,0.00259035,-1.60888e-05,1.86632e-07,0.239766,0.00255873,-1.55289e-05,1.60569e-07,0.24231,0.00252815,-1.50472e-05,1.54566e-07,0.244823,0.00249852,-1.45835e-05,1.59939e-07,0.247307,0.00246983,-1.41037e-05,1.29549e-07,0.249763,0.00244202,-1.3715e-05,1.41429e-07,0.252191,0.00241501,-1.32907e-05,1.39198e-07,0.254593,0.00238885,-1.28731e-05,1.06444e-07,0.256969,0.00236342,-1.25538e-05,1.2048e-07,0.25932,0.00233867,-1.21924e-05,1.26892e-07,0.261647,0.00231467,-1.18117e-05,8.72084e-08,0.26395,0.00229131,-1.15501e-05,1.20323e-07,0.26623,0.00226857,-1.11891e-05,8.71514e-08,0.268487,0.00224645,-1.09276e-05,9.73165e-08,0.270723,0.00222489,-1.06357e-05,8.98259e-08,0.272937,0.00220389,-1.03662e-05,7.98218e-08,0.275131,0.00218339,-1.01267e-05,9.75254e-08,0.277304,0.00216343,-9.83416e-06,6.65195e-08,0.279458,0.00214396,-9.63461e-06,8.34313e-08,0.281592,0.00212494,-9.38431e-06,7.65919e-08,0.283708,0.00210641,-9.15454e-06,5.7236e-08,0.285805,0.00208827,-8.98283e-06,8.18939e-08,0.287885,0.00207055,-8.73715e-06,6.2224e-08,0.289946,0.00205326,-8.55047e-06,5.66388e-08,0.291991,0.00203633,-8.38056e-06,6.88491e-08,0.294019,0.00201978,-8.17401e-06,5.53955e-08,0.296031,0.00200359,-8.00782e-06,6.71971e-08,0.298027,0.00198778,-7.80623e-06,3.34439e-08,0.300007,0.00197227,-7.7059e-06,6.7248e-08,0.301971,0.00195706,-7.50416e-06,5.51915e-08,0.303921,0.00194221,-7.33858e-06,3.98124e-08,0.305856,0.00192766,-7.21915e-06,5.37795e-08,0.307776,0.00191338,-7.05781e-06,4.30919e-08,0.309683,0.00189939,-6.92853e-06,4.20744e-08,0.311575,0.00188566,-6.80231e-06,5.68321e-08,0.313454,0.00187223,-6.63181e-06,2.86195e-08,0.31532,0.00185905,-6.54595e-06,3.73075e-08,0.317172,0.00184607,-6.43403e-06,6.05684e-08,0.319012,0.00183338,-6.25233e-06,1.84426e-08,0.320839,0.00182094,-6.197e-06,4.44757e-08,0.322654,0.00180867,-6.06357e-06,4.20729e-08,0.324456,0.00179667,-5.93735e-06,2.56511e-08,0.326247,0.00178488,-5.8604e-06,3.41368e-08,0.328026,0.00177326,-5.75799e-06,4.64177e-08,0.329794,0.00176188,-5.61874e-06,1.86107e-08,0.33155,0.0017507,-5.5629e-06,2.81511e-08,0.333295,0.00173966,-5.47845e-06,4.75987e-08,0.335029,0.00172884,-5.33565e-06,1.98726e-08,0.336753,0.00171823,-5.27604e-06,2.19226e-08,0.338466,0.00170775,-5.21027e-06,4.14483e-08,0.340169,0.00169745,-5.08592e-06,2.09017e-08,0.341861,0.00168734,-5.02322e-06,2.39561e-08,0.343543,0.00167737,-4.95135e-06,3.22852e-08,0.345216,0.00166756,-4.85449e-06,2.57173e-08,0.346878,0.00165793,-4.77734e-06,1.38569e-08,0.348532,0.00164841,-4.73577e-06,3.80634e-08,0.350175,0.00163906,-4.62158e-06,1.27043e-08,0.35181,0.00162985,-4.58347e-06,3.03279e-08,0.353435,0.00162078,-4.49249e-06,1.49961e-08,0.355051,0.00161184,-4.4475e-06,2.88977e-08,0.356659,0.00160303,-4.3608e-06,1.84241e-08,0.358257,0.00159436,-4.30553e-06,1.6616e-08,0.359848,0.0015858,-4.25568e-06,3.43218e-08,0.361429,0.00157739,-4.15272e-06,-4.89172e-09,0.363002,0.00156907,-4.16739e-06,4.48498e-08,0.364567,0.00156087,-4.03284e-06,4.30676e-09,0.366124,0.00155282,-4.01992e-06,2.73303e-08,0.367673,0.00154486,-3.93793e-06,5.58036e-09,0.369214,0.001537,-3.92119e-06,3.97554e-08,0.370747,0.00152928,-3.80193e-06,-1.55904e-08,0.372272,0.00152163,-3.8487e-06,5.24081e-08,0.37379,0.00151409,-3.69147e-06,-1.52272e-08,0.375301,0.00150666,-3.73715e-06,3.83028e-08,0.376804,0.0014993,-3.62225e-06,1.10278e-08,0.378299,0.00149209,-3.58916e-06,6.99326e-09,0.379788,0.00148493,-3.56818e-06,2.06038e-08,0.381269,0.00147786,-3.50637e-06,2.98009e-08,0.382744,0.00147093,-3.41697e-06,-2.05978e-08,0.384211,0.00146404,-3.47876e-06,5.25899e-08,0.385672,0.00145724,-3.32099e-06,-1.09471e-08,0.387126,0.00145056,-3.35383e-06,2.10009e-08,0.388573,0.00144392,-3.29083e-06,1.63501e-08,0.390014,0.00143739,-3.24178e-06,3.00641e-09,0.391448,0.00143091,-3.23276e-06,3.12282e-08,0.392875,0.00142454,-3.13908e-06,-8.70932e-09,0.394297,0.00141824,-3.16521e-06,3.34114e-08,0.395712,0.00141201,-3.06497e-06,-5.72754e-09,0.397121,0.00140586,-3.08215e-06,1.9301e-08,0.398524,0.00139975,-3.02425e-06,1.7931e-08,0.39992,0.00139376,-2.97046e-06,-1.61822e-09,0.401311,0.00138781,-2.97531e-06,1.83442e-08,0.402696,0.00138192,-2.92028e-06,1.76485e-08,0.404075,0.00137613,-2.86733e-06,4.68617e-10,0.405448,0.00137039,-2.86593e-06,1.02794e-08,0.406816,0.00136469,-2.83509e-06,1.80179e-08,0.408178,0.00135908,-2.78104e-06,7.05594e-09,0.409534,0.00135354,-2.75987e-06,1.33633e-08,0.410885,0.00134806,-2.71978e-06,-9.04568e-10,0.41223,0.00134261,-2.72249e-06,2.0057e-08,0.41357,0.00133723,-2.66232e-06,1.00841e-08,0.414905,0.00133194,-2.63207e-06,-7.88835e-10,0.416234,0.00132667,-2.63444e-06,2.28734e-08,0.417558,0.00132147,-2.56582e-06,-1.29785e-09,0.418877,0.00131633,-2.56971e-06,1.21205e-08,0.420191,0.00131123,-2.53335e-06,1.24202e-08,0.421499,0.0013062,-2.49609e-06,-2.19681e-09,0.422803,0.0013012,-2.50268e-06,2.61696e-08,0.424102,0.00129628,-2.42417e-06,-1.30747e-08,0.425396,0.00129139,-2.46339e-06,2.6129e-08,0.426685,0.00128654,-2.38501e-06,-2.03454e-09,0.427969,0.00128176,-2.39111e-06,1.18115e-08,0.429248,0.00127702,-2.35567e-06,1.43932e-08,0.430523,0.00127235,-2.31249e-06,-9.77965e-09,0.431793,0.00126769,-2.34183e-06,2.47253e-08,0.433058,0.00126308,-2.26766e-06,2.85278e-10,0.434319,0.00125855,-2.2668e-06,3.93614e-09,0.435575,0.00125403,-2.25499e-06,1.37722e-08,0.436827,0.00124956,-2.21368e-06,5.79803e-10,0.438074,0.00124513,-2.21194e-06,1.37112e-08,0.439317,0.00124075,-2.1708e-06,4.17973e-09,0.440556,0.00123642,-2.15826e-06,-6.27703e-10,0.44179,0.0012321,-2.16015e-06,2.81332e-08,0.44302,0.00122787,-2.07575e-06,-2.24985e-08,0.444246,0.00122365,-2.14324e-06,3.20586e-08,0.445467,0.00121946,-2.04707e-06,-1.6329e-08,0.446685,0.00121532,-2.09605e-06,3.32573e-08,0.447898,0.00121122,-1.99628e-06,-2.72927e-08,0.449107,0.00120715,-2.07816e-06,4.6111e-08,0.450312,0.00120313,-1.93983e-06,-3.79416e-08,0.451514,0.00119914,-2.05365e-06,4.60507e-08,0.452711,0.00119517,-1.9155e-06,-2.7052e-08,0.453904,0.00119126,-1.99666e-06,3.23551e-08,0.455093,0.00118736,-1.89959e-06,-1.29613e-08,0.456279,0.00118352,-1.93848e-06,1.94905e-08,0.45746,0.0011797,-1.88e-06,-5.39588e-09,0.458638,0.00117593,-1.89619e-06,2.09282e-09,0.459812,0.00117214,-1.88991e-06,2.68267e-08,0.460982,0.00116844,-1.80943e-06,-1.99925e-08,0.462149,0.00116476,-1.86941e-06,2.3341e-08,0.463312,0.00116109,-1.79939e-06,-1.37674e-08,0.464471,0.00115745,-1.84069e-06,3.17287e-08,0.465627,0.00115387,-1.7455e-06,-2.37407e-08,0.466779,0.00115031,-1.81673e-06,3.34315e-08,0.467927,0.00114677,-1.71643e-06,-2.05786e-08,0.469073,0.00114328,-1.77817e-06,1.90802e-08,0.470214,0.00113978,-1.72093e-06,3.86247e-09,0.471352,0.00113635,-1.70934e-06,-4.72759e-09,0.472487,0.00113292,-1.72352e-06,1.50478e-08,0.473618,0.00112951,-1.67838e-06,4.14108e-09,0.474746,0.00112617,-1.66595e-06,-1.80986e-09,0.47587,0.00112283,-1.67138e-06,3.09816e-09,0.476991,0.0011195,-1.66209e-06,1.92198e-08,0.478109,0.00111623,-1.60443e-06,-2.03726e-08,0.479224,0.00111296,-1.66555e-06,3.2468e-08,0.480335,0.00110973,-1.56814e-06,-2.00922e-08,0.481443,0.00110653,-1.62842e-06,1.80983e-08,0.482548,0.00110333,-1.57413e-06,7.30362e-09,0.48365,0.0011002,-1.55221e-06,-1.75107e-08,0.484749,0.00109705,-1.60475e-06,3.29373e-08,0.485844,0.00109393,-1.50594e-06,-2.48315e-08,0.486937,0.00109085,-1.58043e-06,3.65865e-08,0.488026,0.0010878,-1.47067e-06,-3.21078e-08,0.489112,0.00108476,-1.56699e-06,3.22397e-08,0.490195,0.00108172,-1.47027e-06,-7.44391e-09,0.491276,0.00107876,-1.49261e-06,-2.46428e-09,0.492353,0.00107577,-1.5e-06,1.73011e-08,0.493427,0.00107282,-1.4481e-06,-7.13552e-09,0.494499,0.0010699,-1.4695e-06,1.1241e-08,0.495567,0.001067,-1.43578e-06,-8.02637e-09,0.496633,0.0010641,-1.45986e-06,2.08645e-08,0.497695,0.00106124,-1.39726e-06,-1.58271e-08,0.498755,0.0010584,-1.44475e-06,1.26415e-08,0.499812,0.00105555,-1.40682e-06,2.48655e-08,0.500866,0.00105281,-1.33222e-06,-5.24988e-08,0.501918,0.00104999,-1.48972e-06,6.59206e-08,0.502966,0.00104721,-1.29196e-06,-3.237e-08,0.504012,0.00104453,-1.38907e-06,3.95479e-09,0.505055,0.00104176,-1.3772e-06,1.65509e-08,0.506096,0.00103905,-1.32755e-06,-1.05539e-08,0.507133,0.00103637,-1.35921e-06,2.56648e-08,0.508168,0.00103373,-1.28222e-06,-3.25007e-08,0.509201,0.00103106,-1.37972e-06,4.47336e-08,0.51023,0.00102844,-1.24552e-06,-2.72245e-08,0.511258,0.00102587,-1.32719e-06,4.55952e-09,0.512282,0.00102323,-1.31352e-06,8.98645e-09,0.513304,0.00102063,-1.28656e-06,1.90992e-08,0.514323,0.00101811,-1.22926e-06,-2.57786e-08,0.51534,0.00101557,-1.30659e-06,2.44104e-08,0.516355,0.00101303,-1.23336e-06,-1.22581e-08,0.517366,0.00101053,-1.27014e-06,2.4622e-08,0.518376,0.00100806,-1.19627e-06,-2.66253e-08,0.519383,0.00100559,-1.27615e-06,2.22744e-08,0.520387,0.00100311,-1.20932e-06,-2.8679e-09,0.521389,0.00100068,-1.21793e-06,-1.08029e-08,0.522388,0.000998211,-1.25034e-06,4.60795e-08,0.523385,0.000995849,-1.1121e-06,-5.4306e-08,0.52438,0.000993462,-1.27502e-06,5.19354e-08,0.525372,0.000991067,-1.11921e-06,-3.42262e-08,0.526362,0.000988726,-1.22189e-06,2.53646e-08,0.52735,0.000986359,-1.14579e-06,-7.62782e-09,0.528335,0.000984044,-1.16868e-06,5.14668e-09,0.529318,0.000981722,-1.15324e-06,-1.29589e-08,0.530298,0.000979377,-1.19211e-06,4.66888e-08,0.531276,0.000977133,-1.05205e-06,-5.45868e-08,0.532252,0.000974865,-1.21581e-06,5.24495e-08,0.533226,0.000972591,-1.05846e-06,-3.60019e-08,0.534198,0.000970366,-1.16647e-06,3.19537e-08,0.535167,0.000968129,-1.07061e-06,-3.2208e-08,0.536134,0.000965891,-1.16723e-06,3.72738e-08,0.537099,0.000963668,-1.05541e-06,2.32205e-09,0.538061,0.000961564,-1.04844e-06,-4.65618e-08,0.539022,0.000959328,-1.18813e-06,6.47159e-08,0.53998,0.000957146,-9.93979e-07,-3.3488e-08,0.540936,0.000955057,-1.09444e-06,9.63166e-09,0.54189,0.000952897,-1.06555e-06,-5.03871e-09,0.542842,0.000950751,-1.08066e-06,1.05232e-08,0.543792,0.000948621,-1.04909e-06,2.25503e-08,0.544739,0.000946591,-9.81444e-07,-4.11195e-08,0.545685,0.000944504,-1.1048e-06,2.27182e-08,0.546628,0.000942363,-1.03665e-06,9.85146e-09,0.54757,0.000940319,-1.00709e-06,-2.51938e-09,0.548509,0.000938297,-1.01465e-06,2.25858e-10,0.549446,0.000936269,-1.01397e-06,1.61598e-09,0.550381,0.000934246,-1.00913e-06,-6.68983e-09,0.551315,0.000932207,-1.0292e-06,2.51434e-08,0.552246,0.000930224,-9.53765e-07,-3.42793e-08,0.553175,0.000928214,-1.0566e-06,5.23688e-08,0.554102,0.000926258,-8.99497e-07,-5.59865e-08,0.555028,0.000924291,-1.06746e-06,5.23679e-08,0.555951,0.000922313,-9.10352e-07,-3.42763e-08,0.556872,0.00092039,-1.01318e-06,2.51326e-08,0.557792,0.000918439,-9.37783e-07,-6.64954e-09,0.558709,0.000916543,-9.57732e-07,1.46554e-09,0.559625,0.000914632,-9.53335e-07,7.87281e-10,0.560538,0.000912728,-9.50973e-07,-4.61466e-09,0.56145,0.000910812,-9.64817e-07,1.76713e-08,0.56236,0.000908935,-9.11804e-07,-6.46564e-09,0.563268,0.000907092,-9.312e-07,8.19121e-09,0.564174,0.000905255,-9.06627e-07,-2.62992e-08,0.565078,0.000903362,-9.85524e-07,3.74007e-08,0.565981,0.000901504,-8.73322e-07,-4.0942e-09,0.566882,0.000899745,-8.85605e-07,-2.1024e-08,0.56778,0.00089791,-9.48677e-07,2.85854e-08,0.568677,0.000896099,-8.62921e-07,-3.3713e-08,0.569573,0.000894272,-9.64059e-07,4.6662e-08,0.570466,0.000892484,-8.24073e-07,-3.37258e-08,0.571358,0.000890734,-9.25251e-07,2.86365e-08,0.572247,0.00088897,-8.39341e-07,-2.12155e-08,0.573135,0.000887227,-9.02988e-07,-3.37913e-09,0.574022,0.000885411,-9.13125e-07,3.47319e-08,0.574906,0.000883689,-8.08929e-07,-1.63394e-08,0.575789,0.000882022,-8.57947e-07,-2.8979e-08,0.57667,0.00088022,-9.44885e-07,7.26509e-08,0.57755,0.000878548,-7.26932e-07,-8.28106e-08,0.578427,0.000876845,-9.75364e-07,7.97774e-08,0.579303,0.000875134,-7.36032e-07,-5.74849e-08,0.580178,0.00087349,-9.08486e-07,3.09529e-08,0.58105,0.000871765,-8.15628e-07,-6.72206e-09,0.581921,0.000870114,-8.35794e-07,-4.06451e-09,0.582791,0.00086843,-8.47987e-07,2.29799e-08,0.583658,0.000866803,-7.79048e-07,-2.82503e-08,0.584524,0.00086516,-8.63799e-07,3.04167e-08,0.585388,0.000863524,-7.72548e-07,-3.38119e-08,0.586251,0.000861877,-8.73984e-07,4.52264e-08,0.587112,0.000860265,-7.38305e-07,-2.78842e-08,0.587972,0.000858705,-8.21958e-07,6.70567e-09,0.58883,0.000857081,-8.01841e-07,1.06161e-09,0.589686,0.000855481,-7.98656e-07,-1.09521e-08,0.590541,0.00085385,-8.31512e-07,4.27468e-08,0.591394,0.000852316,-7.03272e-07,-4.08257e-08,0.592245,0.000850787,-8.25749e-07,1.34677e-09,0.593095,0.000849139,-8.21709e-07,3.54387e-08,0.593944,0.000847602,-7.15393e-07,-2.38924e-08,0.59479,0.0008461,-7.8707e-07,5.26143e-10,0.595636,0.000844527,-7.85491e-07,2.17879e-08,0.596479,0.000843021,-7.20127e-07,-2.80733e-08,0.597322,0.000841497,-8.04347e-07,3.09005e-08,0.598162,0.000839981,-7.11646e-07,-3.5924e-08,0.599002,0.00083845,-8.19418e-07,5.3191e-08,0.599839,0.000836971,-6.59845e-07,-5.76307e-08,0.600676,0.000835478,-8.32737e-07,5.81227e-08,0.60151,0.000833987,-6.58369e-07,-5.56507e-08,0.602344,0.000832503,-8.25321e-07,4.52706e-08,0.603175,0.000830988,-6.89509e-07,-6.22236e-09,0.604006,0.000829591,-7.08176e-07,-2.03811e-08,0.604834,0.000828113,-7.6932e-07,2.8142e-08,0.605662,0.000826659,-6.84894e-07,-3.25822e-08,0.606488,0.000825191,-7.8264e-07,4.25823e-08,0.607312,0.000823754,-6.54893e-07,-1.85376e-08,0.608135,0.000822389,-7.10506e-07,-2.80365e-08,0.608957,0.000820883,-7.94616e-07,7.1079e-08,0.609777,0.000819507,-5.81379e-07,-7.74655e-08,0.610596,0.000818112,-8.13775e-07,5.9969e-08,0.611413,0.000816665,-6.33868e-07,-4.32013e-08,0.612229,0.000815267,-7.63472e-07,5.32313e-08,0.613044,0.0008139,-6.03778e-07,-5.05148e-08,0.613857,0.000812541,-7.55323e-07,2.96187e-08,0.614669,0.000811119,-6.66466e-07,-8.35545e-09,0.615479,0.000809761,-6.91533e-07,3.80301e-09,0.616288,0.00080839,-6.80124e-07,-6.85666e-09,0.617096,0.000807009,-7.00694e-07,2.36237e-08,0.617903,0.000805678,-6.29822e-07,-2.80336e-08,0.618708,0.000804334,-7.13923e-07,2.8906e-08,0.619511,0.000802993,-6.27205e-07,-2.79859e-08,0.620314,0.000801655,-7.11163e-07,2.34329e-08,0.621114,0.000800303,-6.40864e-07,-6.14108e-09,0.621914,0.000799003,-6.59287e-07,1.13151e-09,0.622712,0.000797688,-6.55893e-07,1.61507e-09,0.62351,0.000796381,-6.51048e-07,-7.59186e-09,0.624305,0.000795056,-6.73823e-07,2.87524e-08,0.6251,0.000793794,-5.87566e-07,-4.7813e-08,0.625893,0.000792476,-7.31005e-07,4.32901e-08,0.626685,0.000791144,-6.01135e-07,-6.13814e-09,0.627475,0.000789923,-6.19549e-07,-1.87376e-08,0.628264,0.000788628,-6.75762e-07,2.14837e-08,0.629052,0.000787341,-6.11311e-07,-7.59265e-09,0.629839,0.000786095,-6.34089e-07,8.88692e-09,0.630625,0.000784854,-6.07428e-07,-2.7955e-08,0.631409,0.000783555,-6.91293e-07,4.33285e-08,0.632192,0.000782302,-5.61307e-07,-2.61497e-08,0.632973,0.000781101,-6.39757e-07,1.6658e-09,0.633754,0.000779827,-6.34759e-07,1.94866e-08,0.634533,0.000778616,-5.76299e-07,-2.00076e-08,0.635311,0.000777403,-6.36322e-07,9.39091e-10,0.636088,0.000776133,-6.33505e-07,1.62512e-08,0.636863,0.000774915,-5.84751e-07,-6.33937e-09,0.637638,0.000773726,-6.03769e-07,9.10609e-09,0.638411,0.000772546,-5.76451e-07,-3.00849e-08,0.639183,0.000771303,-6.66706e-07,5.1629e-08,0.639953,0.000770125,-5.11819e-07,-5.7222e-08,0.640723,0.000768929,-6.83485e-07,5.80497e-08,0.641491,0.000767736,-5.09336e-07,-5.57674e-08,0.642259,0.000766551,-6.76638e-07,4.58105e-08,0.643024,0.000765335,-5.39206e-07,-8.26541e-09,0.643789,0.000764231,-5.64002e-07,-1.27488e-08,0.644553,0.000763065,-6.02249e-07,-3.44168e-10,0.645315,0.00076186,-6.03281e-07,1.41254e-08,0.646077,0.000760695,-5.60905e-07,3.44727e-09,0.646837,0.000759584,-5.50563e-07,-2.79144e-08,0.647596,0.000758399,-6.34307e-07,4.86057e-08,0.648354,0.000757276,-4.88489e-07,-4.72989e-08,0.64911,0.000756158,-6.30386e-07,2.13807e-08,0.649866,0.000754961,-5.66244e-07,2.13808e-08,0.65062,0.000753893,-5.02102e-07,-4.7299e-08,0.651374,0.000752746,-6.43999e-07,4.86059e-08,0.652126,0.000751604,-4.98181e-07,-2.79154e-08,0.652877,0.000750524,-5.81927e-07,3.45089e-09,0.653627,0.000749371,-5.71575e-07,1.41119e-08,0.654376,0.00074827,-5.29239e-07,-2.93748e-10,0.655123,0.00074721,-5.3012e-07,-1.29368e-08,0.65587,0.000746111,-5.68931e-07,-7.56355e-09,0.656616,0.000744951,-5.91621e-07,4.3191e-08,0.65736,0.000743897,-4.62048e-07,-4.59911e-08,0.658103,0.000742835,-6.00022e-07,2.15642e-08,0.658846,0.0007417,-5.35329e-07,1.93389e-08,0.659587,0.000740687,-4.77312e-07,-3.93152e-08,0.660327,0.000739615,-5.95258e-07,1.87126e-08,0.661066,0.00073848,-5.3912e-07,2.40695e-08,0.661804,0.000737474,-4.66912e-07,-5.53859e-08,0.662541,0.000736374,-6.33069e-07,7.82648e-08,0.663277,0.000735343,-3.98275e-07,-7.88593e-08,0.664012,0.00073431,-6.34853e-07,5.83585e-08,0.664745,0.000733215,-4.59777e-07,-3.53656e-08,0.665478,0.000732189,-5.65874e-07,2.34994e-08,0.66621,0.000731128,-4.95376e-07,9.72743e-10,0.66694,0.00073014,-4.92458e-07,-2.73903e-08,0.66767,0.000729073,-5.74629e-07,4.89839e-08,0.668398,0.000728071,-4.27677e-07,-4.93359e-08,0.669126,0.000727068,-5.75685e-07,2.91504e-08,0.669853,0.000726004,-4.88234e-07,-7.66109e-09,0.670578,0.000725004,-5.11217e-07,1.49392e-09,0.671303,0.000723986,-5.06735e-07,1.68533e-09,0.672026,0.000722978,-5.01679e-07,-8.23525e-09,0.672749,0.00072195,-5.26385e-07,3.12556e-08,0.67347,0.000720991,-4.32618e-07,-5.71825e-08,0.674191,0.000719954,-6.04166e-07,7.8265e-08,0.67491,0.00071898,-3.69371e-07,-7.70634e-08,0.675628,0.00071801,-6.00561e-07,5.11747e-08,0.676346,0.000716963,-4.47037e-07,-8.42615e-09,0.677062,0.000716044,-4.72315e-07,-1.747e-08,0.677778,0.000715046,-5.24725e-07,1.87015e-08,0.678493,0.000714053,-4.68621e-07,2.26856e-09,0.679206,0.000713123,-4.61815e-07,-2.77758e-08,0.679919,0.000712116,-5.45142e-07,4.92298e-08,0.68063,0.000711173,-3.97453e-07,-4.99339e-08,0.681341,0.000710228,-5.47255e-07,3.12967e-08,0.682051,0.000709228,-4.53365e-07,-1.56481e-08,0.68276,0.000708274,-5.00309e-07,3.12958e-08,0.683467,0.000707367,-4.06422e-07,-4.99303e-08,0.684174,0.000706405,-5.56213e-07,4.9216e-08,0.68488,0.00070544,-4.08565e-07,-2.77245e-08,0.685585,0.00070454,-4.91738e-07,2.07748e-09,0.686289,0.000703562,-4.85506e-07,1.94146e-08,0.686992,0.00070265,-4.27262e-07,-2.01314e-08,0.687695,0.000701735,-4.87656e-07,1.50616e-09,0.688396,0.000700764,-4.83137e-07,1.41067e-08,0.689096,0.00069984,-4.40817e-07,1.67168e-09,0.689795,0.000698963,-4.35802e-07,-2.07934e-08,0.690494,0.000698029,-4.98182e-07,2.18972e-08,0.691192,0.000697099,-4.32491e-07,-7.19092e-09,0.691888,0.000696212,-4.54064e-07,6.86642e-09,0.692584,0.000695325,-4.33464e-07,-2.02747e-08,0.693279,0.000694397,-4.94288e-07,1.46279e-08,0.693973,0.000693452,-4.50405e-07,2.13678e-08,0.694666,0.000692616,-3.86301e-07,-4.04945e-08,0.695358,0.000691721,-5.07785e-07,2.14009e-08,0.696049,0.00069077,-4.43582e-07,1.44955e-08,0.69674,0.000689926,-4.00096e-07,-1.97783e-08,0.697429,0.000689067,-4.5943e-07,5.01296e-09,0.698118,0.000688163,-4.44392e-07,-2.73521e-10,0.698805,0.000687273,-4.45212e-07,-3.91893e-09,0.699492,0.000686371,-4.56969e-07,1.59493e-08,0.700178,0.000685505,-4.09121e-07,-2.73351e-10,0.700863,0.000684686,-4.09941e-07,-1.4856e-08,0.701548,0.000683822,-4.54509e-07,9.25979e-11,0.702231,0.000682913,-4.54231e-07,1.44855e-08,0.702913,0.000682048,-4.10775e-07,1.56992e-09,0.703595,0.000681231,-4.06065e-07,-2.07652e-08,0.704276,0.000680357,-4.68361e-07,2.18864e-08,0.704956,0.000679486,-4.02701e-07,-7.17595e-09,0.705635,0.000678659,-4.24229e-07,6.81748e-09,0.706313,0.000677831,-4.03777e-07,-2.0094e-08,0.70699,0.000676963,-4.64059e-07,1.39538e-08,0.707667,0.000676077,-4.22197e-07,2.38835e-08,0.708343,0.000675304,-3.50547e-07,-4.98831e-08,0.709018,0.000674453,-5.00196e-07,5.64395e-08,0.709692,0.000673622,-3.30878e-07,-5.66657e-08,0.710365,0.00067279,-5.00875e-07,5.1014e-08,0.711037,0.000671942,-3.47833e-07,-2.81809e-08,0.711709,0.000671161,-4.32376e-07,2.10513e-09,0.712379,0.000670303,-4.2606e-07,1.97604e-08,0.713049,0.00066951,-3.66779e-07,-2.15422e-08,0.713718,0.000668712,-4.31406e-07,6.8038e-09,0.714387,0.000667869,-4.10994e-07,-5.67295e-09,0.715054,0.00066703,-4.28013e-07,1.5888e-08,0.715721,0.000666222,-3.80349e-07,1.72576e-09,0.716387,0.000665467,-3.75172e-07,-2.27911e-08,0.717052,0.000664648,-4.43545e-07,2.9834e-08,0.717716,0.00066385,-3.54043e-07,-3.69401e-08,0.718379,0.000663031,-4.64864e-07,5.83219e-08,0.719042,0.000662277,-2.89898e-07,-7.71382e-08,0.719704,0.000661465,-5.21313e-07,7.14171e-08,0.720365,0.000660637,-3.07061e-07,-2.97161e-08,0.721025,0.000659934,-3.96209e-07,-1.21575e-08,0.721685,0.000659105,-4.32682e-07,1.87412e-08,0.722343,0.000658296,-3.76458e-07,-3.2029e-09,0.723001,0.000657533,-3.86067e-07,-5.9296e-09,0.723659,0.000656743,-4.03856e-07,2.69213e-08,0.724315,0.000656016,-3.23092e-07,-4.21511e-08,0.724971,0.000655244,-4.49545e-07,2.24737e-08,0.725625,0.000654412,-3.82124e-07,1.18611e-08,0.726279,0.000653683,-3.46541e-07,-1.03132e-08,0.726933,0.000652959,-3.7748e-07,-3.02128e-08,0.727585,0.000652114,-4.68119e-07,7.15597e-08,0.728237,0.000651392,-2.5344e-07,-7.72119e-08,0.728888,0.000650654,-4.85075e-07,5.8474e-08,0.729538,0.000649859,-3.09654e-07,-3.74746e-08,0.730188,0.000649127,-4.22077e-07,3.18197e-08,0.730837,0.000648379,-3.26618e-07,-3.01997e-08,0.731485,0.000647635,-4.17217e-07,2.93747e-08,0.732132,0.000646888,-3.29093e-07,-2.76943e-08,0.732778,0.000646147,-4.12176e-07,2.17979e-08,0.733424,0.000645388,-3.46783e-07,1.07292e-10,0.734069,0.000644695,-3.46461e-07,-2.22271e-08,0.734713,0.000643935,-4.13142e-07,2.91963e-08,0.735357,0.000643197,-3.25553e-07,-3.49536e-08,0.736,0.000642441,-4.30414e-07,5.10133e-08,0.736642,0.000641733,-2.77374e-07,-4.98904e-08,0.737283,0.000641028,-4.27045e-07,2.93392e-08,0.737924,0.000640262,-3.39028e-07,-7.86156e-09,0.738564,0.000639561,-3.62612e-07,2.10703e-09,0.739203,0.000638842,-3.56291e-07,-5.6653e-10,0.739842,0.000638128,-3.57991e-07,1.59086e-10,0.740479,0.000637412,-3.57513e-07,-6.98321e-11,0.741116,0.000636697,-3.57723e-07,1.20214e-10,0.741753,0.000635982,-3.57362e-07,-4.10987e-10,0.742388,0.000635266,-3.58595e-07,1.5237e-09,0.743023,0.000634553,-3.54024e-07,-5.68376e-09,0.743657,0.000633828,-3.71075e-07,2.12113e-08,0.744291,0.00063315,-3.07441e-07,-1.95569e-08,0.744924,0.000632476,-3.66112e-07,-2.58816e-09,0.745556,0.000631736,-3.73877e-07,2.99096e-08,0.746187,0.000631078,-2.84148e-07,-5.74454e-08,0.746818,0.000630337,-4.56484e-07,8.06629e-08,0.747448,0.000629666,-2.14496e-07,-8.63922e-08,0.748077,0.000628978,-4.73672e-07,8.60918e-08,0.748706,0.000628289,-2.15397e-07,-7.91613e-08,0.749334,0.000627621,-4.5288e-07,5.17393e-08,0.749961,0.00062687,-2.97663e-07,-8.58662e-09,0.750588,0.000626249,-3.23422e-07,-1.73928e-08,0.751214,0.00062555,-3.75601e-07,1.85532e-08,0.751839,0.000624855,-3.19941e-07,2.78479e-09,0.752463,0.000624223,-3.11587e-07,-2.96923e-08,0.753087,0.000623511,-4.00664e-07,5.63799e-08,0.75371,0.000622879,-2.31524e-07,-7.66179e-08,0.754333,0.000622186,-4.61378e-07,7.12778e-08,0.754955,0.000621477,-2.47545e-07,-2.96794e-08,0.755576,0.000620893,-3.36583e-07,-1.21648e-08,0.756196,0.000620183,-3.73077e-07,1.87339e-08,0.756816,0.000619493,-3.16875e-07,-3.16622e-09,0.757435,0.00061885,-3.26374e-07,-6.0691e-09,0.758054,0.000618179,-3.44581e-07,2.74426e-08,0.758672,0.000617572,-2.62254e-07,-4.40968e-08,0.759289,0.000616915,-3.94544e-07,2.97352e-08,0.759906,0.000616215,-3.05338e-07,-1.52393e-08,0.760522,0.000615559,-3.51056e-07,3.12221e-08,0.761137,0.000614951,-2.5739e-07,-5.00443e-08,0.761751,0.000614286,-4.07523e-07,4.9746e-08,0.762365,0.00061362,-2.58285e-07,-2.97303e-08,0.762979,0.000613014,-3.47476e-07,9.57079e-09,0.763591,0.000612348,-3.18764e-07,-8.55287e-09,0.764203,0.000611685,-3.44422e-07,2.46407e-08,0.764815,0.00061107,-2.705e-07,-3.04053e-08,0.765426,0.000610437,-3.61716e-07,3.73759e-08,0.766036,0.000609826,-2.49589e-07,-5.94935e-08,0.766645,0.000609149,-4.28069e-07,8.13889e-08,0.767254,0.000608537,-1.83902e-07,-8.72483e-08,0.767862,0.000607907,-4.45647e-07,8.87901e-08,0.76847,0.000607282,-1.79277e-07,-8.90983e-08,0.769077,0.000606656,-4.46572e-07,8.87892e-08,0.769683,0.000606029,-1.80204e-07,-8.72446e-08,0.770289,0.000605407,-4.41938e-07,8.13752e-08,0.770894,0.000604768,-1.97812e-07,-5.94423e-08,0.771498,0.000604194,-3.76139e-07,3.71848e-08,0.772102,0.000603553,-2.64585e-07,-2.96922e-08,0.772705,0.000602935,-3.53661e-07,2.19793e-08,0.773308,0.000602293,-2.87723e-07,1.37955e-09,0.77391,0.000601722,-2.83585e-07,-2.74976e-08,0.774512,0.000601072,-3.66077e-07,4.9006e-08,0.775112,0.000600487,-2.19059e-07,-4.93171e-08,0.775712,0.000599901,-3.67011e-07,2.90531e-08,0.776312,0.000599254,-2.79851e-07,-7.29081e-09,0.776911,0.000598673,-3.01724e-07,1.10077e-10,0.777509,0.00059807,-3.01393e-07,6.85053e-09,0.778107,0.000597487,-2.80842e-07,-2.75123e-08,0.778704,0.000596843,-3.63379e-07,4.35939e-08,0.779301,0.000596247,-2.32597e-07,-2.7654e-08,0.779897,0.000595699,-3.15559e-07,7.41741e-09,0.780492,0.00059509,-2.93307e-07,-2.01562e-09,0.781087,0.000594497,-2.99354e-07,6.45059e-10,0.781681,0.000593901,-2.97418e-07,-5.64635e-10,0.782275,0.000593304,-2.99112e-07,1.61347e-09,0.782868,0.000592711,-2.94272e-07,-5.88926e-09,0.78346,0.000592105,-3.1194e-07,2.19436e-08,0.784052,0.000591546,-2.46109e-07,-2.22805e-08,0.784643,0.000590987,-3.1295e-07,7.57368e-09,0.785234,0.000590384,-2.90229e-07,-8.01428e-09,0.785824,0.00058978,-3.14272e-07,2.44834e-08,0.786414,0.000589225,-2.40822e-07,-3.03148e-08,0.787003,0.000588652,-3.31766e-07,3.7171e-08,0.787591,0.0005881,-2.20253e-07,-5.87646e-08,0.788179,0.000587483,-3.96547e-07,7.86782e-08,0.788766,0.000586926,-1.60512e-07,-7.71342e-08,0.789353,0.000586374,-3.91915e-07,5.10444e-08,0.789939,0.000585743,-2.38782e-07,-7.83422e-09,0.790524,0.000585242,-2.62284e-07,-1.97076e-08,0.791109,0.000584658,-3.21407e-07,2.70598e-08,0.791693,0.000584097,-2.40228e-07,-2.89269e-08,0.792277,0.000583529,-3.27008e-07,2.90431e-08,0.792861,0.000582963,-2.39879e-07,-2.76409e-08,0.793443,0.0005824,-3.22802e-07,2.1916e-08,0.794025,0.00058182,-2.57054e-07,-4.18368e-10,0.794607,0.000581305,-2.58309e-07,-2.02425e-08,0.795188,0.000580727,-3.19036e-07,2.17838e-08,0.795768,0.000580155,-2.53685e-07,-7.28814e-09,0.796348,0.000579625,-2.75549e-07,7.36871e-09,0.796928,0.000579096,-2.53443e-07,-2.21867e-08,0.797506,0.000578523,-3.20003e-07,2.17736e-08,0.798085,0.000577948,-2.54683e-07,-5.30296e-09,0.798662,0.000577423,-2.70592e-07,-5.61698e-10,0.799239,0.00057688,-2.72277e-07,7.54977e-09,0.799816,0.000576358,-2.49627e-07,-2.96374e-08,0.800392,0.00057577,-3.38539e-07,5.1395e-08,0.800968,0.000575247,-1.84354e-07,-5.67335e-08,0.801543,0.000574708,-3.54555e-07,5.63297e-08,0.802117,0.000574168,-1.85566e-07,-4.93759e-08,0.802691,0.000573649,-3.33693e-07,2.19646e-08,0.803264,0.000573047,-2.678e-07,2.1122e-08,0.803837,0.000572575,-2.04433e-07,-4.68482e-08,0.804409,0.000572026,-3.44978e-07,4.70613e-08,0.804981,0.000571477,-2.03794e-07,-2.21877e-08,0.805552,0.000571003,-2.70357e-07,-1.79153e-08,0.806123,0.000570408,-3.24103e-07,3.42443e-08,0.806693,0.000569863,-2.2137e-07,1.47556e-10,0.807263,0.000569421,-2.20928e-07,-3.48345e-08,0.807832,0.000568874,-3.25431e-07,1.99812e-08,0.808401,0.000568283,-2.65487e-07,1.45143e-08,0.808969,0.000567796,-2.21945e-07,-1.84338e-08,0.809536,0.000567297,-2.77246e-07,-3.83608e-10,0.810103,0.000566741,-2.78397e-07,1.99683e-08,0.81067,0.000566244,-2.18492e-07,-1.98848e-08,0.811236,0.000565747,-2.78146e-07,-3.38976e-11,0.811801,0.000565191,-2.78248e-07,2.00204e-08,0.812366,0.000564695,-2.18187e-07,-2.04429e-08,0.812931,0.000564197,-2.79516e-07,2.1467e-09,0.813495,0.000563644,-2.73076e-07,1.18561e-08,0.814058,0.000563134,-2.37507e-07,1.00334e-08,0.814621,0.000562689,-2.07407e-07,-5.19898e-08,0.815183,0.000562118,-3.63376e-07,7.87163e-08,0.815745,0.000561627,-1.27227e-07,-8.40616e-08,0.816306,0.000561121,-3.79412e-07,7.87163e-08,0.816867,0.000560598,-1.43263e-07,-5.19898e-08,0.817428,0.000560156,-2.99233e-07,1.00335e-08,0.817988,0.000559587,-2.69132e-07,1.18559e-08,0.818547,0.000559085,-2.33564e-07,2.14764e-09,0.819106,0.000558624,-2.27122e-07,-2.04464e-08,0.819664,0.000558108,-2.88461e-07,2.00334e-08,0.820222,0.000557591,-2.28361e-07,-8.24277e-11,0.820779,0.000557135,-2.28608e-07,-1.97037e-08,0.821336,0.000556618,-2.87719e-07,1.92925e-08,0.821893,0.000556101,-2.29841e-07,2.13831e-09,0.822448,0.000555647,-2.23427e-07,-2.78458e-08,0.823004,0.000555117,-3.06964e-07,4.96402e-08,0.823559,0.000554652,-1.58043e-07,-5.15058e-08,0.824113,0.000554181,-3.12561e-07,3.71737e-08,0.824667,0.000553668,-2.0104e-07,-3.75844e-08,0.82522,0.000553153,-3.13793e-07,5.35592e-08,0.825773,0.000552686,-1.53115e-07,-5.74431e-08,0.826326,0.000552207,-3.25444e-07,5.7004e-08,0.826878,0.000551728,-1.54433e-07,-5.13635e-08,0.827429,0.000551265,-3.08523e-07,2.92406e-08,0.82798,0.000550735,-2.20801e-07,-5.99424e-09,0.828531,0.000550276,-2.38784e-07,-5.26363e-09,0.829081,0.000549782,-2.54575e-07,2.70488e-08,0.82963,0.000549354,-1.73429e-07,-4.33268e-08,0.83018,0.000548878,-3.03409e-07,2.7049e-08,0.830728,0.000548352,-2.22262e-07,-5.26461e-09,0.831276,0.000547892,-2.38056e-07,-5.99057e-09,0.831824,0.000547397,-2.56027e-07,2.92269e-08,0.832371,0.000546973,-1.68347e-07,-5.13125e-08,0.832918,0.000546482,-3.22284e-07,5.68139e-08,0.833464,0.000546008,-1.51843e-07,-5.67336e-08,0.83401,0.000545534,-3.22043e-07,5.09113e-08,0.834555,0.000545043,-1.6931e-07,-2.77022e-08,0.8351,0.000544621,-2.52416e-07,2.92924e-10,0.835644,0.000544117,-2.51537e-07,2.65305e-08,0.836188,0.000543694,-1.71946e-07,-4.68105e-08,0.836732,0.00054321,-3.12377e-07,4.15021e-08,0.837275,0.000542709,-1.87871e-07,1.13355e-11,0.837817,0.000542334,-1.87837e-07,-4.15474e-08,0.838359,0.000541833,-3.12479e-07,4.69691e-08,0.838901,0.000541349,-1.71572e-07,-2.71196e-08,0.839442,0.000540925,-2.52931e-07,1.90462e-09,0.839983,0.000540425,-2.47217e-07,1.95011e-08,0.840523,0.000539989,-1.88713e-07,-2.03045e-08,0.841063,0.00053955,-2.49627e-07,2.11216e-09,0.841602,0.000539057,-2.4329e-07,1.18558e-08,0.842141,0.000538606,-2.07723e-07,1.00691e-08,0.842679,0.000538221,-1.77516e-07,-5.21324e-08,0.843217,0.00053771,-3.33913e-07,7.92513e-08,0.843755,0.00053728,-9.6159e-08,-8.60587e-08,0.844292,0.000536829,-3.54335e-07,8.61696e-08,0.844828,0.000536379,-9.58263e-08,-7.98057e-08,0.845364,0.000535948,-3.35243e-07,5.42394e-08,0.8459,0.00053544,-1.72525e-07,-1.79426e-08,0.846435,0.000535041,-2.26353e-07,1.75308e-08,0.84697,0.000534641,-1.73761e-07,-5.21806e-08,0.847505,0.000534137,-3.30302e-07,7.19824e-08,0.848038,0.000533692,-1.14355e-07,-5.69349e-08,0.848572,0.000533293,-2.8516e-07,3.65479e-08,0.849105,0.000532832,-1.75516e-07,-2.96519e-08,0.849638,0.000532392,-2.64472e-07,2.2455e-08,0.85017,0.000531931,-1.97107e-07,-5.63451e-10,0.850702,0.000531535,-1.98797e-07,-2.02011e-08,0.851233,0.000531077,-2.59401e-07,2.17634e-08,0.851764,0.000530623,-1.94111e-07,-7.24794e-09,0.852294,0.000530213,-2.15854e-07,7.22832e-09,0.852824,0.000529803,-1.94169e-07,-2.16653e-08,0.853354,0.00052935,-2.59165e-07,1.98283e-08,0.853883,0.000528891,-1.9968e-07,1.95678e-09,0.854412,0.000528497,-1.9381e-07,-2.76554e-08,0.85494,0.000528027,-2.76776e-07,4.90603e-08,0.855468,0.00052762,-1.29596e-07,-4.93764e-08,0.855995,0.000527213,-2.77725e-07,2.92361e-08,0.856522,0.000526745,-1.90016e-07,-7.96341e-09,0.857049,0.000526341,-2.13907e-07,2.61752e-09,0.857575,0.000525922,-2.06054e-07,-2.50665e-09,0.8581,0.000525502,-2.13574e-07,7.40906e-09,0.858626,0.000525097,-1.91347e-07,-2.71296e-08,0.859151,0.000524633,-2.72736e-07,4.15048e-08,0.859675,0.000524212,-1.48221e-07,-1.96802e-08,0.860199,0.000523856,-2.07262e-07,-2.23886e-08,0.860723,0.000523375,-2.74428e-07,4.96299e-08,0.861246,0.000522975,-1.25538e-07,-5.69216e-08,0.861769,0.000522553,-2.96303e-07,5.88473e-08,0.862291,0.000522137,-1.19761e-07,-5.92584e-08,0.862813,0.00052172,-2.97536e-07,5.8977e-08,0.863334,0.000521301,-1.20605e-07,-5.74403e-08,0.863855,0.000520888,-2.92926e-07,5.15751e-08,0.864376,0.000520457,-1.38201e-07,-2.96506e-08,0.864896,0.000520091,-2.27153e-07,7.42277e-09,0.865416,0.000519659,-2.04885e-07,-4.05057e-11,0.865936,0.00051925,-2.05006e-07,-7.26074e-09,0.866455,0.000518818,-2.26788e-07,2.90835e-08,0.866973,0.000518451,-1.39538e-07,-4.94686e-08,0.867492,0.000518024,-2.87944e-07,4.95814e-08,0.868009,0.000517597,-1.39199e-07,-2.96479e-08,0.868527,0.000517229,-2.28143e-07,9.40539e-09,0.869044,0.000516801,-1.99927e-07,-7.9737e-09,0.86956,0.000516378,-2.23848e-07,2.24894e-08,0.870077,0.000515997,-1.5638e-07,-2.23793e-08,0.870592,0.000515617,-2.23517e-07,7.42302e-09,0.871108,0.000515193,-2.01248e-07,-7.31283e-09,0.871623,0.000514768,-2.23187e-07,2.18283e-08,0.872137,0.000514387,-1.57702e-07,-2.03959e-08,0.872652,0.000514011,-2.1889e-07,1.50711e-10,0.873165,0.000513573,-2.18437e-07,1.97931e-08,0.873679,0.000513196,-1.59058e-07,-1.97183e-08,0.874192,0.000512819,-2.18213e-07,-5.24324e-10,0.874704,0.000512381,-2.19786e-07,2.18156e-08,0.875217,0.000512007,-1.54339e-07,-2.71336e-08,0.875728,0.000511616,-2.3574e-07,2.71141e-08,0.87624,0.000511226,-1.54398e-07,-2.17182e-08,0.876751,0.000510852,-2.19552e-07,1.54131e-10,0.877262,0.000510414,-2.1909e-07,2.11017e-08,0.877772,0.000510039,-1.55785e-07,-2.49562e-08,0.878282,0.000509652,-2.30654e-07,1.91183e-08,0.878791,0.000509248,-1.73299e-07,8.08751e-09,0.8793,0.000508926,-1.49036e-07,-5.14684e-08,0.879809,0.000508474,-3.03441e-07,7.85766e-08,0.880317,0.000508103,-6.77112e-08,-8.40242e-08,0.880825,0.000507715,-3.19784e-07,7.87063e-08,0.881333,0.000507312,-8.36649e-08,-5.19871e-08,0.88184,0.000506988,-2.39626e-07,1.00327e-08,0.882346,0.000506539,-2.09528e-07,1.18562e-08,0.882853,0.000506156,-1.73959e-07,2.14703e-09,0.883359,0.000505814,-1.67518e-07,-2.04444e-08,0.883864,0.000505418,-2.28851e-07,2.00258e-08,0.88437,0.00050502,-1.68774e-07,-5.42855e-11,0.884874,0.000504682,-1.68937e-07,-1.98087e-08,0.885379,0.000504285,-2.28363e-07,1.96842e-08,0.885883,0.000503887,-1.6931e-07,6.76342e-10,0.886387,0.000503551,-1.67281e-07,-2.23896e-08,0.88689,0.000503149,-2.3445e-07,2.92774e-08,0.887393,0.000502768,-1.46618e-07,-3.51152e-08,0.887896,0.00050237,-2.51963e-07,5.15787e-08,0.888398,0.00050202,-9.72271e-08,-5.19903e-08,0.8889,0.00050167,-2.53198e-07,3.71732e-08,0.889401,0.000501275,-1.41678e-07,-3.70978e-08,0.889902,0.00050088,-2.52972e-07,5.16132e-08,0.890403,0.000500529,-9.81321e-08,-5.01459e-08,0.890903,0.000500183,-2.4857e-07,2.9761e-08,0.891403,0.000499775,-1.59287e-07,-9.29351e-09,0.891903,0.000499428,-1.87167e-07,7.41301e-09,0.892402,0.000499076,-1.64928e-07,-2.03585e-08,0.892901,0.000498685,-2.26004e-07,1.44165e-08,0.893399,0.000498276,-1.82754e-07,2.22974e-08,0.893898,0.000497978,-1.15862e-07,-4.40013e-08,0.894395,0.000497614,-2.47866e-07,3.44985e-08,0.894893,0.000497222,-1.44371e-07,-3.43882e-08,0.89539,0.00049683,-2.47535e-07,4.34497e-08,0.895886,0.000496465,-1.17186e-07,-2.02012e-08,0.896383,0.00049617,-1.7779e-07,-2.22497e-08,0.896879,0.000495748,-2.44539e-07,4.95952e-08,0.897374,0.000495408,-9.57532e-08,-5.69217e-08,0.89787,0.000495045,-2.66518e-07,5.88823e-08,0.898364,0.000494689,-8.98713e-08,-5.93983e-08,0.898859,0.000494331,-2.68066e-07,5.95017e-08,0.899353,0.000493973,-8.95613e-08,-5.9399e-08,0.899847,0.000493616,-2.67758e-07,5.8885e-08,0.90034,0.000493257,-9.11033e-08,-5.69317e-08,0.900833,0.000492904,-2.61898e-07,4.96326e-08,0.901326,0.000492529,-1.13001e-07,-2.23893e-08,0.901819,0.000492236,-1.80169e-07,-1.968e-08,0.902311,0.000491817,-2.39209e-07,4.15047e-08,0.902802,0.000491463,-1.14694e-07,-2.71296e-08,0.903293,0.000491152,-1.96083e-07,7.409e-09,0.903784,0.000490782,-1.73856e-07,-2.50645e-09,0.904275,0.000490427,-1.81376e-07,2.61679e-09,0.904765,0.000490072,-1.73525e-07,-7.96072e-09,0.905255,0.000489701,-1.97407e-07,2.92261e-08,0.905745,0.000489394,-1.09729e-07,-4.93389e-08,0.906234,0.000489027,-2.57746e-07,4.89204e-08,0.906723,0.000488658,-1.10985e-07,-2.71333e-08,0.907211,0.000488354,-1.92385e-07,8.30861e-12,0.907699,0.00048797,-1.9236e-07,2.71001e-08,0.908187,0.000487666,-1.1106e-07,-4.88041e-08,0.908675,0.000487298,-2.57472e-07,4.89069e-08,0.909162,0.000486929,-1.10751e-07,-2.76143e-08,0.909649,0.000486625,-1.93594e-07,1.9457e-09,0.910135,0.000486244,-1.87757e-07,1.98315e-08,0.910621,0.000485928,-1.28262e-07,-2.16671e-08,0.911107,0.000485606,-1.93264e-07,7.23216e-09,0.911592,0.000485241,-1.71567e-07,-7.26152e-09,0.912077,0.000484877,-1.93352e-07,2.18139e-08,0.912562,0.000484555,-1.2791e-07,-2.03895e-08,0.913047,0.000484238,-1.89078e-07,1.39494e-10,0.913531,0.000483861,-1.8866e-07,1.98315e-08,0.914014,0.000483543,-1.29165e-07,-1.98609e-08,0.914498,0.000483225,-1.88748e-07,7.39912e-12,0.914981,0.000482847,-1.88726e-07,1.98313e-08,0.915463,0.000482529,-1.29232e-07,-1.9728e-08,0.915946,0.000482212,-1.88416e-07,-5.24035e-10,0.916428,0.000481833,-1.89988e-07,2.18241e-08,0.916909,0.000481519,-1.24516e-07,-2.71679e-08,0.917391,0.000481188,-2.06019e-07,2.72427e-08,0.917872,0.000480858,-1.24291e-07,-2.21985e-08,0.918353,0.000480543,-1.90886e-07,1.94644e-09,0.918833,0.000480167,-1.85047e-07,1.44127e-08,0.919313,0.00047984,-1.41809e-07,7.39438e-12,0.919793,0.000479556,-1.41787e-07,-1.44423e-08,0.920272,0.000479229,-1.85114e-07,-1.84291e-09,0.920751,0.000478854,-1.90642e-07,2.18139e-08,0.92123,0.000478538,-1.25201e-07,-2.58081e-08,0.921708,0.00047821,-2.02625e-07,2.18139e-08,0.922186,0.00047787,-1.37183e-07,-1.84291e-09,0.922664,0.00047759,-1.42712e-07,-1.44423e-08,0.923141,0.000477262,-1.86039e-07,7.34701e-12,0.923618,0.00047689,-1.86017e-07,1.44129e-08,0.924095,0.000476561,-1.42778e-07,1.94572e-09,0.924572,0.000476281,-1.36941e-07,-2.21958e-08,0.925048,0.000475941,-2.03528e-07,2.72327e-08,0.925523,0.000475615,-1.2183e-07,-2.71304e-08,0.925999,0.00047529,-2.03221e-07,2.16843e-08,0.926474,0.000474949,-1.38168e-07,-2.16005e-12,0.926949,0.000474672,-1.38175e-07,-2.16756e-08,0.927423,0.000474331,-2.03202e-07,2.71001e-08,0.927897,0.000474006,-1.21902e-07,-2.71201e-08,0.928371,0.000473681,-2.03262e-07,2.17757e-08,0.928845,0.00047334,-1.37935e-07,-3.78028e-10,0.929318,0.000473063,-1.39069e-07,-2.02636e-08,0.929791,0.000472724,-1.9986e-07,2.18276e-08,0.930263,0.000472389,-1.34377e-07,-7.44231e-09,0.930736,0.000472098,-1.56704e-07,7.94165e-09,0.931208,0.000471809,-1.32879e-07,-2.43243e-08,0.931679,0.00047147,-2.05851e-07,2.97508e-08,0.932151,0.000471148,-1.16599e-07,-3.50742e-08,0.932622,0.000470809,-2.21822e-07,5.09414e-08,0.933092,0.000470518,-6.89976e-08,-4.94821e-08,0.933563,0.000470232,-2.17444e-07,2.77775e-08,0.934033,0.00046988,-1.34111e-07,-2.02351e-09,0.934502,0.000469606,-1.40182e-07,-1.96835e-08,0.934972,0.000469267,-1.99232e-07,2.11529e-08,0.935441,0.000468932,-1.35774e-07,-5.32332e-09,0.93591,0.000468644,-1.51743e-07,1.40413e-10,0.936378,0.000468341,-1.51322e-07,4.76166e-09,0.936846,0.000468053,-1.37037e-07,-1.9187e-08,0.937314,0.000467721,-1.94598e-07,1.23819e-08,0.937782,0.000467369,-1.57453e-07,2.92642e-08,0.938249,0.000467142,-6.96601e-08,-6.98342e-08,0.938716,0.000466793,-2.79163e-07,7.12586e-08,0.939183,0.000466449,-6.53869e-08,-3.63863e-08,0.939649,0.000466209,-1.74546e-07,1.46818e-08,0.940115,0.000465904,-1.305e-07,-2.2341e-08,0.940581,0.000465576,-1.97523e-07,1.50774e-08,0.941046,0.000465226,-1.52291e-07,2.16359e-08,0.941511,0.000464986,-8.73832e-08,-4.20162e-08,0.941976,0.000464685,-2.13432e-07,2.72198e-08,0.942441,0.00046434,-1.31773e-07,-7.2581e-09,0.942905,0.000464055,-1.53547e-07,1.81263e-09,0.943369,0.000463753,-1.48109e-07,7.58386e-12,0.943832,0.000463457,-1.48086e-07,-1.84298e-09,0.944296,0.000463155,-1.53615e-07,7.36433e-09,0.944759,0.00046287,-1.31522e-07,-2.76143e-08,0.945221,0.000462524,-2.14365e-07,4.34883e-08,0.945684,0.000462226,-8.39003e-08,-2.71297e-08,0.946146,0.000461977,-1.65289e-07,5.42595e-09,0.946608,0.000461662,-1.49012e-07,5.42593e-09,0.947069,0.000461381,-1.32734e-07,-2.71297e-08,0.94753,0.000461034,-2.14123e-07,4.34881e-08,0.947991,0.000460736,-8.36585e-08,-2.76134e-08,0.948452,0.000460486,-1.66499e-07,7.36083e-09,0.948912,0.000460175,-1.44416e-07,-1.82993e-09,0.949372,0.000459881,-1.49906e-07,-4.11073e-11,0.949832,0.000459581,-1.50029e-07,1.99434e-09,0.950291,0.000459287,-1.44046e-07,-7.93627e-09,0.950751,0.000458975,-1.67855e-07,2.97507e-08,0.951209,0.000458728,-7.86029e-08,-5.1462e-08,0.951668,0.000458417,-2.32989e-07,5.6888e-08,0.952126,0.000458121,-6.2325e-08,-5.68806e-08,0.952584,0.000457826,-2.32967e-07,5.14251e-08,0.953042,0.000457514,-7.86914e-08,-2.96107e-08,0.953499,0.000457268,-1.67523e-07,7.41296e-09,0.953956,0.000456955,-1.45285e-07,-4.11262e-11,0.954413,0.000456665,-1.45408e-07,-7.24847e-09,0.95487,0.000456352,-1.67153e-07,2.9035e-08,0.955326,0.000456105,-8.00484e-08,-4.92869e-08,0.955782,0.000455797,-2.27909e-07,4.89032e-08,0.956238,0.000455488,-8.11994e-08,-2.71166e-08,0.956693,0.000455244,-1.62549e-07,-4.13678e-11,0.957148,0.000454919,-1.62673e-07,2.72821e-08,0.957603,0.000454675,-8.0827e-08,-4.94824e-08,0.958057,0.000454365,-2.29274e-07,5.14382e-08,0.958512,0.000454061,-7.49597e-08,-3.7061e-08,0.958965,0.0004538,-1.86143e-07,3.72013e-08,0.959419,0.000453539,-7.45389e-08,-5.21396e-08,0.959873,0.000453234,-2.30958e-07,5.21476e-08,0.960326,0.000452928,-7.45146e-08,-3.72416e-08,0.960778,0.000452667,-1.8624e-07,3.72143e-08,0.961231,0.000452407,-7.45967e-08,-5.20109e-08,0.961683,0.000452101,-2.30629e-07,5.16199e-08,0.962135,0.000451795,-7.57696e-08,-3.52595e-08,0.962587,0.000451538,-1.81548e-07,2.98133e-08,0.963038,0.000451264,-9.2108e-08,-2.43892e-08,0.963489,0.000451007,-1.65276e-07,8.13892e-09,0.96394,0.000450701,-1.40859e-07,-8.16647e-09,0.964391,0.000450394,-1.65358e-07,2.45269e-08,0.964841,0.000450137,-9.17775e-08,-3.03367e-08,0.965291,0.000449863,-1.82787e-07,3.7215e-08,0.965741,0.000449609,-7.11424e-08,-5.89188e-08,0.96619,0.00044929,-2.47899e-07,7.92509e-08,0.966639,0.000449032,-1.01462e-08,-7.92707e-08,0.967088,0.000448773,-2.47958e-07,5.90181e-08,0.967537,0.000448455,-7.0904e-08,-3.75925e-08,0.967985,0.0004482,-1.83681e-07,3.17471e-08,0.968433,0.000447928,-8.84401e-08,-2.97913e-08,0.968881,0.000447662,-1.77814e-07,2.78133e-08,0.969329,0.000447389,-9.4374e-08,-2.18572e-08,0.969776,0.000447135,-1.59946e-07,1.10134e-11,0.970223,0.000446815,-1.59913e-07,2.18132e-08,0.97067,0.000446561,-9.44732e-08,-2.76591e-08,0.971116,0.000446289,-1.7745e-07,2.92185e-08,0.971562,0.000446022,-8.97948e-08,-2.96104e-08,0.972008,0.000445753,-1.78626e-07,2.96185e-08,0.972454,0.000445485,-8.97706e-08,-2.92588e-08,0.972899,0.000445218,-1.77547e-07,2.78123e-08,0.973344,0.000444946,-9.41103e-08,-2.23856e-08,0.973789,0.000444691,-1.61267e-07,2.12559e-09,0.974233,0.000444374,-1.5489e-07,1.38833e-08,0.974678,0.000444106,-1.13241e-07,1.94591e-09,0.975122,0.000443886,-1.07403e-07,-2.16669e-08,0.975565,0.000443606,-1.72404e-07,2.5117e-08,0.976009,0.000443336,-9.70526e-08,-1.91963e-08,0.976452,0.000443085,-1.54642e-07,-7.93627e-09,0.976895,0.000442752,-1.7845e-07,5.09414e-08,0.977338,0.000442548,-2.56262e-08,-7.66201e-08,0.97778,0.000442266,-2.55486e-07,7.67249e-08,0.978222,0.000441986,-2.53118e-08,-5.14655e-08,0.978664,0.000441781,-1.79708e-07,9.92773e-09,0.979106,0.000441451,-1.49925e-07,1.17546e-08,0.979547,0.000441186,-1.14661e-07,2.65868e-09,0.979988,0.000440965,-1.06685e-07,-2.23893e-08,0.980429,0.000440684,-1.73853e-07,2.72939e-08,0.980869,0.000440419,-9.19716e-08,-2.71816e-08,0.98131,0.000440153,-1.73516e-07,2.18278e-08,0.98175,0.000439872,-1.08033e-07,-5.24833e-10,0.982189,0.000439654,-1.09607e-07,-1.97284e-08,0.982629,0.000439376,-1.68793e-07,1.98339e-08,0.983068,0.000439097,-1.09291e-07,-2.62901e-12,0.983507,0.000438879,-1.09299e-07,-1.98234e-08,0.983946,0.000438601,-1.68769e-07,1.96916e-08,0.984384,0.000438322,-1.09694e-07,6.6157e-10,0.984823,0.000438105,-1.0771e-07,-2.23379e-08,0.985261,0.000437823,-1.74723e-07,2.90855e-08,0.985698,0.00043756,-8.74669e-08,-3.43992e-08,0.986136,0.000437282,-1.90665e-07,4.89068e-08,0.986573,0.000437048,-4.39442e-08,-4.20188e-08,0.98701,0.000436834,-1.7e-07,-4.11073e-11,0.987446,0.000436494,-1.70124e-07,4.21832e-08,0.987883,0.00043628,-4.35742e-08,-4.94824e-08,0.988319,0.000436044,-1.92021e-07,3.6537e-08,0.988755,0.00043577,-8.24102e-08,-3.70611e-08,0.989191,0.000435494,-1.93593e-07,5.21026e-08,0.989626,0.000435263,-3.72855e-08,-5.21402e-08,0.990061,0.000435032,-1.93706e-07,3.7249e-08,0.990496,0.000434756,-8.19592e-08,-3.72512e-08,0.990931,0.000434481,-1.93713e-07,5.21511e-08,0.991365,0.00043425,-3.72595e-08,-5.21439e-08,0.991799,0.000434019,-1.93691e-07,3.72152e-08,0.992233,0.000433743,-8.20456e-08,-3.71123e-08,0.992667,0.000433468,-1.93382e-07,5.16292e-08,0.9931,0.000433236,-3.84947e-08,-5.01953e-08,0.993533,0.000433008,-1.89081e-07,2.99427e-08,0.993966,0.00043272,-9.92525e-08,-9.9708e-09,0.994399,0.000432491,-1.29165e-07,9.94051e-09,0.994831,0.000432263,-9.93434e-08,-2.97912e-08,0.995263,0.000431975,-1.88717e-07,4.96198e-08,0.995695,0.000431746,-3.98578e-08,-4.94785e-08,0.996127,0.000431518,-1.88293e-07,2.9085e-08,0.996558,0.000431229,-1.01038e-07,-7.25675e-09,0.996989,0.000431005,-1.22809e-07,-5.79945e-11,0.99742,0.000430759,-1.22983e-07,7.48873e-09,0.997851,0.000430536,-1.00516e-07,-2.98969e-08,0.998281,0.000430245,-1.90207e-07,5.24942e-08,0.998711,0.000430022,-3.27246e-08,-6.08706e-08,0.999141,0.000429774,-2.15336e-07,7.17788e-08,0.999571,0.000429392,0.,0.};
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Lab2RGBConvert_f(const T& src, D& dst)
+        {
+            const float lThresh = 0.008856f * 903.3f;
+            const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
+
+            float Y, fy;
+
+            if (src.x <= lThresh)
+            {
+                Y = src.x / 903.3f;
+                fy = 7.787f * Y + 16.0f / 116.0f;
+            }
+            else
+            {
+                fy = (src.x + 16.0f) / 116.0f;
+                Y = fy * fy * fy;
+            }
+
+            float X = src.y / 500.0f + fy;
+            float Z = fy - src.z / 200.0f;
+
+            if (X <= fThresh)
+                X = (X - 16.0f / 116.0f) / 7.787f;
+            else
+                X = X * X * X;
+
+            if (Z <= fThresh)
+                Z = (Z - 16.0f / 116.0f) / 7.787f;
+            else
+                Z = Z * Z * Z;
+
+            float B = 0.052891f * X - 0.204043f * Y + 1.151152f * Z;
+            float G = -0.921235f * X + 1.875991f * Y + 0.045244f * Z;
+            float R = 3.079933f * X - 1.537150f * Y - 0.542782f * Z;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Lab2RGBConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            srcf.x = src.x * (100.f / 255.f);
+            srcf.y = src.y - 128;
+            srcf.z = src.z - 128;
+
+            Lab2RGBConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 255.f);
+            dst.y = saturate_cast<uchar>(dstf.y * 255.f);
+            dst.z = saturate_cast<uchar>(dstf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Lab2RGB;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Lab2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                Lab2RGBConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                Lab2RGBConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////////// RGB <-> Luv /////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ float c_LabCbrtTab[] = {0.137931,0.0114066,0.,1.18859e-07,0.149338,0.011407,3.56578e-07,-5.79396e-07,0.160745,0.0114059,-1.38161e-06,2.16892e-06,0.172151,0.0114097,5.12516e-06,-8.0814e-06,0.183558,0.0113957,-1.9119e-05,3.01567e-05,0.194965,0.0114479,7.13509e-05,-0.000112545,0.206371,0.011253,-0.000266285,-0.000106493,0.217252,0.0104009,-0.000585765,7.32149e-05,0.22714,0.00944906,-0.00036612,1.21917e-05,0.236235,0.0087534,-0.000329545,2.01753e-05,0.244679,0.00815483,-0.000269019,1.24435e-05,0.252577,0.00765412,-0.000231689,1.05618e-05,0.26001,0.00722243,-0.000200003,8.26662e-06,0.267041,0.00684723,-0.000175203,6.76746e-06,0.27372,0.00651712,-0.000154901,5.61192e-06,0.280088,0.00622416,-0.000138065,4.67009e-06,0.286179,0.00596204,-0.000124055,3.99012e-06,0.292021,0.0057259,-0.000112085,3.36032e-06,0.297638,0.00551181,-0.000102004,2.95338e-06,0.30305,0.00531666,-9.31435e-05,2.52875e-06,0.308277,0.00513796,-8.55572e-05,2.22022e-06,0.313331,0.00497351,-7.88966e-05,1.97163e-06,0.318228,0.00482163,-7.29817e-05,1.7248e-06,0.322978,0.00468084,-6.78073e-05,1.55998e-06,0.327593,0.0045499,-6.31274e-05,1.36343e-06,0.332081,0.00442774,-5.90371e-05,1.27136e-06,0.336451,0.00431348,-5.5223e-05,1.09111e-06,0.34071,0.00420631,-5.19496e-05,1.0399e-06,0.344866,0.00410553,-4.88299e-05,9.18347e-07,0.348923,0.00401062,-4.60749e-05,8.29942e-07,0.352889,0.00392096,-4.35851e-05,7.98478e-07,0.356767,0.00383619,-4.11896e-05,6.84917e-07,0.360562,0.00375586,-3.91349e-05,6.63976e-07,0.36428,0.00367959,-3.7143e-05,5.93086e-07,0.367923,0.00360708,-3.53637e-05,5.6976e-07,0.371495,0.00353806,-3.36544e-05,4.95533e-07,0.375,0.00347224,-3.21678e-05,4.87951e-07,0.378441,0.00340937,-3.0704e-05,4.4349e-07,0.38182,0.00334929,-2.93735e-05,4.20297e-07,0.38514,0.0032918,-2.81126e-05,3.7872e-07,0.388404,0.00323671,-2.69764e-05,3.596e-07,0.391614,0.00318384,-2.58976e-05,3.5845e-07,0.394772,0.00313312,-2.48223e-05,2.92765e-07,0.397881,0.00308435,-2.3944e-05,3.18232e-07,0.400942,0.00303742,-2.29893e-05,2.82046e-07,0.403957,0.00299229,-2.21432e-05,2.52315e-07,0.406927,0.00294876,-2.13862e-05,2.58416e-07,0.409855,0.00290676,-2.0611e-05,2.33939e-07,0.412741,0.00286624,-1.99092e-05,2.36342e-07,0.415587,0.00282713,-1.92001e-05,1.916e-07,0.418396,0.00278931,-1.86253e-05,2.1915e-07,0.421167,0.00275271,-1.79679e-05,1.83498e-07,0.423901,0.00271733,-1.74174e-05,1.79343e-07,0.426602,0.00268303,-1.68794e-05,1.72013e-07,0.429268,0.00264979,-1.63633e-05,1.75686e-07,0.431901,0.00261759,-1.58363e-05,1.3852e-07,0.434503,0.00258633,-1.54207e-05,1.64304e-07,0.437074,0.00255598,-1.49278e-05,1.28136e-07,0.439616,0.00252651,-1.45434e-05,1.57618e-07,0.442128,0.0024979,-1.40705e-05,1.0566e-07,0.444612,0.00247007,-1.37535e-05,1.34998e-07,0.447068,0.00244297,-1.33485e-05,1.29207e-07,0.449498,0.00241666,-1.29609e-05,9.32347e-08,0.451902,0.00239102,-1.26812e-05,1.23703e-07,0.45428,0.00236603,-1.23101e-05,9.74072e-08,0.456634,0.0023417,-1.20179e-05,1.12518e-07,0.458964,0.002318,-1.16803e-05,7.83681e-08,0.46127,0.00229488,-1.14452e-05,1.10452e-07,0.463554,0.00227232,-1.11139e-05,7.58719e-08,0.465815,0.00225032,-1.08863e-05,9.2699e-08,0.468055,0.00222882,-1.06082e-05,8.97738e-08,0.470273,0.00220788,-1.03388e-05,5.4845e-08,0.47247,0.00218736,-1.01743e-05,1.0808e-07,0.474648,0.00216734,-9.85007e-06,4.9277e-08,0.476805,0.00214779,-9.70224e-06,8.22408e-08,0.478943,0.00212863,-9.45551e-06,6.87942e-08,0.481063,0.00210993,-9.24913e-06,5.98144e-08,0.483163,0.00209161,-9.06969e-06,7.93789e-08,0.485246,0.00207371,-8.83155e-06,3.99032e-08,0.487311,0.00205616,-8.71184e-06,8.88325e-08,0.489358,0.002039,-8.44534e-06,2.20004e-08,0.491389,0.00202218,-8.37934e-06,9.13872e-08,0.493403,0.0020057,-8.10518e-06,2.96829e-08,0.495401,0.00198957,-8.01613e-06,5.81028e-08,0.497382,0.00197372,-7.84183e-06,6.5731e-08,0.499348,0.00195823,-7.64463e-06,3.66019e-08,0.501299,0.00194305,-7.53483e-06,2.62811e-08,0.503234,0.00192806,-7.45598e-06,9.66907e-08,0.505155,0.00191344,-7.16591e-06,4.18928e-09,0.507061,0.00189912,-7.15334e-06,6.53665e-08,0.508953,0.00188501,-6.95724e-06,3.23686e-08,0.510831,0.00187119,-6.86014e-06,4.35774e-08,0.512696,0.0018576,-6.72941e-06,3.17406e-08,0.514547,0.00184424,-6.63418e-06,6.78785e-08,0.516384,0.00183117,-6.43055e-06,-5.23126e-09,0.518209,0.0018183,-6.44624e-06,7.22562e-08,0.520021,0.00180562,-6.22947e-06,1.42292e-08,0.52182,0.0017932,-6.18679e-06,4.9641e-08,0.523607,0.00178098,-6.03786e-06,2.56259e-08,0.525382,0.00176898,-5.96099e-06,2.66696e-08,0.527145,0.00175714,-5.88098e-06,4.65094e-08,0.528897,0.00174552,-5.74145e-06,2.57114e-08,0.530637,0.00173411,-5.66431e-06,2.94588e-08,0.532365,0.00172287,-5.57594e-06,3.52667e-08,0.534082,0.00171182,-5.47014e-06,8.28868e-09,0.535789,0.00170091,-5.44527e-06,5.07871e-08,0.537484,0.00169017,-5.29291e-06,2.69817e-08,0.539169,0.00167967,-5.21197e-06,2.01009e-08,0.540844,0.0016693,-5.15166e-06,1.18237e-08,0.542508,0.00165903,-5.11619e-06,5.18135e-08,0.544162,0.00164896,-4.96075e-06,1.9341e-08,0.545806,0.00163909,-4.90273e-06,-9.96867e-09,0.54744,0.00162926,-4.93263e-06,8.01382e-08,0.549064,0.00161963,-4.69222e-06,-1.25601e-08,0.550679,0.00161021,-4.7299e-06,2.97067e-08,0.552285,0.00160084,-4.64078e-06,1.29426e-08,0.553881,0.0015916,-4.60195e-06,3.77327e-08,0.555468,0.00158251,-4.48875e-06,1.49412e-08,0.557046,0.00157357,-4.44393e-06,2.17118e-08,0.558615,0.00156475,-4.3788e-06,1.74206e-08,0.560176,0.00155605,-4.32653e-06,2.78152e-08,0.561727,0.00154748,-4.24309e-06,-9.47239e-09,0.563271,0.00153896,-4.27151e-06,6.9679e-08,0.564805,0.00153063,-4.06247e-06,-3.08246e-08,0.566332,0.00152241,-4.15494e-06,5.36188e-08,0.56785,0.00151426,-3.99409e-06,-4.83594e-09,0.56936,0.00150626,-4.00859e-06,2.53293e-08,0.570863,0.00149832,-3.93261e-06,2.27286e-08,0.572357,0.00149052,-3.86442e-06,2.96541e-09,0.573844,0.0014828,-3.85552e-06,2.50147e-08,0.575323,0.00147516,-3.78048e-06,1.61842e-08,0.576794,0.00146765,-3.73193e-06,2.94582e-08,0.578258,0.00146028,-3.64355e-06,-1.48076e-08,0.579715,0.00145295,-3.68798e-06,2.97724e-08,0.581164,0.00144566,-3.59866e-06,1.49272e-08,0.582606,0.00143851,-3.55388e-06,2.97285e-08,0.584041,0.00143149,-3.46469e-06,-1.46323e-08,0.585469,0.00142451,-3.50859e-06,2.88004e-08,0.58689,0.00141758,-3.42219e-06,1.864e-08,0.588304,0.00141079,-3.36627e-06,1.58482e-08,0.589712,0.00140411,-3.31872e-06,-2.24279e-08,0.591112,0.00139741,-3.38601e-06,7.38639e-08,0.592507,0.00139085,-3.16441e-06,-3.46088e-08,0.593894,0.00138442,-3.26824e-06,4.96675e-09,0.595275,0.0013779,-3.25334e-06,7.4346e-08,0.59665,0.00137162,-3.0303e-06,-6.39319e-08,0.598019,0.00136536,-3.2221e-06,6.21725e-08,0.599381,0.00135911,-3.03558e-06,-5.94423e-09,0.600737,0.00135302,-3.05341e-06,2.12091e-08,0.602087,0.00134697,-2.98979e-06,-1.92876e-08,0.603431,0.00134094,-3.04765e-06,5.5941e-08,0.604769,0.00133501,-2.87983e-06,-2.56622e-08,0.606101,0.00132917,-2.95681e-06,4.67078e-08,0.607427,0.0013234,-2.81669e-06,-4.19592e-08,0.608748,0.00131764,-2.94257e-06,6.15243e-08,0.610062,0.00131194,-2.75799e-06,-2.53244e-08,0.611372,0.00130635,-2.83397e-06,3.97739e-08,0.612675,0.0013008,-2.71465e-06,-1.45618e-08,0.613973,0.00129533,-2.75833e-06,1.84733e-08,0.615266,0.00128986,-2.70291e-06,2.73606e-10,0.616553,0.00128446,-2.70209e-06,4.00367e-08,0.617835,0.00127918,-2.58198e-06,-4.12113e-08,0.619111,0.00127389,-2.70561e-06,6.52039e-08,0.620383,0.00126867,-2.51e-06,-4.07901e-08,0.621649,0.00126353,-2.63237e-06,3.83516e-08,0.62291,0.00125838,-2.51732e-06,6.59315e-09,0.624166,0.00125337,-2.49754e-06,-5.11939e-09,0.625416,0.00124836,-2.5129e-06,1.38846e-08,0.626662,0.00124337,-2.47124e-06,9.18514e-09,0.627903,0.00123846,-2.44369e-06,8.97952e-09,0.629139,0.0012336,-2.41675e-06,1.45012e-08,0.63037,0.00122881,-2.37325e-06,-7.37949e-09,0.631597,0.00122404,-2.39538e-06,1.50169e-08,0.632818,0.00121929,-2.35033e-06,6.91648e-09,0.634035,0.00121461,-2.32958e-06,1.69219e-08,0.635248,0.00121,-2.27882e-06,-1.49997e-08,0.636455,0.0012054,-2.32382e-06,4.30769e-08,0.637659,0.00120088,-2.19459e-06,-3.80986e-08,0.638857,0.00119638,-2.30888e-06,4.97134e-08,0.640051,0.00119191,-2.15974e-06,-4.15463e-08,0.641241,0.00118747,-2.28438e-06,5.68667e-08,0.642426,0.00118307,-2.11378e-06,-7.10641e-09,0.643607,0.00117882,-2.1351e-06,-2.8441e-08,0.644784,0.00117446,-2.22042e-06,6.12658e-08,0.645956,0.00117021,-2.03663e-06,-3.78083e-08,0.647124,0.00116602,-2.15005e-06,3.03627e-08,0.648288,0.00116181,-2.05896e-06,-2.40379e-08,0.649448,0.00115762,-2.13108e-06,6.57887e-08,0.650603,0.00115356,-1.93371e-06,-6.03028e-08,0.651755,0.00114951,-2.11462e-06,5.62134e-08,0.652902,0.00114545,-1.94598e-06,-4.53417e-08,0.654046,0.00114142,-2.082e-06,6.55489e-08,0.655185,0.00113745,-1.88536e-06,-3.80396e-08,0.656321,0.00113357,-1.99948e-06,2.70049e-08,0.657452,0.00112965,-1.91846e-06,-1.03755e-08,0.65858,0.00112578,-1.94959e-06,1.44973e-08,0.659704,0.00112192,-1.9061e-06,1.1991e-08,0.660824,0.00111815,-1.87012e-06,-2.85634e-09,0.66194,0.0011144,-1.87869e-06,-5.65782e-10,0.663053,0.00111064,-1.88039e-06,5.11947e-09,0.664162,0.0011069,-1.86503e-06,3.96924e-08,0.665267,0.00110328,-1.74595e-06,-4.46795e-08,0.666368,0.00109966,-1.87999e-06,1.98161e-08,0.667466,0.00109596,-1.82054e-06,2.502e-08,0.66856,0.00109239,-1.74548e-06,-6.86593e-10,0.669651,0.0010889,-1.74754e-06,-2.22739e-08,0.670738,0.00108534,-1.81437e-06,3.01776e-08,0.671821,0.0010818,-1.72383e-06,2.07732e-08,0.672902,0.00107841,-1.66151e-06,-5.36658e-08,0.673978,0.00107493,-1.82251e-06,7.46802e-08,0.675051,0.00107151,-1.59847e-06,-6.62411e-08,0.676121,0.00106811,-1.79719e-06,7.10748e-08,0.677188,0.00106473,-1.58397e-06,-3.92441e-08,0.678251,0.00106145,-1.7017e-06,2.62973e-08,0.679311,0.00105812,-1.62281e-06,-6.34035e-09,0.680367,0.00105486,-1.64183e-06,-9.36249e-10,0.68142,0.00105157,-1.64464e-06,1.00854e-08,0.68247,0.00104831,-1.61438e-06,2.01995e-08,0.683517,0.00104514,-1.55378e-06,-3.1279e-08,0.68456,0.00104194,-1.64762e-06,4.53114e-08,0.685601,0.00103878,-1.51169e-06,-3.07573e-08,0.686638,0.00103567,-1.60396e-06,1.81133e-08,0.687672,0.00103251,-1.54962e-06,1.79085e-08,0.688703,0.00102947,-1.49589e-06,-3.01428e-08,0.689731,0.00102639,-1.58632e-06,4.30583e-08,0.690756,0.00102334,-1.45715e-06,-2.28814e-08,0.691778,0.00102036,-1.52579e-06,-1.11373e-08,0.692797,0.00101727,-1.5592e-06,6.74305e-08,0.693812,0.00101436,-1.35691e-06,-7.97709e-08,0.694825,0.0010114,-1.59622e-06,7.28391e-08,0.695835,0.00100843,-1.37771e-06,-3.27715e-08,0.696842,0.00100558,-1.47602e-06,-1.35807e-09,0.697846,0.00100262,-1.48009e-06,3.82037e-08,0.698847,0.000999775,-1.36548e-06,-3.22474e-08,0.699846,0.000996948,-1.46223e-06,3.11809e-08,0.700841,0.000994117,-1.36868e-06,-3.28714e-08,0.701834,0.000991281,-1.4673e-06,4.07001e-08,0.702824,0.000988468,-1.3452e-06,-1.07197e-08,0.703811,0.000985746,-1.37736e-06,2.17866e-09,0.704795,0.000982998,-1.37082e-06,2.00521e-09,0.705777,0.000980262,-1.3648e-06,-1.01996e-08,0.706756,0.000977502,-1.3954e-06,3.87931e-08,0.707732,0.000974827,-1.27902e-06,-2.57632e-08,0.708706,0.000972192,-1.35631e-06,4.65513e-09,0.709676,0.000969493,-1.34235e-06,7.14257e-09,0.710645,0.00096683,-1.32092e-06,2.63791e-08,0.71161,0.000964267,-1.24178e-06,-5.30543e-08,0.712573,0.000961625,-1.40095e-06,6.66289e-08,0.713533,0.000959023,-1.20106e-06,-3.46474e-08,0.714491,0.000956517,-1.305e-06,1.23559e-08,0.715446,0.000953944,-1.26793e-06,-1.47763e-08,0.716399,0.000951364,-1.31226e-06,4.67494e-08,0.717349,0.000948879,-1.17201e-06,-5.3012e-08,0.718297,0.000946376,-1.33105e-06,4.60894e-08,0.719242,0.000943852,-1.19278e-06,-1.21366e-08,0.720185,0.00094143,-1.22919e-06,2.45673e-09,0.721125,0.000938979,-1.22182e-06,2.30966e-09,0.722063,0.000936543,-1.21489e-06,-1.16954e-08,0.722998,0.000934078,-1.24998e-06,4.44718e-08,0.723931,0.000931711,-1.11656e-06,-4.69823e-08,0.724861,0.000929337,-1.25751e-06,2.4248e-08,0.725789,0.000926895,-1.18477e-06,9.5949e-09,0.726715,0.000924554,-1.15598e-06,-3.02286e-09,0.727638,0.000922233,-1.16505e-06,2.49649e-09,0.72856,0.00091991,-1.15756e-06,-6.96321e-09,0.729478,0.000917575,-1.17845e-06,2.53564e-08,0.730395,0.000915294,-1.10238e-06,-3.48578e-08,0.731309,0.000912984,-1.20695e-06,5.44704e-08,0.732221,0.000910734,-1.04354e-06,-6.38144e-08,0.73313,0.000908455,-1.23499e-06,8.15781e-08,0.734038,0.00090623,-9.90253e-07,-8.3684e-08,0.734943,0.000903999,-1.2413e-06,7.43441e-08,0.735846,0.000901739,-1.01827e-06,-3.48787e-08,0.736746,0.000899598,-1.12291e-06,5.56596e-09,0.737645,0.000897369,-1.10621e-06,1.26148e-08,0.738541,0.000895194,-1.06837e-06,3.57935e-09,0.739435,0.000893068,-1.05763e-06,-2.69322e-08,0.740327,0.000890872,-1.13842e-06,4.45448e-08,0.741217,0.000888729,-1.00479e-06,-3.20376e-08,0.742105,0.000886623,-1.1009e-06,2.40011e-08,0.74299,0.000884493,-1.0289e-06,-4.36209e-09,0.743874,0.000882422,-1.04199e-06,-6.55268e-09,0.744755,0.000880319,-1.06164e-06,3.05728e-08,0.745634,0.000878287,-9.69926e-07,-5.61338e-08,0.746512,0.000876179,-1.13833e-06,7.4753e-08,0.747387,0.000874127,-9.14068e-07,-6.40644e-08,0.74826,0.000872106,-1.10626e-06,6.22955e-08,0.749131,0.000870081,-9.19375e-07,-6.59083e-08,0.75,0.000868044,-1.1171e-06,8.21284e-08,0.750867,0.000866056,-8.70714e-07,-8.37915e-08,0.751732,0.000864064,-1.12209e-06,7.42237e-08,0.752595,0.000862042,-8.99418e-07,-3.42894e-08,0.753456,0.00086014,-1.00229e-06,3.32955e-09,0.754315,0.000858146,-9.92297e-07,2.09712e-08,0.755173,0.000856224,-9.29384e-07,-2.76096e-08,0.756028,0.000854282,-1.01221e-06,2.98627e-08,0.756881,0.000852348,-9.22625e-07,-3.22365e-08,0.757733,0.000850406,-1.01933e-06,3.94786e-08,0.758582,0.000848485,-9.00898e-07,-6.46833e-09,0.75943,0.000846664,-9.20303e-07,-1.36052e-08,0.760275,0.000844783,-9.61119e-07,1.28447e-09,0.761119,0.000842864,-9.57266e-07,8.4674e-09,0.761961,0.000840975,-9.31864e-07,2.44506e-08,0.762801,0.000839185,-8.58512e-07,-4.6665e-08,0.763639,0.000837328,-9.98507e-07,4.30001e-08,0.764476,0.00083546,-8.69507e-07,-6.12609e-09,0.76531,0.000833703,-8.87885e-07,-1.84959e-08,0.766143,0.000831871,-9.43372e-07,2.05052e-08,0.766974,0.000830046,-8.81857e-07,-3.92026e-09,0.767803,0.000828271,-8.93618e-07,-4.82426e-09,0.768631,0.000826469,-9.0809e-07,2.32172e-08,0.769456,0.000824722,-8.38439e-07,-2.84401e-08,0.77028,0.00082296,-9.23759e-07,3.09386e-08,0.771102,0.000821205,-8.30943e-07,-3.57099e-08,0.771922,0.000819436,-9.38073e-07,5.22963e-08,0.772741,0.000817717,-7.81184e-07,-5.42658e-08,0.773558,0.000815992,-9.43981e-07,4.55579e-08,0.774373,0.000814241,-8.07308e-07,-8.75656e-09,0.775186,0.0008126,-8.33578e-07,-1.05315e-08,0.775998,0.000810901,-8.65172e-07,-8.72188e-09,0.776808,0.000809145,-8.91338e-07,4.54191e-08,0.777616,0.000807498,-7.5508e-07,-5.37454e-08,0.778423,0.000805827,-9.16317e-07,5.03532e-08,0.779228,0.000804145,-7.65257e-07,-2.84584e-08,0.780031,0.000802529,-8.50632e-07,3.87579e-09,0.780833,0.00080084,-8.39005e-07,1.29552e-08,0.781633,0.0007992,-8.00139e-07,3.90804e-09,0.782432,0.000797612,-7.88415e-07,-2.85874e-08,0.783228,0.000795949,-8.74177e-07,5.0837e-08,0.784023,0.000794353,-7.21666e-07,-5.55513e-08,0.784817,0.000792743,-8.8832e-07,5.21587e-08,0.785609,0.000791123,-7.31844e-07,-3.38744e-08,0.786399,0.000789558,-8.33467e-07,2.37342e-08,0.787188,0.000787962,-7.62264e-07,-1.45775e-09,0.787975,0.000786433,-7.66638e-07,-1.79034e-08,0.788761,0.000784846,-8.20348e-07,1.34665e-08,0.789545,0.000783246,-7.79948e-07,2.3642e-08,0.790327,0.000781757,-7.09022e-07,-4.84297e-08,0.791108,0.000780194,-8.54311e-07,5.08674e-08,0.791888,0.000778638,-7.01709e-07,-3.58303e-08,0.792666,0.000777127,-8.092e-07,3.28493e-08,0.793442,0.000775607,-7.10652e-07,-3.59624e-08,0.794217,0.000774078,-8.1854e-07,5.13959e-08,0.79499,0.000772595,-6.64352e-07,-5.04121e-08,0.795762,0.000771115,-8.15588e-07,3.10431e-08,0.796532,0.000769577,-7.22459e-07,-1.41557e-08,0.797301,0.00076809,-7.64926e-07,2.55795e-08,0.798069,0.000766636,-6.88187e-07,-2.85578e-08,0.798835,0.000765174,-7.73861e-07,2.90472e-08,0.799599,0.000763714,-6.86719e-07,-2.80262e-08,0.800362,0.000762256,-7.70798e-07,2.34531e-08,0.801123,0.000760785,-7.00438e-07,-6.18144e-09,0.801884,0.000759366,-7.18983e-07,1.27263e-09,0.802642,0.000757931,-7.15165e-07,1.09101e-09,0.803399,0.000756504,-7.11892e-07,-5.63675e-09,0.804155,0.000755064,-7.28802e-07,2.14559e-08,0.80491,0.00075367,-6.64434e-07,-2.05821e-08,0.805663,0.00075228,-7.26181e-07,1.26812e-09,0.806414,0.000750831,-7.22377e-07,1.55097e-08,0.807164,0.000749433,-6.75848e-07,-3.70216e-09,0.807913,0.00074807,-6.86954e-07,-7.0105e-10,0.80866,0.000746694,-6.89057e-07,6.5063e-09,0.809406,0.000745336,-6.69538e-07,-2.53242e-08,0.810151,0.000743921,-7.45511e-07,3.51858e-08,0.810894,0.000742535,-6.39953e-07,3.79034e-09,0.811636,0.000741267,-6.28582e-07,-5.03471e-08,0.812377,0.000739858,-7.79624e-07,7.83886e-08,0.813116,0.000738534,-5.44458e-07,-8.43935e-08,0.813854,0.000737192,-7.97638e-07,8.03714e-08,0.81459,0.000735838,-5.56524e-07,-5.82784e-08,0.815325,0.00073455,-7.31359e-07,3.35329e-08,0.816059,0.000733188,-6.3076e-07,-1.62486e-08,0.816792,0.000731878,-6.79506e-07,3.14614e-08,0.817523,0.000730613,-5.85122e-07,-4.99925e-08,0.818253,0.000729293,-7.35099e-07,4.92994e-08,0.818982,0.000727971,-5.87201e-07,-2.79959e-08,0.819709,0.000726712,-6.71189e-07,3.07959e-09,0.820435,0.000725379,-6.6195e-07,1.56777e-08,0.82116,0.000724102,-6.14917e-07,-6.18564e-09,0.821883,0.000722854,-6.33474e-07,9.06488e-09,0.822606,0.000721614,-6.06279e-07,-3.00739e-08,0.823327,0.000720311,-6.96501e-07,5.16262e-08,0.824046,0.000719073,-5.41623e-07,-5.72214e-08,0.824765,0.000717818,-7.13287e-07,5.80503e-08,0.825482,0.000716566,-5.39136e-07,-5.57703e-08,0.826198,0.00071532,-7.06447e-07,4.58215e-08,0.826912,0.000714045,-5.68983e-07,-8.30636e-09,0.827626,0.000712882,-5.93902e-07,-1.25961e-08,0.828338,0.000711656,-6.3169e-07,-9.13985e-10,0.829049,0.00071039,-6.34432e-07,1.62519e-08,0.829759,0.00070917,-5.85676e-07,-4.48904e-09,0.830468,0.000707985,-5.99143e-07,1.70418e-09,0.831175,0.000706792,-5.9403e-07,-2.32768e-09,0.831881,0.000705597,-6.01014e-07,7.60648e-09,0.832586,0.000704418,-5.78194e-07,-2.80982e-08,0.83329,0.000703177,-6.62489e-07,4.51817e-08,0.833993,0.000701988,-5.26944e-07,-3.34192e-08,0.834694,0.000700834,-6.27201e-07,2.88904e-08,0.835394,0.000699666,-5.4053e-07,-2.25378e-08,0.836093,0.000698517,-6.08143e-07,1.65589e-09,0.836791,0.000697306,-6.03176e-07,1.59142e-08,0.837488,0.000696147,-5.55433e-07,-5.70801e-09,0.838184,0.000695019,-5.72557e-07,6.91792e-09,0.838878,0.000693895,-5.51803e-07,-2.19637e-08,0.839571,0.000692725,-6.17694e-07,2.13321e-08,0.840263,0.000691554,-5.53698e-07,-3.75996e-09,0.840954,0.000690435,-5.64978e-07,-6.29219e-09,0.841644,0.000689287,-5.83855e-07,2.89287e-08,0.842333,0.000688206,-4.97068e-07,-4.98181e-08,0.843021,0.000687062,-6.46523e-07,5.11344e-08,0.843707,0.000685922,-4.9312e-07,-3.55102e-08,0.844393,0.00068483,-5.9965e-07,3.13019e-08,0.845077,0.000683724,-5.05745e-07,-3.00925e-08,0.84576,0.000682622,-5.96022e-07,2.94636e-08,0.846442,0.000681519,-5.07631e-07,-2.81572e-08,0.847123,0.000680419,-5.92103e-07,2.35606e-08,0.847803,0.000679306,-5.21421e-07,-6.48045e-09,0.848482,0.000678243,-5.40863e-07,2.36124e-09,0.849159,0.000677169,-5.33779e-07,-2.96461e-09,0.849836,0.000676092,-5.42673e-07,9.49728e-09,0.850512,0.000675035,-5.14181e-07,-3.50245e-08,0.851186,0.000673902,-6.19254e-07,7.09959e-08,0.851859,0.000672876,-4.06267e-07,-7.01453e-08,0.852532,0.000671853,-6.16703e-07,3.07714e-08,0.853203,0.000670712,-5.24388e-07,6.66423e-09,0.853873,0.000669684,-5.04396e-07,2.17629e-09,0.854542,0.000668681,-4.97867e-07,-1.53693e-08,0.855211,0.000667639,-5.43975e-07,-3.03752e-10,0.855878,0.000666551,-5.44886e-07,1.65844e-08,0.856544,0.000665511,-4.95133e-07,-6.42907e-09,0.857209,0.000664501,-5.1442e-07,9.13195e-09,0.857873,0.0006635,-4.87024e-07,-3.00987e-08,0.858536,0.000662435,-5.7732e-07,5.16584e-08,0.859198,0.000661436,-4.22345e-07,-5.73255e-08,0.859859,0.000660419,-5.94322e-07,5.84343e-08,0.860518,0.000659406,-4.19019e-07,-5.72022e-08,0.861177,0.000658396,-5.90626e-07,5.11653e-08,0.861835,0.000657368,-4.3713e-07,-2.82495e-08,0.862492,0.000656409,-5.21878e-07,2.22788e-09,0.863148,0.000655372,-5.15195e-07,1.9338e-08,0.863803,0.0006544,-4.5718e-07,-1.99754e-08,0.864457,0.000653425,-5.17107e-07,9.59024e-10,0.86511,0.000652394,-5.1423e-07,1.61393e-08,0.865762,0.000651414,-4.65812e-07,-5.91149e-09,0.866413,0.000650465,-4.83546e-07,7.50665e-09,0.867063,0.00064952,-4.61026e-07,-2.4115e-08,0.867712,0.000648526,-5.33371e-07,2.93486e-08,0.86836,0.000647547,-4.45325e-07,-3.36748e-08,0.869007,0.000646555,-5.4635e-07,4.57461e-08,0.869653,0.0006456,-4.09112e-07,-3.01002e-08,0.870298,0.000644691,-4.99412e-07,1.50501e-08,0.870942,0.000643738,-4.54262e-07,-3.01002e-08,0.871585,0.000642739,-5.44563e-07,4.57461e-08,0.872228,0.000641787,-4.07324e-07,-3.36748e-08,0.872869,0.000640871,-5.08349e-07,2.93486e-08,0.873509,0.000639943,-4.20303e-07,-2.4115e-08,0.874149,0.00063903,-4.92648e-07,7.50655e-09,0.874787,0.000638067,-4.70128e-07,-5.91126e-09,0.875425,0.000637109,-4.87862e-07,1.61385e-08,0.876062,0.000636182,-4.39447e-07,9.61961e-10,0.876697,0.000635306,-4.36561e-07,-1.99863e-08,0.877332,0.000634373,-4.9652e-07,1.93785e-08,0.877966,0.000633438,-4.38384e-07,2.07697e-09,0.878599,0.000632567,-4.32153e-07,-2.76864e-08,0.879231,0.00063162,-5.15212e-07,4.90641e-08,0.879862,0.000630737,-3.6802e-07,-4.93606e-08,0.880493,0.000629852,-5.16102e-07,2.9169e-08,0.881122,0.000628908,-4.28595e-07,-7.71083e-09,0.881751,0.000628027,-4.51727e-07,1.6744e-09,0.882378,0.000627129,-4.46704e-07,1.01317e-09,0.883005,0.000626239,-4.43665e-07,-5.72703e-09,0.883631,0.000625334,-4.60846e-07,2.1895e-08,0.884255,0.000624478,-3.95161e-07,-2.22481e-08,0.88488,0.000623621,-4.61905e-07,7.4928e-09,0.885503,0.00062272,-4.39427e-07,-7.72306e-09,0.886125,0.000621818,-4.62596e-07,2.33995e-08,0.886746,0.000620963,-3.92398e-07,-2.62704e-08,0.887367,0.000620099,-4.71209e-07,2.20775e-08,0.887987,0.000619223,-4.04976e-07,-2.43496e-09,0.888605,0.000618406,-4.12281e-07,-1.23377e-08,0.889223,0.000617544,-4.49294e-07,-7.81876e-09,0.88984,0.000616622,-4.72751e-07,4.36128e-08,0.890457,0.000615807,-3.41912e-07,-4.7423e-08,0.891072,0.000614981,-4.84181e-07,2.68698e-08,0.891687,0.000614093,-4.03572e-07,-4.51384e-10,0.8923,0.000613285,-4.04926e-07,-2.50643e-08,0.892913,0.0006124,-4.80119e-07,4.11038e-08,0.893525,0.000611563,-3.56808e-07,-2.01414e-08,0.894136,0.000610789,-4.17232e-07,-2.01426e-08,0.894747,0.000609894,-4.7766e-07,4.11073e-08,0.895356,0.000609062,-3.54338e-07,-2.50773e-08,0.895965,0.000608278,-4.2957e-07,-4.02954e-10,0.896573,0.000607418,-4.30779e-07,2.66891e-08,0.89718,0.000606636,-3.50711e-07,-4.67489e-08,0.897786,0.000605795,-4.90958e-07,4.10972e-08,0.898391,0.000604936,-3.67666e-07,1.56948e-09,0.898996,0.000604205,-3.62958e-07,-4.73751e-08,0.8996,0.000603337,-5.05083e-07,6.87214e-08,0.900202,0.000602533,-2.98919e-07,-4.86966e-08,0.900805,0.000601789,-4.45009e-07,6.85589e-09,0.901406,0.00060092,-4.24441e-07,2.1273e-08,0.902007,0.000600135,-3.60622e-07,-3.23434e-08,0.902606,0.000599317,-4.57652e-07,4.84959e-08,0.903205,0.000598547,-3.12164e-07,-4.24309e-08,0.903803,0.000597795,-4.39457e-07,2.01844e-09,0.904401,0.000596922,-4.33402e-07,3.43571e-08,0.904997,0.000596159,-3.30331e-07,-2.02374e-08,0.905593,0.000595437,-3.91043e-07,-1.30123e-08,0.906188,0.000594616,-4.3008e-07,1.26819e-08,0.906782,0.000593794,-3.92034e-07,2.18894e-08,0.907376,0.000593076,-3.26366e-07,-4.06349e-08,0.907968,0.000592301,-4.4827e-07,2.1441e-08,0.90856,0.000591469,-3.83947e-07,1.44754e-08,0.909151,0.000590744,-3.40521e-07,-1.97379e-08,0.909742,0.000590004,-3.99735e-07,4.87161e-09,0.910331,0.000589219,-3.8512e-07,2.51532e-10,0.91092,0.00058845,-3.84366e-07,-5.87776e-09,0.911508,0.000587663,-4.01999e-07,2.32595e-08,0.912096,0.000586929,-3.3222e-07,-2.75554e-08,0.912682,0.000586182,-4.14887e-07,2.73573e-08,0.913268,0.000585434,-3.32815e-07,-2.22692e-08,0.913853,0.000584702,-3.99622e-07,2.11486e-09,0.914437,0.000583909,-3.93278e-07,1.38098e-08,0.915021,0.000583164,-3.51848e-07,2.25042e-09,0.915604,0.000582467,-3.45097e-07,-2.28115e-08,0.916186,0.000581708,-4.13531e-07,2.93911e-08,0.916767,0.000580969,-3.25358e-07,-3.51481e-08,0.917348,0.000580213,-4.30803e-07,5.15967e-08,0.917928,0.000579506,-2.76012e-07,-5.20296e-08,0.918507,0.000578798,-4.32101e-07,3.73124e-08,0.919085,0.000578046,-3.20164e-07,-3.76154e-08,0.919663,0.000577293,-4.3301e-07,5.35447e-08,0.92024,0.000576587,-2.72376e-07,-5.7354e-08,0.920816,0.000575871,-4.44438e-07,5.66621e-08,0.921391,0.000575152,-2.74452e-07,-5.00851e-08,0.921966,0.000574453,-4.24707e-07,2.4469e-08,0.92254,0.000573677,-3.513e-07,1.18138e-08,0.923114,0.000573009,-3.15859e-07,-1.21195e-08,0.923686,0.000572341,-3.52217e-07,-2.29403e-08,0.924258,0.000571568,-4.21038e-07,4.4276e-08,0.924829,0.000570859,-2.8821e-07,-3.49546e-08,0.9254,0.000570178,-3.93074e-07,3.59377e-08,0.92597,0.000569499,-2.85261e-07,-4.91915e-08,0.926539,0.000568781,-4.32835e-07,4.16189e-08,0.927107,0.00056804,-3.07979e-07,1.92523e-09,0.927675,0.00056743,-3.02203e-07,-4.93198e-08,0.928242,0.000566678,-4.50162e-07,7.61447e-08,0.928809,0.000566006,-2.21728e-07,-7.6445e-08,0.929374,0.000565333,-4.51063e-07,5.08216e-08,0.929939,0.000564583,-2.98599e-07,-7.63212e-09,0.930503,0.000563963,-3.21495e-07,-2.02931e-08,0.931067,0.000563259,-3.82374e-07,2.92001e-08,0.93163,0.000562582,-2.94774e-07,-3.69025e-08,0.932192,0.000561882,-4.05482e-07,5.88053e-08,0.932754,0.000561247,-2.29066e-07,-7.91094e-08,0.933315,0.000560552,-4.66394e-07,7.88184e-08,0.933875,0.000559856,-2.29939e-07,-5.73501e-08,0.934434,0.000559224,-4.01989e-07,3.13727e-08,0.934993,0.000558514,-3.07871e-07,-8.53611e-09,0.935551,0.000557873,-3.33479e-07,2.77175e-09,0.936109,0.000557214,-3.25164e-07,-2.55091e-09,0.936666,0.000556556,-3.32817e-07,7.43188e-09,0.937222,0.000555913,-3.10521e-07,-2.71766e-08,0.937778,0.00055521,-3.92051e-07,4.167e-08,0.938333,0.000554551,-2.67041e-07,-2.02941e-08,0.938887,0.000553956,-3.27923e-07,-2.00984e-08,0.93944,0.00055324,-3.88218e-07,4.10828e-08,0.939993,0.000552587,-2.6497e-07,-2.50237e-08,0.940546,0.000551982,-3.40041e-07,-5.92583e-10,0.941097,0.0005513,-3.41819e-07,2.7394e-08,0.941648,0.000550698,-2.59637e-07,-4.93788e-08,0.942199,0.000550031,-4.07773e-07,5.09119e-08,0.942748,0.000549368,-2.55038e-07,-3.50595e-08,0.943297,0.000548753,-3.60216e-07,2.97214e-08,0.943846,0.000548122,-2.71052e-07,-2.42215e-08,0.944394,0.000547507,-3.43716e-07,7.55985e-09,0.944941,0.000546842,-3.21037e-07,-6.01796e-09,0.945487,0.000546182,-3.3909e-07,1.65119e-08,0.946033,0.000545553,-2.89555e-07,-4.2498e-10,0.946578,0.000544973,-2.9083e-07,-1.4812e-08,0.947123,0.000544347,-3.35266e-07,6.83068e-11,0.947667,0.000543676,-3.35061e-07,1.45388e-08,0.94821,0.00054305,-2.91444e-07,1.38123e-09,0.948753,0.000542471,-2.87301e-07,-2.00637e-08,0.949295,0.000541836,-3.47492e-07,1.92688e-08,0.949837,0.000541199,-2.89685e-07,2.59298e-09,0.950378,0.000540628,-2.81906e-07,-2.96407e-08,0.950918,0.000539975,-3.70829e-07,5.63652e-08,0.951458,0.000539402,-2.01733e-07,-7.66107e-08,0.951997,0.000538769,-4.31565e-07,7.12638e-08,0.952535,0.00053812,-2.17774e-07,-2.96305e-08,0.953073,0.000537595,-3.06665e-07,-1.23464e-08,0.95361,0.000536945,-3.43704e-07,1.94114e-08,0.954147,0.000536316,-2.8547e-07,-5.69451e-09,0.954683,0.000535728,-3.02554e-07,3.36666e-09,0.955219,0.000535133,-2.92454e-07,-7.77208e-09,0.955753,0.000534525,-3.1577e-07,2.77216e-08,0.956288,0.000533976,-2.32605e-07,-4.35097e-08,0.956821,0.00053338,-3.63134e-07,2.7108e-08,0.957354,0.000532735,-2.8181e-07,-5.31772e-09,0.957887,0.000532156,-2.97764e-07,-5.83718e-09,0.958419,0.000531543,-3.15275e-07,2.86664e-08,0.95895,0.000530998,-2.29276e-07,-4.9224e-08,0.959481,0.000530392,-3.76948e-07,4.90201e-08,0.960011,0.000529785,-2.29887e-07,-2.76471e-08,0.96054,0.000529243,-3.12829e-07,1.96385e-09,0.961069,0.000528623,-3.06937e-07,1.97917e-08,0.961598,0.000528068,-2.47562e-07,-2.15261e-08,0.962125,0.000527508,-3.1214e-07,6.70795e-09,0.962653,0.000526904,-2.92016e-07,-5.30573e-09,0.963179,0.000526304,-3.07934e-07,1.4515e-08,0.963705,0.000525732,-2.64389e-07,6.85048e-09,0.964231,0.000525224,-2.43837e-07,-4.19169e-08,0.964756,0.00052461,-3.69588e-07,4.1608e-08,0.96528,0.000523996,-2.44764e-07,-5.30598e-09,0.965804,0.000523491,-2.60682e-07,-2.03841e-08,0.966327,0.000522908,-3.21834e-07,2.72378e-08,0.966849,0.000522346,-2.40121e-07,-2.89625e-08,0.967371,0.000521779,-3.27008e-07,2.90075e-08,0.967893,0.000521212,-2.39986e-07,-2.74629e-08,0.968414,0.00052065,-3.22374e-07,2.12396e-08,0.968934,0.000520069,-2.58656e-07,2.10922e-09,0.969454,0.000519558,-2.52328e-07,-2.96765e-08,0.969973,0.000518964,-3.41357e-07,5.6992e-08,0.970492,0.000518452,-1.70382e-07,-7.90821e-08,0.97101,0.000517874,-4.07628e-07,8.05224e-08,0.971528,0.000517301,-1.66061e-07,-6.41937e-08,0.972045,0.000516776,-3.58642e-07,5.70429e-08,0.972561,0.00051623,-1.87513e-07,-4.47686e-08,0.973077,0.00051572,-3.21819e-07,2.82237e-09,0.973593,0.000515085,-3.13352e-07,3.34792e-08,0.974108,0.000514559,-2.12914e-07,-1.75298e-08,0.974622,0.000514081,-2.65503e-07,-2.29648e-08,0.975136,0.000513481,-3.34398e-07,4.97843e-08,0.975649,0.000512961,-1.85045e-07,-5.6963e-08,0.976162,0.00051242,-3.55934e-07,5.88585e-08,0.976674,0.000511885,-1.79359e-07,-5.92616e-08,0.977185,0.000511348,-3.57143e-07,5.89785e-08,0.977696,0.000510811,-1.80208e-07,-5.74433e-08,0.978207,0.000510278,-3.52538e-07,5.15854e-08,0.978717,0.000509728,-1.97781e-07,-2.9689e-08,0.979226,0.000509243,-2.86848e-07,7.56591e-09,0.979735,0.000508692,-2.64151e-07,-5.74649e-10,0.980244,0.000508162,-2.65875e-07,-5.26732e-09,0.980752,0.000507615,-2.81677e-07,2.16439e-08,0.981259,0.000507116,-2.16745e-07,-2.17037e-08,0.981766,0.000506618,-2.81856e-07,5.56636e-09,0.982272,0.000506071,-2.65157e-07,-5.61689e-10,0.982778,0.000505539,-2.66842e-07,-3.31963e-09,0.983283,0.000504995,-2.76801e-07,1.38402e-08,0.983788,0.000504483,-2.3528e-07,7.56339e-09,0.984292,0.000504035,-2.1259e-07,-4.40938e-08,0.984796,0.000503478,-3.44871e-07,4.96026e-08,0.985299,0.000502937,-1.96064e-07,-3.51071e-08,0.985802,0.000502439,-3.01385e-07,3.12212e-08,0.986304,0.00050193,-2.07721e-07,-3.0173e-08,0.986806,0.000501424,-2.9824e-07,2.9866e-08,0.987307,0.000500917,-2.08642e-07,-2.96865e-08,0.987808,0.000500411,-2.97702e-07,2.92753e-08,0.988308,0.000499903,-2.09876e-07,-2.78101e-08,0.988807,0.0004994,-2.93306e-07,2.23604e-08,0.989307,0.000498881,-2.26225e-07,-2.02681e-09,0.989805,0.000498422,-2.32305e-07,-1.42531e-08,0.990303,0.000497915,-2.75065e-07,-5.65232e-10,0.990801,0.000497363,-2.76761e-07,1.65141e-08,0.991298,0.000496859,-2.27218e-07,-5.88639e-09,0.991795,0.000496387,-2.44878e-07,7.0315e-09,0.992291,0.000495918,-2.23783e-07,-2.22396e-08,0.992787,0.000495404,-2.90502e-07,2.23224e-08,0.993282,0.00049489,-2.23535e-07,-7.44543e-09,0.993776,0.000494421,-2.45871e-07,7.45924e-09,0.994271,0.000493951,-2.23493e-07,-2.23915e-08,0.994764,0.000493437,-2.90668e-07,2.25021e-08,0.995257,0.000492923,-2.23161e-07,-8.01218e-09,0.99575,0.000492453,-2.47198e-07,9.54669e-09,0.996242,0.000491987,-2.18558e-07,-3.01746e-08,0.996734,0.000491459,-3.09082e-07,5.1547e-08,0.997225,0.000490996,-1.54441e-07,-5.68039e-08,0.997716,0.000490517,-3.24853e-07,5.64594e-08,0.998206,0.000490036,-1.55474e-07,-4.98245e-08,0.998696,0.000489576,-3.04948e-07,2.36292e-08,0.999186,0.000489037,-2.3406e-07,1.49121e-08,0.999674,0.000488613,-1.89324e-07,-2.3673e-08,1.00016,0.000488164,-2.60343e-07,2.01754e-08,1.00065,0.000487704,-1.99816e-07,-5.70288e-08,1.00114,0.000487133,-3.70903e-07,8.87303e-08,1.00162,0.000486657,-1.04712e-07,-5.94737e-08,1.00211,0.000486269,-2.83133e-07,2.99553e-08,1.0026,0.000485793,-1.93267e-07,-6.03474e-08,1.00308,0.000485225,-3.74309e-07,9.2225e-08,1.00357,0.000484754,-9.76345e-08,-7.0134e-08,1.00405,0.000484348,-3.08036e-07,6.91016e-08,1.00454,0.000483939,-1.00731e-07,-8.70633e-08,1.00502,0.000483476,-3.61921e-07,4.07328e-08,1.0055,0.000482875,-2.39723e-07,4.33413e-08,1.00599,0.000482525,-1.09699e-07,-9.48886e-08,1.00647,0.000482021,-3.94365e-07,9.77947e-08,1.00695,0.000481526,-1.00981e-07,-5.78713e-08,1.00743,0.00048115,-2.74595e-07,1.44814e-08,1.00791,0.000480645,-2.31151e-07,-5.42665e-11,1.00839,0.000480182,-2.31314e-07,-1.42643e-08,1.00887,0.000479677,-2.74106e-07,5.71115e-08,1.00935,0.0004793,-1.02772e-07,-9.49724e-08,1.00983,0.000478809,-3.87689e-07,8.43596e-08,1.01031,0.000478287,-1.3461e-07,-4.04755e-09,1.01079,0.000478006,-1.46753e-07,-6.81694e-08,1.01127,0.000477508,-3.51261e-07,3.83067e-08,1.01174,0.00047692,-2.36341e-07,3.41521e-08,1.01222,0.00047655,-1.33885e-07,-5.57058e-08,1.0127,0.000476115,-3.01002e-07,6.94616e-08,1.01317,0.000475721,-9.26174e-08,-1.02931e-07,1.01365,0.000475227,-4.01412e-07,1.03846e-07,1.01412,0.000474736,-8.98751e-08,-7.40321e-08,1.0146,0.000474334,-3.11971e-07,7.30735e-08,1.01507,0.00047393,-9.27508e-08,-9.90527e-08,1.01554,0.000473447,-3.89909e-07,8.47188e-08,1.01602,0.000472921,-1.35753e-07,-1.40381e-09,1.01649,0.000472645,-1.39964e-07,-7.91035e-08,1.01696,0.000472128,-3.77275e-07,7.93993e-08,1.01744,0.000471612,-1.39077e-07,-7.52607e-11,1.01791,0.000471334,-1.39302e-07,-7.90983e-08,1.01838,0.000470818,-3.76597e-07,7.80499e-08,1.01885,0.000470299,-1.42448e-07,5.31733e-09,1.01932,0.00047003,-1.26496e-07,-9.93193e-08,1.01979,0.000469479,-4.24453e-07,1.53541e-07,1.02026,0.00046909,3.617e-08,-1.57217e-07,1.02073,0.000468691,-4.35482e-07,1.177e-07,1.02119,0.000468173,-8.23808e-08,-7.51659e-08,1.02166,0.000467783,-3.07878e-07,6.37538e-08,1.02213,0.000467358,-1.16617e-07,-6.064e-08,1.0226,0.000466943,-2.98537e-07,5.9597e-08,1.02306,0.000466525,-1.19746e-07,-5.85386e-08,1.02353,0.00046611,-2.95362e-07,5.53482e-08,1.024,0.000465685,-1.29317e-07,-4.36449e-08,1.02446,0.000465296,-2.60252e-07,2.20268e-11,1.02493,0.000464775,-2.60186e-07,4.35568e-08,1.02539,0.000464386,-1.29516e-07,-5.50398e-08,1.02586,0.000463961,-2.94635e-07,5.73932e-08,1.02632,0.000463544,-1.22456e-07,-5.53236e-08,1.02678,0.000463133,-2.88426e-07,4.46921e-08,1.02725,0.000462691,-1.5435e-07,-4.23534e-09,1.02771,0.000462369,-1.67056e-07,-2.77507e-08,1.02817,0.000461952,-2.50308e-07,-3.97101e-09,1.02863,0.000461439,-2.62221e-07,4.36348e-08,1.02909,0.000461046,-1.31317e-07,-5.13589e-08,1.02955,0.000460629,-2.85394e-07,4.25913e-08,1.03001,0.000460186,-1.5762e-07,2.0285e-10,1.03047,0.000459871,-1.57011e-07,-4.34027e-08,1.03093,0.000459427,-2.87219e-07,5.41987e-08,1.03139,0.000459015,-1.24623e-07,-5.4183e-08,1.03185,0.000458604,-2.87172e-07,4.33239e-08,1.03231,0.000458159,-1.572e-07,9.65817e-11,1.03277,0.000457845,-1.56911e-07,-4.37103e-08,1.03323,0.0004574,-2.88041e-07,5.55351e-08,1.03368,0.000456991,-1.21436e-07,-5.9221e-08,1.03414,0.00045657,-2.99099e-07,6.21394e-08,1.0346,0.000456158,-1.1268e-07,-7.01275e-08,1.03505,0.000455723,-3.23063e-07,9.91614e-08,1.03551,0.000455374,-2.55788e-08,-8.80996e-08,1.03596,0.000455058,-2.89878e-07,1.48184e-08,1.03642,0.000454523,-2.45422e-07,2.88258e-08,1.03687,0.000454119,-1.58945e-07,-1.09125e-08,1.03733,0.000453768,-1.91682e-07,1.48241e-08,1.03778,0.000453429,-1.4721e-07,-4.83838e-08,1.03823,0.00045299,-2.92361e-07,5.95019e-08,1.03869,0.000452584,-1.13856e-07,-7.04146e-08,1.03914,0.000452145,-3.25099e-07,1.02947e-07,1.03959,0.000451803,-1.62583e-08,-1.02955e-07,1.04004,0.000451462,-3.25123e-07,7.04544e-08,1.04049,0.000451023,-1.1376e-07,-5.96534e-08,1.04094,0.000450616,-2.9272e-07,4.89499e-08,1.04139,0.000450178,-1.45871e-07,-1.69369e-08,1.04184,0.000449835,-1.96681e-07,1.87977e-08,1.04229,0.000449498,-1.40288e-07,-5.82539e-08,1.04274,0.000449043,-3.1505e-07,9.50087e-08,1.04319,0.000448698,-3.00238e-08,-8.33623e-08,1.04364,0.000448388,-2.80111e-07,2.20363e-11,1.04409,0.000447828,-2.80045e-07,8.32742e-08,1.04454,0.000447517,-3.02221e-08,-9.47002e-08,1.04498,0.000447173,-3.14323e-07,5.7108e-08,1.04543,0.000446716,-1.42999e-07,-1.45225e-08,1.04588,0.000446386,-1.86566e-07,9.82022e-10,1.04632,0.000446016,-1.8362e-07,1.05944e-08,1.04677,0.00044568,-1.51837e-07,-4.33597e-08,1.04721,0.000445247,-2.81916e-07,4.36352e-08,1.04766,0.000444814,-1.51011e-07,-1.19717e-08,1.0481,0.000444476,-1.86926e-07,4.25158e-09,1.04855,0.000444115,-1.74171e-07,-5.03461e-09,1.04899,0.000443751,-1.89275e-07,1.58868e-08,1.04944,0.00044342,-1.41614e-07,-5.85127e-08,1.04988,0.000442961,-3.17152e-07,9.89548e-08,1.05032,0.000442624,-2.0288e-08,-9.88878e-08,1.05076,0.000442287,-3.16951e-07,5.81779e-08,1.05121,0.000441827,-1.42418e-07,-1.46144e-08,1.05165,0.000441499,-1.86261e-07,2.79892e-10,1.05209,0.000441127,-1.85421e-07,1.34949e-08,1.05253,0.000440797,-1.44937e-07,-5.42594e-08,1.05297,0.000440344,-3.07715e-07,8.43335e-08,1.05341,0.000439982,-5.47146e-08,-4.46558e-08,1.05385,0.000439738,-1.88682e-07,-2.49193e-08,1.05429,0.000439286,-2.6344e-07,2.5124e-08,1.05473,0.000438835,-1.88068e-07,4.36328e-08,1.05517,0.000438589,-5.71699e-08,-8.04459e-08,1.05561,0.000438234,-2.98508e-07,3.97324e-08,1.05605,0.000437756,-1.79311e-07,4.07258e-08,1.05648,0.000437519,-5.71332e-08,-8.34263e-08,1.05692,0.000437155,-3.07412e-07,5.45608e-08,1.05736,0.000436704,-1.4373e-07,-1.56078e-08,1.05779,0.000436369,-1.90553e-07,7.87043e-09,1.05823,0.000436012,-1.66942e-07,-1.58739e-08,1.05867,0.00043563,-2.14563e-07,5.56251e-08,1.0591,0.000435368,-4.76881e-08,-8.74172e-08,1.05954,0.000435011,-3.0994e-07,5.56251e-08,1.05997,0.000434558,-1.43064e-07,-1.58739e-08,1.06041,0.000434224,-1.90686e-07,7.87042e-09,1.06084,0.000433866,-1.67075e-07,-1.56078e-08,1.06127,0.000433485,-2.13898e-07,5.45609e-08,1.06171,0.000433221,-5.02157e-08,-8.34263e-08,1.06214,0.00043287,-3.00495e-07,4.07258e-08,1.06257,0.000432391,-1.78317e-07,3.97325e-08,1.063,0.000432154,-5.91198e-08,-8.04464e-08,1.06344,0.000431794,-3.00459e-07,4.36347e-08,1.06387,0.000431324,-1.69555e-07,2.5117e-08,1.0643,0.000431061,-9.42041e-08,-2.48934e-08,1.06473,0.000430798,-1.68884e-07,-4.47527e-08,1.06516,0.000430326,-3.03142e-07,8.46951e-08,1.06559,0.000429973,-4.90573e-08,-5.56089e-08,1.06602,0.000429708,-2.15884e-07,1.85314e-08,1.06645,0.000429332,-1.6029e-07,-1.85166e-08,1.06688,0.000428956,-2.1584e-07,5.5535e-08,1.06731,0.000428691,-4.92347e-08,-8.44142e-08,1.06774,0.000428339,-3.02477e-07,4.37032e-08,1.06816,0.000427865,-1.71368e-07,2.88107e-08,1.06859,0.000427609,-8.49356e-08,-3.97367e-08,1.06902,0.00042732,-2.04146e-07,1.09267e-08,1.06945,0.000426945,-1.71365e-07,-3.97023e-09,1.06987,0.00042659,-1.83276e-07,4.9542e-09,1.0703,0.000426238,-1.68414e-07,-1.58466e-08,1.07073,0.000425854,-2.15953e-07,5.84321e-08,1.07115,0.000425597,-4.0657e-08,-9.86725e-08,1.07158,0.00042522,-3.36674e-07,9.78392e-08,1.072,0.00042484,-4.31568e-08,-5.42658e-08,1.07243,0.000424591,-2.05954e-07,1.45377e-11,1.07285,0.000424179,-2.0591e-07,5.42076e-08,1.07328,0.00042393,-4.32877e-08,-9.76357e-08,1.0737,0.00042355,-3.36195e-07,9.79165e-08,1.07412,0.000423172,-4.24451e-08,-5.56118e-08,1.07455,0.00042292,-2.09281e-07,5.32143e-09,1.07497,0.000422518,-1.93316e-07,3.43261e-08,1.07539,0.000422234,-9.0338e-08,-2.34165e-08,1.07581,0.000421983,-1.60588e-07,-5.98692e-08,1.07623,0.000421482,-3.40195e-07,1.43684e-07,1.07666,0.000421233,9.08574e-08,-1.5724e-07,1.07708,0.000420943,-3.80862e-07,1.27647e-07,1.0775,0.000420564,2.0791e-09,-1.1493e-07,1.07792,0.000420223,-3.4271e-07,9.36534e-08,1.07834,0.000419819,-6.17499e-08,-2.12653e-08,1.07876,0.000419632,-1.25546e-07,-8.59219e-09,1.07918,0.000419355,-1.51322e-07,-6.35752e-08,1.0796,0.000418861,-3.42048e-07,1.43684e-07,1.08002,0.000418608,8.90034e-08,-1.53532e-07,1.08043,0.000418326,-3.71593e-07,1.12817e-07,1.08085,0.000417921,-3.31414e-08,-5.93184e-08,1.08127,0.000417677,-2.11097e-07,5.24697e-09,1.08169,0.00041727,-1.95356e-07,3.83305e-08,1.0821,0.000416995,-8.03642e-08,-3.93597e-08,1.08252,0.000416716,-1.98443e-07,-1.0094e-10,1.08294,0.000416319,-1.98746e-07,3.97635e-08,1.08335,0.00041604,-7.94557e-08,-3.97437e-08,1.08377,0.000415762,-1.98687e-07,1.94215e-12,1.08419,0.000415365,-1.98681e-07,3.97359e-08,1.0846,0.000415087,-7.94732e-08,-3.97362e-08,1.08502,0.000414809,-1.98682e-07,-4.31063e-13,1.08543,0.000414411,-1.98683e-07,3.97379e-08,1.08584,0.000414133,-7.94694e-08,-3.97418e-08,1.08626,0.000413855,-1.98695e-07,2.00563e-11,1.08667,0.000413458,-1.98635e-07,3.96616e-08,1.08709,0.000413179,-7.965e-08,-3.9457e-08,1.0875,0.000412902,-1.98021e-07,-1.04281e-09,1.08791,0.000412502,-2.01149e-07,4.36282e-08,1.08832,0.000412231,-7.02648e-08,-5.42608e-08,1.08874,0.000411928,-2.33047e-07,5.42057e-08,1.08915,0.000411624,-7.04301e-08,-4.33527e-08,1.08956,0.000411353,-2.00488e-07,-4.07378e-12,1.08997,0.000410952,-2.005e-07,4.3369e-08,1.09038,0.000410681,-7.03934e-08,-5.42627e-08,1.09079,0.000410378,-2.33182e-07,5.44726e-08,1.0912,0.000410075,-6.97637e-08,-4.44186e-08,1.09161,0.000409802,-2.03019e-07,3.99235e-09,1.09202,0.000409408,-1.91042e-07,2.84491e-08,1.09243,0.000409111,-1.05695e-07,1.42043e-09,1.09284,0.000408904,-1.01434e-07,-3.41308e-08,1.09325,0.000408599,-2.03826e-07,1.58937e-08,1.09366,0.000408239,-1.56145e-07,-2.94438e-08,1.09406,0.000407838,-2.44476e-07,1.01881e-07,1.09447,0.000407655,6.11676e-08,-1.39663e-07,1.09488,0.000407358,-3.57822e-07,9.91432e-08,1.09529,0.00040694,-6.03921e-08,-1.84912e-08,1.09569,0.000406764,-1.15866e-07,-2.51785e-08,1.0961,0.000406457,-1.91401e-07,-4.03115e-12,1.09651,0.000406074,-1.91413e-07,2.51947e-08,1.09691,0.000405767,-1.15829e-07,1.84346e-08,1.09732,0.00040559,-6.05254e-08,-9.89332e-08,1.09772,0.000405172,-3.57325e-07,1.3888e-07,1.09813,0.000404874,5.93136e-08,-9.8957e-08,1.09853,0.000404696,-2.37557e-07,1.853e-08,1.09894,0.000404277,-1.81968e-07,2.48372e-08,1.09934,0.000403987,-1.07456e-07,1.33047e-09,1.09975,0.000403776,-1.03465e-07,-3.01591e-08,1.10015,0.000403479,-1.93942e-07,9.66054e-11,1.10055,0.000403091,-1.93652e-07,2.97727e-08,1.10096,0.000402793,-1.04334e-07,2.19273e-11,1.10136,0.000402585,-1.04268e-07,-2.98604e-08,1.10176,0.000402287,-1.93849e-07,2.10325e-10,1.10216,0.0004019,-1.93218e-07,2.90191e-08,1.10256,0.0004016,-1.06161e-07,2.92264e-09,1.10297,0.000401397,-9.73931e-08,-4.07096e-08,1.10337,0.00040108,-2.19522e-07,4.07067e-08,1.10377,0.000400763,-9.7402e-08,-2.90783e-09,1.10417,0.000400559,-1.06126e-07,-2.90754e-08,1.10457,0.00040026,-1.93352e-07,9.00021e-14,1.10497,0.000399873,-1.93351e-07,2.9075e-08,1.10537,0.000399574,-1.06126e-07,2.90902e-09,1.10577,0.00039937,-9.73992e-08,-4.07111e-08,1.10617,0.000399053,-2.19533e-07,4.07262e-08,1.10657,0.000398736,-9.73541e-08,-2.98424e-09,1.10697,0.000398533,-1.06307e-07,-2.87892e-08,1.10736,0.000398234,-1.92674e-07,-1.06824e-09,1.10776,0.000397845,-1.95879e-07,3.30622e-08,1.10816,0.000397552,-9.66926e-08,-1.19712e-08,1.10856,0.000397323,-1.32606e-07,1.48225e-08,1.10895,0.000397102,-8.81387e-08,-4.73187e-08,1.10935,0.000396784,-2.30095e-07,5.52429e-08,1.10975,0.00039649,-6.4366e-08,-5.44437e-08,1.11014,0.000396198,-2.27697e-07,4.33226e-08,1.11054,0.000395872,-9.77293e-08,3.62656e-10,1.11094,0.000395678,-9.66414e-08,-4.47732e-08,1.11133,0.00039535,-2.30961e-07,5.95208e-08,1.11173,0.000395067,-5.23985e-08,-7.41008e-08,1.11212,0.00039474,-2.74701e-07,1.17673e-07,1.11252,0.000394543,7.83181e-08,-1.58172e-07,1.11291,0.000394225,-3.96199e-07,1.57389e-07,1.1133,0.000393905,7.59679e-08,-1.13756e-07,1.1137,0.000393716,-2.653e-07,5.92165e-08,1.11409,0.000393363,-8.76507e-08,-3.90074e-09,1.11449,0.000393176,-9.93529e-08,-4.36136e-08,1.11488,0.000392846,-2.30194e-07,5.91457e-08,1.11527,0.000392563,-5.27564e-08,-7.376e-08,1.11566,0.000392237,-2.74037e-07,1.16685e-07,1.11606,0.000392039,7.60189e-08,-1.54562e-07,1.11645,0.000391727,-3.87667e-07,1.43935e-07,1.11684,0.000391384,4.4137e-08,-6.35487e-08,1.11723,0.000391281,-1.46509e-07,-8.94896e-09,1.11762,0.000390961,-1.73356e-07,-1.98647e-08,1.11801,0.000390555,-2.3295e-07,8.8408e-08,1.1184,0.000390354,3.22736e-08,-9.53486e-08,1.11879,0.000390133,-2.53772e-07,5.45677e-08,1.11918,0.000389789,-9.0069e-08,-3.71296e-09,1.11957,0.000389598,-1.01208e-07,-3.97159e-08,1.11996,0.000389276,-2.20355e-07,4.33671e-08,1.12035,0.000388966,-9.02542e-08,-1.45431e-08,1.12074,0.000388741,-1.33883e-07,1.48052e-08,1.12113,0.000388518,-8.94678e-08,-4.46778e-08,1.12152,0.000388205,-2.23501e-07,4.46966e-08,1.12191,0.000387892,-8.94114e-08,-1.48992e-08,1.12229,0.000387669,-1.34109e-07,1.49003e-08,1.12268,0.000387445,-8.94082e-08,-4.47019e-08,1.12307,0.000387132,-2.23514e-07,4.4698e-08,1.12345,0.000386819,-8.942e-08,-1.48806e-08,1.12384,0.000386596,-1.34062e-07,1.48245e-08,1.12423,0.000386372,-8.95885e-08,-4.44172e-08,1.12461,0.00038606,-2.2284e-07,4.36351e-08,1.125,0.000385745,-9.19348e-08,-1.09139e-08,1.12539,0.000385528,-1.24677e-07,2.05584e-11,1.12577,0.000385279,-1.24615e-07,1.08317e-08,1.12616,0.000385062,-9.21198e-08,-4.33473e-08,1.12654,0.000384748,-2.22162e-07,4.33481e-08,1.12693,0.000384434,-9.21174e-08,-1.08356e-08,1.12731,0.000384217,-1.24624e-07,-5.50907e-12,1.12769,0.000383968,-1.24641e-07,1.08577e-08,1.12808,0.000383751,-9.20679e-08,-4.34252e-08,1.12846,0.000383437,-2.22343e-07,4.36337e-08,1.12884,0.000383123,-9.14422e-08,-1.19005e-08,1.12923,0.000382904,-1.27144e-07,3.96813e-09,1.12961,0.000382662,-1.15239e-07,-3.97207e-09,1.12999,0.000382419,-1.27155e-07,1.19201e-08,1.13038,0.000382201,-9.1395e-08,-4.37085e-08,1.13076,0.000381887,-2.2252e-07,4.37046e-08,1.13114,0.000381573,-9.14068e-08,-1.19005e-08,1.13152,0.000381355,-1.27108e-07,3.89734e-09,1.1319,0.000381112,-1.15416e-07,-3.68887e-09,1.13228,0.00038087,-1.26483e-07,1.08582e-08,1.13266,0.00038065,-9.39083e-08,-3.97438e-08,1.13304,0.000380343,-2.1314e-07,2.89076e-08,1.13342,0.000380003,-1.26417e-07,4.33225e-08,1.1338,0.00037988,3.55072e-09,-8.29883e-08,1.13418,0.000379638,-2.45414e-07,5.0212e-08,1.13456,0.000379298,-9.47781e-08,1.34964e-09,1.13494,0.000379113,-9.07292e-08,-5.56105e-08,1.13532,0.000378764,-2.57561e-07,1.01883e-07,1.1357,0.000378555,4.80889e-08,-1.13504e-07,1.13608,0.000378311,-2.92423e-07,1.13713e-07,1.13646,0.000378067,4.87176e-08,-1.02931e-07,1.13683,0.000377856,-2.60076e-07,5.95923e-08,1.13721,0.000377514,-8.12988e-08,-1.62288e-08,1.13759,0.000377303,-1.29985e-07,5.32278e-09,1.13797,0.000377059,-1.14017e-07,-5.06237e-09,1.13834,0.000376816,-1.29204e-07,1.49267e-08,1.13872,0.000376602,-8.44237e-08,-5.46444e-08,1.1391,0.000376269,-2.48357e-07,8.44417e-08,1.13947,0.000376026,4.96815e-09,-4.47039e-08,1.13985,0.000375902,-1.29143e-07,-2.48355e-08,1.14023,0.000375569,-2.0365e-07,2.48368e-08,1.1406,0.000375236,-1.2914e-07,4.46977e-08,1.14098,0.000375112,4.95341e-09,-8.44184e-08,1.14135,0.000374869,-2.48302e-07,5.45572e-08,1.14173,0.000374536,-8.463e-08,-1.46013e-08,1.1421,0.000374323,-1.28434e-07,3.8478e-09,1.14247,0.000374077,-1.1689e-07,-7.89941e-10,1.14285,0.000373841,-1.1926e-07,-6.88042e-10,1.14322,0.0003736,-1.21324e-07,3.54213e-09,1.1436,0.000373368,-1.10698e-07,-1.34805e-08,1.14397,0.000373107,-1.51139e-07,5.03798e-08,1.14434,0.000372767,0.,0.};
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LuvConvert_f(const T& src, D& dst)
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 13 * (4 * 0.950456f * _d);
+            const float _vn = 13 * (9 * _d);
+
+            float B = blueIdx == 0 ? src.x : src.z;
+            float G = src.y;
+            float R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            float X = R * 0.412453f + G * 0.357580f + B * 0.180423f;
+            float Y = R * 0.212671f + G * 0.715160f + B * 0.072169f;
+            float Z = R * 0.019334f + G * 0.119193f + B * 0.950227f;
+
+            float L = splineInterpolate(Y * (LAB_CBRT_TAB_SIZE / 1.5f), c_LabCbrtTab, LAB_CBRT_TAB_SIZE);
+            L = 116.f * L - 16.f;
+
+            const float d = (4 * 13) / ::fmaxf(X + 15 * Y + 3 * Z, numeric_limits<float>::epsilon());
+            float u = L * (X * d - _un);
+            float v = L * ((9 * 0.25f) * Y * d - _vn);
+
+            dst.x = L;
+            dst.y = u;
+            dst.z = v;
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LuvConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            srcf.x = src.x * (1.f / 255.f);
+            srcf.y = src.y * (1.f / 255.f);
+            srcf.z = src.z * (1.f / 255.f);
+
+            RGB2LuvConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 2.55f);
+            dst.y = saturate_cast<uchar>(dstf.y * 0.72033898305084743f + 96.525423728813564f);
+            dst.z = saturate_cast<uchar>(dstf.z * 0.9732824427480916f + 136.259541984732824f);
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Luv<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                RGB2LuvConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                RGB2LuvConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Luv2RGBConvert_f(const T& src, D& dst)
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 4 * 0.950456f * _d;
+            const float _vn = 9 * _d;
+
+            float L = src.x;
+            float u = src.y;
+            float v = src.z;
+
+            float Y = (L + 16.f) * (1.f / 116.f);
+            Y = Y * Y * Y;
+
+            float d = (1.f / 13.f) / L;
+            u = u * d + _un;
+            v = v * d + _vn;
+
+            float iv = 1.f / v;
+            float X = 2.25f * u * Y * iv;
+            float Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
+
+            float B = 0.055648f * X - 0.204043f * Y + 1.057311f * Z;
+            float G = -0.969256f * X + 1.875991f * Y + 0.041556f * Z;
+            float R = 3.240479f * X - 1.537150f * Y - 0.498535f * Z;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Luv2RGBConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            srcf.x = src.x * (100.f / 255.f);
+            srcf.y = src.y * 1.388235294117647f - 134.f;
+            srcf.z = src.z * 1.027450980392157f - 140.f;
+
+            Luv2RGBConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 255.f);
+            dst.y = saturate_cast<uchar>(dstf.y * 255.f);
+            dst.z = saturate_cast<uchar>(dstf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Luv2RGB;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Luv2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                Luv2RGBConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                Luv2RGBConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    #undef CV_DESCALE
+
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COLOR_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/reduce.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/reduce.hpp
new file mode 100644
index 0000000..ff82c3c
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/reduce.hpp
@@ -0,0 +1,365 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_detail
+    {
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<volatile T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<T&>
+        {
+            typedef T type;
+        };
+
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val);
+
+                For<I + 1, N>::loadToSmem(smem, val, tid);
+            }
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(val) = thrust::get<I>(smem)[tid];
+
+                For<I + 1, N>::loadFromSmem(smem, val, tid);
+            }
+
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
+            {
+                typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
+
+                For<I + 1, N>::merge(smem, val, tid, delta, op);
+            }
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
+            {
+                typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
+                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
+
+                For<I + 1, N>::mergeShfl(val, delta, width, op);
+            }
+        };
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
+            {
+            }
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
+            {
+            }
+
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
+            {
+            }
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
+            {
+            }
+        };
+
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            smem[tid] = val;
+        }
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            val = smem[tid];
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                       const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                       unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                         const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                         unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
+        }
+
+        template <typename T, class Op>
+        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
+        {
+            T reg = smem[tid + delta];
+            smem[tid] = val = op(val, reg);
+        }
+        template <typename T, class Op>
+        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
+        {
+            T reg = shfl_down(val, delta, width);
+            val = op(val, reg);
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                              unsigned int tid,
+                                              unsigned int delta,
+                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
+        }
+        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                  unsigned int delta,
+                                                  unsigned int width,
+                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
+        }
+
+        template <unsigned int N> struct Generic
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                loadToSmem(smem, val, tid);
+                if (N >= 32)
+                    __syncthreads();
+
+                if (N >= 2048)
+                {
+                    if (tid < 1024)
+                        merge(smem, val, tid, 1024, op);
+
+                    __syncthreads();
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(smem, val, tid, 512, op);
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(smem, val, tid, 256, op);
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(smem, val, tid, 128, op);
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(smem, val, tid, 64, op);
+
+                    __syncthreads();
+                }
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(smem, val, tid, 32, op);
+                }
+
+                if (tid < 16)
+                {
+                    merge(smem, val, tid, 16, op);
+                    merge(smem, val, tid, 8, op);
+                    merge(smem, val, tid, 4, op);
+                    merge(smem, val, tid, 2, op);
+                    merge(smem, val, tid, 1, op);
+                }
+            }
+        };
+
+        template <unsigned int I, typename Pointer, typename Reference, class Op>
+        struct Unroll
+        {
+            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
+            {
+                mergeShfl(val, I, N, op);
+                Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+            }
+            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                merge(smem, val, tid, I, op);
+                Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+            }
+        };
+        template <typename Pointer, typename Reference, class Op>
+        struct Unroll<0, Pointer, Reference, Op>
+        {
+            static __device__ void loopShfl(Reference, Op, unsigned int)
+            {
+            }
+            static __device__ void loop(Pointer, Reference, unsigned int, Op)
+            {
+            }
+        };
+
+        template <unsigned int N> struct WarpOptimized
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                (void) smem;
+                (void) tid;
+
+                Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+            #else
+                loadToSmem(smem, val, tid);
+
+                if (tid < N / 2)
+                    Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+            #endif
+            }
+        };
+
+        template <unsigned int N> struct GenericOptimized32
+        {
+            enum { M = N / 32 };
+
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                const unsigned int laneId = Warp::laneId();
+
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #else
+                loadToSmem(smem, val, tid);
+
+                if (laneId < 16)
+                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
+
+                __syncthreads();
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #endif
+
+                __syncthreads();
+
+                loadFromSmem(smem, val, tid);
+
+                if (tid < 32)
+                {
+                #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
+                #else
+                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+                #endif
+                }
+            }
+        };
+
+        template <bool val, class T1, class T2> struct StaticIf;
+        template <class T1, class T2> struct StaticIf<true, T1, T2>
+        {
+            typedef T1 type;
+        };
+        template <class T1, class T2> struct StaticIf<false, T1, T2>
+        {
+            typedef T2 type;
+        };
+
+        template <unsigned int N> struct IsPowerOf2
+        {
+            enum { value = ((N != 0) && !(N & (N - 1))) };
+        };
+
+        template <unsigned int N> struct Dispatcher
+        {
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,
+                WarpOptimized<N>,
+                typename StaticIf<
+                    (N <= 1024) && IsPowerOf2<N>::value,
+                    GenericOptimized32<N>,
+                    Generic<N>
+                >::type
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/reduce_key_val.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/reduce_key_val.hpp
new file mode 100644
index 0000000..6a537c9
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -0,0 +1,502 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_key_val_detail
+    {
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<volatile T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<T&>
+        {
+            typedef T type;
+        };
+
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(data);
+
+                For<I + 1, N>::loadToSmem(smem, data, tid);
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(data) = thrust::get<I>(smem)[tid];
+
+                For<I + 1, N>::loadFromSmem(smem, data, tid);
+            }
+
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
+            {
+                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
+
+                For<I + 1, N>::copyShfl(val, delta, width);
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
+            {
+                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+
+                For<I + 1, N>::copy(svals, val, tid, delta);
+            }
+
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
+            {
+                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(key) = reg;
+                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
+                }
+
+                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
+            }
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
+                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
+                                         const CmpTuple& cmp,
+                                         unsigned int tid, unsigned int delta)
+            {
+                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
+                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+                }
+
+                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
+            }
+        };
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
+            {
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
+            {
+            }
+
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
+            {
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
+            {
+            }
+
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
+            {
+            }
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
+                                         const ValPointerTuple&, const ValReferenceTuple&,
+                                         const CmpTuple&,
+                                         unsigned int, unsigned int)
+            {
+            }
+        };
+
+        //////////////////////////////////////////////////////
+        // loadToSmem
+
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            smem[tid] = data;
+        }
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            data = smem[tid];
+        }
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                                   unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
+        }
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                                     unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
+        }
+
+        //////////////////////////////////////////////////////
+        // copyVals
+
+        template <typename V>
+        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
+        {
+            val = shfl_down(val, delta, width);
+        }
+        template <typename V>
+        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
+        {
+            svals[tid] = val = svals[tid + delta];
+        }
+        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                     unsigned int delta,
+                                                     int width)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
+        }
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, unsigned int delta)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // merge
+
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
+        {
+            K reg = shfl_down(key, delta, width);
+
+            if (cmp(reg, key))
+            {
+                key = reg;
+                copyValsShfl(val, delta, width);
+            }
+        }
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K reg = skeys[tid + delta];
+
+            if (cmp(reg, key))
+            {
+                skeys[tid] = key = reg;
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        template <typename K,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const Cmp& cmp,
+                                                  unsigned int delta, int width)
+        {
+            K reg = shfl_down(key, delta, width);
+
+            if (cmp(reg, key))
+            {
+                key = reg;
+                copyValsShfl(val, delta, width);
+            }
+        }
+        template <typename K,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K reg = skeys[tid + delta];
+
+            if (cmp(reg, key))
+            {
+                skeys[tid] = key = reg;
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                                  unsigned int delta, int width)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
+        }
+        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                              unsigned int tid, unsigned int delta)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // Generic
+
+        template <unsigned int N> struct Generic
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                loadToSmem(skeys, key, tid);
+                loadValsToSmem(svals, val, tid);
+                if (N >= 32)
+                    __syncthreads();
+
+                if (N >= 2048)
+                {
+                    if (tid < 1024)
+                        merge(skeys, key, svals, val, cmp, tid, 1024);
+
+                    __syncthreads();
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(skeys, key, svals, val, cmp, tid, 512);
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(skeys, key, svals, val, cmp, tid, 256);
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(skeys, key, svals, val, cmp, tid, 128);
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(skeys, key, svals, val, cmp, tid, 64);
+
+                    __syncthreads();
+                }
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(skeys, key, svals, val, cmp, tid, 32);
+                }
+
+                if (tid < 16)
+                {
+                    merge(skeys, key, svals, val, cmp, tid, 16);
+                    merge(skeys, key, svals, val, cmp, tid, 8);
+                    merge(skeys, key, svals, val, cmp, tid, 4);
+                    merge(skeys, key, svals, val, cmp, tid, 2);
+                    merge(skeys, key, svals, val, cmp, tid, 1);
+                }
+            }
+        };
+
+        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll
+        {
+            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
+            {
+                mergeShfl(key, val, cmp, I, N);
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
+            }
+            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                merge(skeys, key, svals, val, cmp, tid, I);
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+            }
+        };
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll<0, KP, KR, VP, VR, Cmp>
+        {
+            static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
+            {
+            }
+            static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
+            {
+            }
+        };
+
+        template <unsigned int N> struct WarpOptimized
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+            #if 0 // __CUDA_ARCH__ >= 300
+                (void) skeys;
+                (void) svals;
+                (void) tid;
+
+                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
+            #else
+                loadToSmem(skeys, key, tid);
+                loadToSmem(svals, val, tid);
+
+                if (tid < N / 2)
+                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+            #endif
+            }
+        };
+
+        template <unsigned int N> struct GenericOptimized32
+        {
+            enum { M = N / 32 };
+
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                const unsigned int laneId = Warp::laneId();
+
+            #if 0 // __CUDA_ARCH__ >= 300
+                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #else
+                loadToSmem(skeys, key, tid);
+                loadToSmem(svals, val, tid);
+
+                if (laneId < 16)
+                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+
+                __syncthreads();
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #endif
+
+                __syncthreads();
+
+                loadFromSmem(skeys, key, tid);
+
+                if (tid < 32)
+                {
+                #if 0 // __CUDA_ARCH__ >= 300
+                    loadFromSmem(svals, val, tid);
+
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
+                #else
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+                #endif
+                }
+            }
+        };
+
+        template <bool val, class T1, class T2> struct StaticIf;
+        template <class T1, class T2> struct StaticIf<true, T1, T2>
+        {
+            typedef T1 type;
+        };
+        template <class T1, class T2> struct StaticIf<false, T1, T2>
+        {
+            typedef T2 type;
+        };
+
+        template <unsigned int N> struct IsPowerOf2
+        {
+            enum { value = ((N != 0) && !(N & (N - 1))) };
+        };
+
+        template <unsigned int N> struct Dispatcher
+        {
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,
+                WarpOptimized<N>,
+                typename StaticIf<
+                    (N <= 1024) && IsPowerOf2<N>::value,
+                    GenericOptimized32<N>,
+                    Generic<N>
+                >::type
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/transform_detail.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/transform_detail.hpp
new file mode 100644
index 0000000..3b72b03
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/transform_detail.hpp
@@ -0,0 +1,399 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
+#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+#include "../functional.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace transform_detail
+    {
+        //! Read Write Traits
+
+        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
+        {
+            typedef typename TypeVec<T, shift>::vec_type read_type;
+            typedef typename TypeVec<D, shift>::vec_type write_type;
+        };
+
+        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
+        {
+            typedef typename TypeVec<T1, shift>::vec_type read_type1;
+            typedef typename TypeVec<T2, shift>::vec_type read_type2;
+            typedef typename TypeVec<D, shift>::vec_type write_type;
+        };
+
+        //! Transform kernels
+
+        template <int shift> struct OpUnroller;
+        template <> struct OpUnroller<1>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src.x);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+            }
+        };
+        template <> struct OpUnroller<2>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+            }
+        };
+        template <> struct OpUnroller<3>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src.z);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src1.z, src2.z);
+            }
+        };
+        template <> struct OpUnroller<4>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src.z);
+                if (mask(y, x_shifted + 3))
+                    dst.w = op(src.w);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src1.z, src2.z);
+                if (mask(y, x_shifted + 3))
+                    dst.w = op(src1.w, src2.w);
+            }
+        };
+        template <> struct OpUnroller<8>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.a0 = op(src.a0);
+                if (mask(y, x_shifted + 1))
+                    dst.a1 = op(src.a1);
+                if (mask(y, x_shifted + 2))
+                    dst.a2 = op(src.a2);
+                if (mask(y, x_shifted + 3))
+                    dst.a3 = op(src.a3);
+                if (mask(y, x_shifted + 4))
+                    dst.a4 = op(src.a4);
+                if (mask(y, x_shifted + 5))
+                    dst.a5 = op(src.a5);
+                if (mask(y, x_shifted + 6))
+                    dst.a6 = op(src.a6);
+                if (mask(y, x_shifted + 7))
+                    dst.a7 = op(src.a7);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
+            {
+                if (mask(y, x_shifted))
+                    dst.a0 = op(src1.a0, src2.a0);
+                if (mask(y, x_shifted + 1))
+                    dst.a1 = op(src1.a1, src2.a1);
+                if (mask(y, x_shifted + 2))
+                    dst.a2 = op(src1.a2, src2.a2);
+                if (mask(y, x_shifted + 3))
+                    dst.a3 = op(src1.a3, src2.a3);
+                if (mask(y, x_shifted + 4))
+                    dst.a4 = op(src1.a4, src2.a4);
+                if (mask(y, x_shifted + 5))
+                    dst.a5 = op(src1.a5, src2.a5);
+                if (mask(y, x_shifted + 6))
+                    dst.a6 = op(src1.a6, src2.a6);
+                if (mask(y, x_shifted + 7))
+                    dst.a7 = op(src1.a7, src2.a7);
+            }
+        };
+
+        template <typename T, typename D, typename UnOp, typename Mask>
+        static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
+        {
+            typedef TransformFunctorTraits<UnOp> ft;
+            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
+            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
+
+            const int x = threadIdx.x + blockIdx.x * blockDim.x;
+            const int y = threadIdx.y + blockIdx.y * blockDim.y;
+            const int x_shifted = x * ft::smart_shift;
+
+            if (y < src_.rows)
+            {
+                const T* src = src_.ptr(y);
+                D* dst = dst_.ptr(y);
+
+                if (x_shifted + ft::smart_shift - 1 < src_.cols)
+                {
+                    const read_type src_n_el = ((const read_type*)src)[x];
+                    write_type dst_n_el = ((const write_type*)dst)[x];
+
+                    OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
+
+                    ((write_type*)dst)[x] = dst_n_el;
+                }
+                else
+                {
+                    for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
+                    {
+                        if (mask(y, real_x))
+                            dst[real_x] = op(src[real_x]);
+                    }
+                }
+            }
+        }
+
+        template <typename T, typename D, typename UnOp, typename Mask>
+        __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
+        {
+            const int x = blockDim.x * blockIdx.x + threadIdx.x;
+            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+            if (x < src.cols && y < src.rows && mask(y, x))
+            {
+                dst.ptr(y)[x] = op(src.ptr(y)[x]);
+            }
+        }
+
+        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+        static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
+            const Mask mask, const BinOp op)
+        {
+            typedef TransformFunctorTraits<BinOp> ft;
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
+
+            const int x = threadIdx.x + blockIdx.x * blockDim.x;
+            const int y = threadIdx.y + blockIdx.y * blockDim.y;
+            const int x_shifted = x * ft::smart_shift;
+
+            if (y < src1_.rows)
+            {
+                const T1* src1 = src1_.ptr(y);
+                const T2* src2 = src2_.ptr(y);
+                D* dst = dst_.ptr(y);
+
+                if (x_shifted + ft::smart_shift - 1 < src1_.cols)
+                {
+                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
+                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
+                    write_type dst_n_el = ((const write_type*)dst)[x];
+
+                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
+
+                    ((write_type*)dst)[x] = dst_n_el;
+                }
+                else
+                {
+                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
+                    {
+                        if (mask(y, real_x))
+                            dst[real_x] = op(src1[real_x], src2[real_x]);
+                    }
+                }
+            }
+        }
+
+        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+        static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
+            const Mask mask, const BinOp op)
+        {
+            const int x = blockDim.x * blockIdx.x + threadIdx.x;
+            const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+            if (x < src1.cols && y < src1.rows && mask(y, x))
+            {
+                const T1 src1_data = src1.ptr(y)[x];
+                const T2 src2_data = src2.ptr(y)[x];
+                dst.ptr(y)[x] = op(src1_data, src2_data);
+            }
+        }
+
+        template <bool UseSmart> struct TransformDispatcher;
+        template<> struct TransformDispatcher<false>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<UnOp> ft;
+
+                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
+                const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
+
+                transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                if (stream == 0)
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<BinOp> ft;
+
+                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
+                const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
+
+                transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                if (stream == 0)
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+        };
+        template<> struct TransformDispatcher<true>
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<UnOp> ft;
+
+                CV_StaticAssert(ft::smart_shift != 1, "");
+
+                if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
+                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
+                {
+                    TransformDispatcher<false>::call(src, dst, op, mask, stream);
+                    return;
+                }
+
+                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
+                const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
+
+                transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                if (stream == 0)
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<BinOp> ft;
+
+                CV_StaticAssert(ft::smart_shift != 1, "");
+
+                if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
+                    !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
+                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
+                {
+                    TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
+                    return;
+                }
+
+                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
+                const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
+
+                transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                if (stream == 0)
+                    cudaSafeCall( cudaDeviceSynchronize() );
+            }
+        };
+    } // namespace transform_detail
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/type_traits_detail.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/type_traits_detail.hpp
new file mode 100644
index 0000000..a78bd2c
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/type_traits_detail.hpp
@@ -0,0 +1,191 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace type_traits_detail
+    {
+        template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
+        template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
+
+        template <typename T> struct IsSignedIntergral { enum {value = 0}; };
+        template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<short> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
+
+        template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
+        template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
+
+        template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
+        template <> struct IsIntegral<char> { enum {value = 1}; };
+        template <> struct IsIntegral<bool> { enum {value = 1}; };
+
+        template <typename T> struct IsFloat { enum {value = 0}; };
+        template <> struct IsFloat<float> { enum {value = 1}; };
+        template <> struct IsFloat<double> { enum {value = 1}; };
+
+        template <typename T> struct IsVec { enum {value = 0}; };
+        template <> struct IsVec<uchar1> { enum {value = 1}; };
+        template <> struct IsVec<uchar2> { enum {value = 1}; };
+        template <> struct IsVec<uchar3> { enum {value = 1}; };
+        template <> struct IsVec<uchar4> { enum {value = 1}; };
+        template <> struct IsVec<uchar8> { enum {value = 1}; };
+        template <> struct IsVec<char1> { enum {value = 1}; };
+        template <> struct IsVec<char2> { enum {value = 1}; };
+        template <> struct IsVec<char3> { enum {value = 1}; };
+        template <> struct IsVec<char4> { enum {value = 1}; };
+        template <> struct IsVec<char8> { enum {value = 1}; };
+        template <> struct IsVec<ushort1> { enum {value = 1}; };
+        template <> struct IsVec<ushort2> { enum {value = 1}; };
+        template <> struct IsVec<ushort3> { enum {value = 1}; };
+        template <> struct IsVec<ushort4> { enum {value = 1}; };
+        template <> struct IsVec<ushort8> { enum {value = 1}; };
+        template <> struct IsVec<short1> { enum {value = 1}; };
+        template <> struct IsVec<short2> { enum {value = 1}; };
+        template <> struct IsVec<short3> { enum {value = 1}; };
+        template <> struct IsVec<short4> { enum {value = 1}; };
+        template <> struct IsVec<short8> { enum {value = 1}; };
+        template <> struct IsVec<uint1> { enum {value = 1}; };
+        template <> struct IsVec<uint2> { enum {value = 1}; };
+        template <> struct IsVec<uint3> { enum {value = 1}; };
+        template <> struct IsVec<uint4> { enum {value = 1}; };
+        template <> struct IsVec<uint8> { enum {value = 1}; };
+        template <> struct IsVec<int1> { enum {value = 1}; };
+        template <> struct IsVec<int2> { enum {value = 1}; };
+        template <> struct IsVec<int3> { enum {value = 1}; };
+        template <> struct IsVec<int4> { enum {value = 1}; };
+        template <> struct IsVec<int8> { enum {value = 1}; };
+        template <> struct IsVec<float1> { enum {value = 1}; };
+        template <> struct IsVec<float2> { enum {value = 1}; };
+        template <> struct IsVec<float3> { enum {value = 1}; };
+        template <> struct IsVec<float4> { enum {value = 1}; };
+        template <> struct IsVec<float8> { enum {value = 1}; };
+        template <> struct IsVec<double1> { enum {value = 1}; };
+        template <> struct IsVec<double2> { enum {value = 1}; };
+        template <> struct IsVec<double3> { enum {value = 1}; };
+        template <> struct IsVec<double4> { enum {value = 1}; };
+        template <> struct IsVec<double8> { enum {value = 1}; };
+
+        template <class U> struct AddParameterType { typedef const U& type; };
+        template <class U> struct AddParameterType<U&> { typedef U& type; };
+        template <> struct AddParameterType<void> { typedef void type; };
+
+        template <class U> struct ReferenceTraits
+        {
+            enum { value = false };
+            typedef U type;
+        };
+        template <class U> struct ReferenceTraits<U&>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+
+        template <class U> struct PointerTraits
+        {
+            enum { value = false };
+            typedef void type;
+        };
+        template <class U> struct PointerTraits<U*>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+        template <class U> struct PointerTraits<U*&>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+
+        template <class U> struct UnConst
+        {
+            typedef U type;
+            enum { value = 0 };
+        };
+        template <class U> struct UnConst<const U>
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <class U> struct UnConst<const U&>
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+
+        template <class U> struct UnVolatile
+        {
+            typedef U type;
+            enum { value = 0 };
+        };
+        template <class U> struct UnVolatile<volatile U>
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <class U> struct UnVolatile<volatile U&>
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+    } // namespace type_traits_detail
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/detail/vec_distance_detail.hpp b/thirdparty/linux/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
new file mode 100644
index 0000000..8283a99
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -0,0 +1,121 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
+
+#include "../datamov_utils.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace vec_distance_detail
+    {
+        template <int THREAD_DIM, int N> struct UnrollVecDiffCached
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
+            {
+                if (ind < len)
+                {
+                    T1 val1 = *vecCached++;
+
+                    T2 val2;
+                    ForceGlob<T2>::Load(vecGlob, ind, val2);
+
+                    dist.reduceIter(val1, val2);
+
+                    UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
+                }
+            }
+
+            template <typename Dist, typename T1, typename T2>
+            static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
+            {
+                T1 val1 = *vecCached++;
+
+                T2 val2;
+                ForceGlob<T2>::Load(vecGlob, 0, val2);
+                vecGlob += THREAD_DIM;
+
+                dist.reduceIter(val1, val2);
+
+                UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
+            }
+        };
+        template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
+            {
+            }
+
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
+            {
+            }
+        };
+
+        template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
+        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
+            {
+                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
+            }
+        };
+        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
+            {
+                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
+            }
+        };
+    } // namespace vec_distance_detail
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/dynamic_smem.hpp b/thirdparty/linux/include/opencv2/core/cuda/dynamic_smem.hpp
new file mode 100644
index 0000000..42570c6
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/dynamic_smem.hpp
@@ -0,0 +1,88 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
+#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<class T> struct DynamicSharedMem
+    {
+        __device__ __forceinline__ operator T*()
+        {
+            extern __shared__ int __smem[];
+            return (T*)__smem;
+        }
+
+        __device__ __forceinline__ operator const T*() const
+        {
+            extern __shared__ int __smem[];
+            return (T*)__smem;
+        }
+    };
+
+    // specialize for double to avoid unaligned memory access compile errors
+    template<> struct DynamicSharedMem<double>
+    {
+        __device__ __forceinline__ operator double*()
+        {
+            extern __shared__ double __smem_d[];
+            return (double*)__smem_d;
+        }
+
+        __device__ __forceinline__ operator const double*() const
+        {
+            extern __shared__ double __smem_d[];
+            return (double*)__smem_d;
+        }
+    };
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/emulation.hpp b/thirdparty/linux/include/opencv2/core/cuda/emulation.hpp
new file mode 100644
index 0000000..d346865
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/emulation.hpp
@@ -0,0 +1,269 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_EMULATION_HPP_
+#define OPENCV_CUDA_EMULATION_HPP_
+
+#include "common.hpp"
+#include "warp_reduce.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Emulation
+    {
+
+        static __device__ __forceinline__ int syncthreadsOr(int pred)
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
+                // just campilation stab
+                return 0;
+#else
+                return __syncthreads_or(pred);
+#endif
+        }
+
+        template<int CTA_SIZE>
+        static __forceinline__ __device__ int Ballot(int predicate)
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
+            return __ballot(predicate);
+#else
+            __shared__ volatile int cta_buffer[CTA_SIZE];
+
+            int tid = threadIdx.x;
+            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
+            return warp_reduce(cta_buffer);
+#endif
+        }
+
+        struct smem
+        {
+            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicInc(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + 1);
+                    *address = count;
+                } while (*address != count);
+
+                return (count & TAG_MASK) - 1;
+#else
+                return ::atomicInc(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicAdd(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + val);
+                    *address = count;
+                } while (*address != count);
+
+                return (count & TAG_MASK) - val;
+#else
+                return ::atomicAdd(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicMin(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count = ::min(*address, val);
+                do
+                {
+                    *address = count;
+                } while (*address > count);
+
+                return count;
+#else
+                return ::atomicMin(address, val);
+#endif
+            }
+        }; // struct cmem
+
+        struct glob
+        {
+            static __device__ __forceinline__ int atomicAdd(int* address, int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ float atomicAdd(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 200
+                return ::atomicAdd(address, val);
+            #else
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(val + __int_as_float(assumed)));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #endif
+            }
+            static __device__ __forceinline__ double atomicAdd(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val + __longlong_as_double(assumed)));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0;
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMin(int* address, int val)
+            {
+                return ::atomicMin(address, val);
+            }
+            static __device__ __forceinline__ float atomicMin(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fminf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0f;
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMin(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0;
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMax(int* address, int val)
+            {
+                return ::atomicMax(address, val);
+            }
+            static __device__ __forceinline__ float atomicMax(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0f;
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMax(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0;
+            #endif
+            }
+        };
+    }; //struct Emulation
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_EMULATION_HPP_ */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/filters.hpp b/thirdparty/linux/include/opencv2/core/cuda/filters.hpp
new file mode 100644
index 0000000..c2e24dd
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/filters.hpp
@@ -0,0 +1,286 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_FILTERS_HPP
+#define OPENCV_CUDA_FILTERS_HPP
+
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "vec_math.hpp"
+#include "type_traits.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename Ptr2D> struct PointFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            (void)fx;
+            (void)fy;
+        }
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            return src(__float2int_rz(y), __float2int_rz(x));
+        }
+
+        Ptr2D src;
+    };
+
+    template <typename Ptr2D> struct LinearFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            (void)fx;
+            (void)fy;
+        }
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+
+            work_type out = VecTraits<work_type>::all(0);
+
+            const int x1 = __float2int_rd(x);
+            const int y1 = __float2int_rd(y);
+            const int x2 = x1 + 1;
+            const int y2 = y1 + 1;
+
+            elem_type src_reg = src(y1, x1);
+            out = out + src_reg * ((x2 - x) * (y2 - y));
+
+            src_reg = src(y1, x2);
+            out = out + src_reg * ((x - x1) * (y2 - y));
+
+            src_reg = src(y2, x1);
+            out = out + src_reg * ((x2 - x) * (y - y1));
+
+            src_reg = src(y2, x2);
+            out = out + src_reg * ((x - x1) * (y - y1));
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+    };
+
+    template <typename Ptr2D> struct CubicFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+
+        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            (void)fx;
+            (void)fy;
+        }
+
+        static __device__ __forceinline__ float bicubicCoeff(float x_)
+        {
+            float x = fabsf(x_);
+            if (x <= 1.0f)
+            {
+                return x * x * (1.5f * x - 2.5f) + 1.0f;
+            }
+            else if (x < 2.0f)
+            {
+                return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
+            }
+            else
+            {
+                return 0.0f;
+            }
+        }
+
+        __device__ elem_type operator ()(float y, float x) const
+        {
+            const float xmin = ::ceilf(x - 2.0f);
+            const float xmax = ::floorf(x + 2.0f);
+
+            const float ymin = ::ceilf(y - 2.0f);
+            const float ymax = ::floorf(y + 2.0f);
+
+            work_type sum = VecTraits<work_type>::all(0);
+            float wsum = 0.0f;
+
+            for (float cy = ymin; cy <= ymax; cy += 1.0f)
+            {
+                for (float cx = xmin; cx <= xmax; cx += 1.0f)
+                {
+                    const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
+                    sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
+                    wsum += w;
+                }
+            }
+
+            work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
+
+            return saturate_cast<elem_type>(res);
+        }
+
+        Ptr2D src;
+    };
+    // for integer scaling
+    template <typename Ptr2D> struct IntegerAreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x;
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1);
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y;
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1);
+            int sy2 = __float2int_rd(fsy2);
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            for(int dy = sy1; dy < sy2; ++dy)
+                for(int dx = sx1; dx < sx2; ++dx)
+                {
+                    out = out + src(dy, dx) * scale;
+                }
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+        float scale_x, scale_y ,scale;
+    };
+
+    template <typename Ptr2D> struct AreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_){}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x;
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1);
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y;
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1);
+            int sy2 = __float2int_rd(fsy2);
+
+            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            for (int dy = sy1; dy < sy2; ++dy)
+            {
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(dy, dx) * scale;
+
+                if (sx1 > fsx1)
+                    out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
+
+                if (sx2 < fsx2)
+                    out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
+            }
+
+            if (sy1 > fsy1)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
+
+            if (sy2 < fsy2)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
+
+            if ((sy1 > fsy1) &&  (sx1 > fsx1))
+                out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
+
+            if ((sy1 > fsy1) &&  (sx2 < fsx2))
+                out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) &&  (sx2 < fsx2))
+                out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) &&  (sx1 > fsx1))
+                out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+        float scale_x, scale_y;
+        int width, haight;
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_FILTERS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/funcattrib.hpp b/thirdparty/linux/include/opencv2/core/cuda/funcattrib.hpp
new file mode 100644
index 0000000..f582080
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/funcattrib.hpp
@@ -0,0 +1,79 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+
+#include <cstdio>
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<class Func>
+    void printFuncAttrib(Func& func)
+    {
+
+        cudaFuncAttributes attrs;
+        cudaFuncGetAttributes(&attrs, func);
+
+        printf("=== Function stats ===\n");
+        printf("Name: \n");
+        printf("sharedSizeBytes    = %d\n", attrs.sharedSizeBytes);
+        printf("constSizeBytes     = %d\n", attrs.constSizeBytes);
+        printf("localSizeBytes     = %d\n", attrs.localSizeBytes);
+        printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
+        printf("numRegs            = %d\n", attrs.numRegs);
+        printf("ptxVersion         = %d\n", attrs.ptxVersion);
+        printf("binaryVersion      = %d\n", attrs.binaryVersion);
+        printf("\n");
+        fflush(stdout);
+    }
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif  /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/functional.hpp b/thirdparty/linux/include/opencv2/core/cuda/functional.hpp
new file mode 100644
index 0000000..5b8a7eb
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/functional.hpp
@@ -0,0 +1,797 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
+#define OPENCV_CUDA_FUNCTIONAL_HPP
+
+#include <functional>
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "type_traits.hpp"
+#include "device_functions.h"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Function Objects
+    template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
+    template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
+
+    // Arithmetic Operations
+    template <typename T> struct plus : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a + b;
+        }
+        __host__ __device__ __forceinline__ plus() {}
+        __host__ __device__ __forceinline__ plus(const plus&) {}
+    };
+
+    template <typename T> struct minus : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a - b;
+        }
+        __host__ __device__ __forceinline__ minus() {}
+        __host__ __device__ __forceinline__ minus(const minus&) {}
+    };
+
+    template <typename T> struct multiplies : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a * b;
+        }
+        __host__ __device__ __forceinline__ multiplies() {}
+        __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
+    };
+
+    template <typename T> struct divides : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a / b;
+        }
+        __host__ __device__ __forceinline__ divides() {}
+        __host__ __device__ __forceinline__ divides(const divides&) {}
+    };
+
+    template <typename T> struct modulus : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a % b;
+        }
+        __host__ __device__ __forceinline__ modulus() {}
+        __host__ __device__ __forceinline__ modulus(const modulus&) {}
+    };
+
+    template <typename T> struct negate : unary_function<T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
+        {
+            return -a;
+        }
+        __host__ __device__ __forceinline__ negate() {}
+        __host__ __device__ __forceinline__ negate(const negate&) {}
+    };
+
+    // Comparison Operations
+    template <typename T> struct equal_to : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a == b;
+        }
+        __host__ __device__ __forceinline__ equal_to() {}
+        __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
+    };
+
+    template <typename T> struct not_equal_to : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a != b;
+        }
+        __host__ __device__ __forceinline__ not_equal_to() {}
+        __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
+    };
+
+    template <typename T> struct greater : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a > b;
+        }
+        __host__ __device__ __forceinline__ greater() {}
+        __host__ __device__ __forceinline__ greater(const greater&) {}
+    };
+
+    template <typename T> struct less : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a < b;
+        }
+        __host__ __device__ __forceinline__ less() {}
+        __host__ __device__ __forceinline__ less(const less&) {}
+    };
+
+    template <typename T> struct greater_equal : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a >= b;
+        }
+        __host__ __device__ __forceinline__ greater_equal() {}
+        __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
+    };
+
+    template <typename T> struct less_equal : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a <= b;
+        }
+        __host__ __device__ __forceinline__ less_equal() {}
+        __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
+    };
+
+    // Logical Operations
+    template <typename T> struct logical_and : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a && b;
+        }
+        __host__ __device__ __forceinline__ logical_and() {}
+        __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
+    };
+
+    template <typename T> struct logical_or : binary_function<T, T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
+                                                    typename TypeTraits<T>::ParameterType b) const
+        {
+            return a || b;
+        }
+        __host__ __device__ __forceinline__ logical_or() {}
+        __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
+    };
+
+    template <typename T> struct logical_not : unary_function<T, bool>
+    {
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
+        {
+            return !a;
+        }
+        __host__ __device__ __forceinline__ logical_not() {}
+        __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
+    };
+
+    // Bitwise Operations
+    template <typename T> struct bit_and : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a & b;
+        }
+        __host__ __device__ __forceinline__ bit_and() {}
+        __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
+    };
+
+    template <typename T> struct bit_or : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a | b;
+        }
+        __host__ __device__ __forceinline__ bit_or() {}
+        __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
+    };
+
+    template <typename T> struct bit_xor : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a ^ b;
+        }
+        __host__ __device__ __forceinline__ bit_xor() {}
+        __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
+    };
+
+    template <typename T> struct bit_not : unary_function<T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
+        {
+            return ~v;
+        }
+        __host__ __device__ __forceinline__ bit_not() {}
+        __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
+    };
+
+    // Generalized Identity Operations
+    template <typename T> struct identity : unary_function<T, T>
+    {
+        __device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
+        {
+            return x;
+        }
+        __host__ __device__ __forceinline__ identity() {}
+        __host__ __device__ __forceinline__ identity(const identity&) {}
+    };
+
+    template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
+    {
+        __device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
+        {
+            return lhs;
+        }
+        __host__ __device__ __forceinline__ project1st() {}
+        __host__ __device__ __forceinline__ project1st(const project1st&) {}
+    };
+
+    template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
+    {
+        __device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
+        {
+            return rhs;
+        }
+        __host__ __device__ __forceinline__ project2nd() {}
+        __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
+    };
+
+    // Min/Max Operations
+
+#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
+    template <> struct name<type> : binary_function<type, type, type> \
+    { \
+        __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
+        __host__ __device__ __forceinline__ name() {}\
+        __host__ __device__ __forceinline__ name(const name&) {}\
+    };
+
+    template <typename T> struct maximum : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return max(lhs, rhs);
+        }
+        __host__ __device__ __forceinline__ maximum() {}
+        __host__ __device__ __forceinline__ maximum(const maximum&) {}
+    };
+
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
+
+    template <typename T> struct minimum : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return min(lhs, rhs);
+        }
+        __host__ __device__ __forceinline__ minimum() {}
+        __host__ __device__ __forceinline__ minimum(const minimum&) {}
+    };
+
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
+
+#undef OPENCV_CUDA_IMPLEMENT_MINMAX
+
+    // Math functions
+
+    template <typename T> struct abs_func : unary_function<T, T>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
+        {
+            return abs(x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
+    {
+        __device__ __forceinline__ unsigned char operator ()(unsigned char x) const
+        {
+            return x;
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<signed char> : unary_function<signed char, signed char>
+    {
+        __device__ __forceinline__ signed char operator ()(signed char x) const
+        {
+            return ::abs((int)x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<char> : unary_function<char, char>
+    {
+        __device__ __forceinline__ char operator ()(char x) const
+        {
+            return ::abs((int)x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
+    {
+        __device__ __forceinline__ unsigned short operator ()(unsigned short x) const
+        {
+            return x;
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<short> : unary_function<short, short>
+    {
+        __device__ __forceinline__ short operator ()(short x) const
+        {
+            return ::abs((int)x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
+    {
+        __device__ __forceinline__ unsigned int operator ()(unsigned int x) const
+        {
+            return x;
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<int> : unary_function<int, int>
+    {
+        __device__ __forceinline__ int operator ()(int x) const
+        {
+            return ::abs(x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<float> : unary_function<float, float>
+    {
+        __device__ __forceinline__ float operator ()(float x) const
+        {
+            return ::fabsf(x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+    template <> struct abs_func<double> : unary_function<double, double>
+    {
+        __device__ __forceinline__ double operator ()(double x) const
+        {
+            return ::fabs(x);
+        }
+
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+    };
+
+#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
+    template <typename T> struct name ## _func : unary_function<T, float> \
+    { \
+        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
+        { \
+            return func ## f(v); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    }; \
+    template <> struct name ## _func<double> : unary_function<double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double v) const \
+        { \
+            return func(v); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    };
+
+#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
+    template <typename T> struct name ## _func : binary_function<T, T, float> \
+    { \
+        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
+        { \
+            return func ## f(v1, v2); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    }; \
+    template <> struct name ## _func<double> : binary_function<double, double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double v1, double v2) const \
+        { \
+            return func(v1, v2); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    };
+
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
+
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
+
+    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
+    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
+    #undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
+
+    template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
+    {
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
+        {
+            return src1 * src1 + src2 * src2;
+        }
+        __host__ __device__ __forceinline__ hypot_sqr_func() {}
+        __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
+    };
+
+    // Saturate Cast Functor
+    template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
+    {
+        __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
+        {
+            return saturate_cast<D>(v);
+        }
+        __host__ __device__ __forceinline__ saturate_cast_func() {}
+        __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
+    };
+
+    // Threshold Functors
+    template <typename T> struct thresh_binary_func : unary_function<T, T>
+    {
+        __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
+        {
+            return (src > thresh) * maxVal;
+        }
+
+        __host__ __device__ __forceinline__ thresh_binary_func() {}
+        __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
+
+        T thresh;
+        T maxVal;
+    };
+
+    template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
+    {
+        __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
+        {
+            return (src <= thresh) * maxVal;
+        }
+
+        __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
+        __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
+
+        T thresh;
+        T maxVal;
+    };
+
+    template <typename T> struct thresh_trunc_func : unary_function<T, T>
+    {
+        explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
+        {
+            return minimum<T>()(src, thresh);
+        }
+
+        __host__ __device__ __forceinline__ thresh_trunc_func() {}
+        __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
+            : thresh(other.thresh) {}
+
+        T thresh;
+    };
+
+    template <typename T> struct thresh_to_zero_func : unary_function<T, T>
+    {
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
+        {
+            return (src > thresh) * src;
+        }
+
+        __host__ __device__ __forceinline__ thresh_to_zero_func() {}
+       __host__  __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
+            : thresh(other.thresh) {}
+
+        T thresh;
+    };
+
+    template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
+    {
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
+        {
+            return (src <= thresh) * src;
+        }
+
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
+            : thresh(other.thresh) {}
+
+        T thresh;
+    };
+
+    // Function Object Adaptors
+    template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
+    {
+      explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
+
+      __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
+      {
+          return !pred(x);
+      }
+
+      __host__ __device__ __forceinline__ unary_negate() {}
+      __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
+
+      Predicate pred;
+    };
+
+    template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
+    {
+        return unary_negate<Predicate>(pred);
+    }
+
+    template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
+    {
+        explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
+
+        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
+                                                   typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
+        {
+            return !pred(x,y);
+        }
+
+        __host__ __device__ __forceinline__ binary_negate() {}
+        __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
+
+        Predicate pred;
+    };
+
+    template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
+    {
+        return binary_negate<BinaryPredicate>(pred);
+    }
+
+    template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
+    {
+        __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
+
+        __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
+        {
+            return op(arg1, a);
+        }
+
+        __host__ __device__ __forceinline__ binder1st() {}
+        __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
+
+        Op op;
+        typename Op::first_argument_type arg1;
+    };
+
+    template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
+    {
+        return binder1st<Op>(op, typename Op::first_argument_type(x));
+    }
+
+    template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
+    {
+        __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
+
+        __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
+        {
+            return op(a, arg2);
+        }
+
+        __host__ __device__ __forceinline__ binder2nd() {}
+        __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
+
+        Op op;
+        typename Op::second_argument_type arg2;
+    };
+
+    template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
+    {
+        return binder2nd<Op>(op, typename Op::second_argument_type(x));
+    }
+
+    // Functor Traits
+    template <typename F> struct IsUnaryFunction
+    {
+        typedef char Yes;
+        struct No {Yes a[2];};
+
+        template <typename T, typename D> static Yes check(unary_function<T, D>);
+        static No check(...);
+
+        static F makeF();
+
+        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+    };
+
+    template <typename F> struct IsBinaryFunction
+    {
+        typedef char Yes;
+        struct No {Yes a[2];};
+
+        template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
+        static No check(...);
+
+        static F makeF();
+
+        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+    };
+
+    namespace functional_detail
+    {
+        template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
+        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
+        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
+
+        template <typename T, typename D> struct DefaultUnaryShift
+        {
+            enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
+        };
+
+        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
+        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
+        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
+
+        template <typename T1, typename T2, typename D> struct DefaultBinaryShift
+        {
+            enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
+        };
+
+        template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
+        template <typename Func> struct ShiftDispatcher<Func, true>
+        {
+            enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
+        };
+        template <typename Func> struct ShiftDispatcher<Func, false>
+        {
+            enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
+        };
+    }
+
+    template <typename Func> struct DefaultTransformShift
+    {
+        enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
+    };
+
+    template <typename Func> struct DefaultTransformFunctorTraits
+    {
+        enum { simple_block_dim_x = 16 };
+        enum { simple_block_dim_y = 16 };
+
+        enum { smart_block_dim_x = 16 };
+        enum { smart_block_dim_y = 16 };
+        enum { smart_shift = DefaultTransformShift<Func>::shift };
+    };
+
+    template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
+
+#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
+    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_FUNCTIONAL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/limits.hpp b/thirdparty/linux/include/opencv2/core/cuda/limits.hpp
new file mode 100644
index 0000000..7e15ed6
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/limits.hpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_LIMITS_HPP
+#define OPENCV_CUDA_LIMITS_HPP
+
+#include <limits.h>
+#include <float.h>
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+template <class T> struct numeric_limits;
+
+template <> struct numeric_limits<bool>
+{
+    __device__ __forceinline__ static bool min() { return false; }
+    __device__ __forceinline__ static bool max() { return true;  }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<signed char>
+{
+    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
+    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned char>
+{
+    __device__ __forceinline__ static unsigned char min() { return 0; }
+    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<short>
+{
+    __device__ __forceinline__ static short min() { return SHRT_MIN; }
+    __device__ __forceinline__ static short max() { return SHRT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned short>
+{
+    __device__ __forceinline__ static unsigned short min() { return 0; }
+    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<int>
+{
+    __device__ __forceinline__ static int min() { return INT_MIN; }
+    __device__ __forceinline__ static int max() { return INT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned int>
+{
+    __device__ __forceinline__ static unsigned int min() { return 0; }
+    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<float>
+{
+    __device__ __forceinline__ static float min() { return FLT_MIN; }
+    __device__ __forceinline__ static float max() { return FLT_MAX; }
+    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<double>
+{
+    __device__ __forceinline__ static double min() { return DBL_MIN; }
+    __device__ __forceinline__ static double max() { return DBL_MAX; }
+    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
+    static const bool is_signed = true;
+};
+}}} // namespace cv { namespace cuda { namespace cudev {
+
+//! @endcond
+
+#endif // OPENCV_CUDA_LIMITS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/reduce.hpp b/thirdparty/linux/include/opencv2/core/cuda/reduce.hpp
new file mode 100644
index 0000000..5de3650
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/reduce.hpp
@@ -0,0 +1,209 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_HPP
+#define OPENCV_CUDA_REDUCE_HPP
+
+#ifndef THRUST_DEBUG // eliminate -Wundef warning
+#define THRUST_DEBUG 0
+#endif
+
+#include <thrust/tuple.h>
+#include "detail/reduce.hpp"
+#include "detail/reduce_key_val.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <int N, typename T, class Op>
+    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
+    {
+        reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
+    }
+    template <int N,
+              typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                           unsigned int tid,
+                                           const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        reduce_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
+                const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
+                const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
+    }
+
+    template <unsigned int N, typename K, typename V, class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename K,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, const Cmp& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+    __device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                                 const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid,
+                                                 const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
+                const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
+                >(skeys, key, svals, val, tid, cmp);
+    }
+
+    // smem_tuple
+
+    template <typename T0>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*>
+    smem_tuple(T0* t0)
+    {
+        return thrust::make_tuple((volatile T0*) t0);
+    }
+
+    template <typename T0, typename T1>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*>
+    smem_tuple(T0* t0, T1* t1)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
+    }
+
+    template <typename T0, typename T1, typename T2>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
+    smem_tuple(T0* t0, T1* t1, T2* t2)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/saturate_cast.hpp b/thirdparty/linux/include/opencv2/core/cuda/saturate_cast.hpp
new file mode 100644
index 0000000..c3a3d1c
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/saturate_cast.hpp
@@ -0,0 +1,292 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
+#define OPENCV_CUDA_SATURATE_CAST_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
+
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
+    {
+        uint res = 0;
+        int vi = v;
+        asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
+    {
+        uint res = 0;
+        asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        uint res = 0;
+        asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<uchar>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
+    {
+        uint res = 0;
+        uint vi = v;
+        asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
+    {
+        uint res = 0;
+        asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
+    {
+        uint res = 0;
+        asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
+    {
+        uint res = 0;
+        asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
+    {
+        uint res = 0;
+        asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
+    {
+        uint res = 0;
+        asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        uint res = 0;
+        asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<schar>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
+    {
+        ushort res = 0;
+        int vi = v;
+        asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
+    {
+        ushort res = 0;
+        asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
+    {
+        ushort res = 0;
+        asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
+    {
+        ushort res = 0;
+        asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
+    {
+        ushort res = 0;
+        asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        ushort res = 0;
+        asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<ushort>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
+    {
+        short res = 0;
+        asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
+    {
+        short res = 0;
+        asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
+    {
+        short res = 0;
+        asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
+    {
+        short res = 0;
+        asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        short res = 0;
+        asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<short>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
+    {
+        int res = 0;
+        asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
+    {
+        return __float2int_rn(v);
+    }
+    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        return __double2int_rn(v);
+    #else
+        return saturate_cast<int>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
+    {
+        uint res = 0;
+        int vi = v;
+        asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
+    {
+        uint res = 0;
+        asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
+    {
+        return __float2uint_rn(v);
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        return __double2uint_rn(v);
+    #else
+        return saturate_cast<uint>((float)v);
+    #endif
+    }
+}}}
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/scan.hpp b/thirdparty/linux/include/opencv2/core/cuda/scan.hpp
new file mode 100644
index 0000000..e07ee65
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/scan.hpp
@@ -0,0 +1,258 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_SCAN_HPP
+#define OPENCV_CUDA_SCAN_HPP
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/utility.hpp"
+#include "opencv2/core/cuda/warp.hpp"
+#include "opencv2/core/cuda/warp_shuffle.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };
+
+    template <ScanKind Kind, typename T, typename F> struct WarpScan
+    {
+        __device__ __forceinline__ WarpScan() {}
+        __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
+
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int lane = idx & 31;
+            F op;
+
+            if ( lane >=  1) ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
+            if ( lane >=  2) ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
+            if ( lane >=  4) ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
+            if ( lane >=  8) ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
+            if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
+
+            if( Kind == INCLUSIVE )
+                return ptr [idx];
+            else
+                return (lane > 0) ? ptr [idx - 1] : 0;
+        }
+
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            return tid;
+        }
+
+        __device__ __forceinline__ void init(volatile T *ptr){}
+
+        static const int warp_offset      = 0;
+
+        typedef WarpScan<INCLUSIVE, T, F>  merge;
+    };
+
+    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
+    {
+        __device__ __forceinline__ WarpScanNoComp() {}
+        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
+
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int lane = threadIdx.x & 31;
+            F op;
+
+            ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
+            ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
+
+            if( Kind == INCLUSIVE )
+                return ptr [idx];
+            else
+                return (lane > 0) ? ptr [idx - 1] : 0;
+        }
+
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
+        }
+
+        __device__ __forceinline__ void init(volatile T *ptr)
+        {
+            ptr[threadIdx.x] = 0;
+        }
+
+        static const int warp_smem_stride = 32 + 16 + 1;
+        static const int warp_offset      = 16;
+        static const int warp_log         = 5;
+        static const int warp_mask        = 31;
+
+        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
+    };
+
+    template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
+    {
+        __device__ __forceinline__ BlockScan() {}
+        __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
+
+        __device__ __forceinline__ T operator()(volatile T *ptr)
+        {
+            const unsigned int tid  = threadIdx.x;
+            const unsigned int lane = tid & warp_mask;
+            const unsigned int warp = tid >> warp_log;
+
+            Sc scan;
+            typename Sc::merge merge_scan;
+            const unsigned int idx = scan.index(tid);
+
+            T val = scan(ptr, idx);
+            __syncthreads ();
+
+            if( warp == 0)
+                scan.init(ptr);
+            __syncthreads ();
+
+            if( lane == 31 )
+                ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
+            __syncthreads ();
+
+            if( warp == 0 )
+                merge_scan(ptr, idx);
+            __syncthreads();
+
+            if ( warp > 0)
+                val = ptr [scan.warp_offset + warp - 1] + val;
+            __syncthreads ();
+
+            ptr[idx] = val;
+            __syncthreads ();
+
+            return val ;
+        }
+
+        static const int warp_log  = 5;
+        static const int warp_mask = 31;
+    };
+
+    template <typename T>
+    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int laneId = cv::cuda::device::Warp::laneId();
+
+        // scan on shuffl functions
+        #pragma unroll
+        for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
+        {
+            const T n = cv::cuda::device::shfl_up(idata, i);
+            if (laneId >= i)
+                  idata += n;
+        }
+
+        return idata;
+    #else
+        unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
+        s_Data[pos] = 0;
+        pos += OPENCV_CUDA_WARP_SIZE;
+        s_Data[pos] = idata;
+
+        s_Data[pos] += s_Data[pos - 1];
+        s_Data[pos] += s_Data[pos - 2];
+        s_Data[pos] += s_Data[pos - 4];
+        s_Data[pos] += s_Data[pos - 8];
+        s_Data[pos] += s_Data[pos - 16];
+
+        return s_Data[pos];
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        return warpScanInclusive(idata, s_Data, tid) - idata;
+    }
+
+    template <int tiNumScanThreads, typename T>
+    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
+        {
+            //Bottom-level inclusive warp scan
+            T warpResult = warpScanInclusive(idata, s_Data, tid);
+
+            //Save top elements of each warp for exclusive warp scan
+            //sync to wait for warp scans to complete (because s_Data is being overwritten)
+            __syncthreads();
+            if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
+            {
+                s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
+            }
+
+            //wait for warp scans to complete
+            __syncthreads();
+
+            if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
+            {
+                //grab top warp elements
+                T val = s_Data[tid];
+                //calculate exclusive scan and write back to shared memory
+                s_Data[tid] = warpScanExclusive(val, s_Data, tid);
+            }
+
+            //return updated warp scans with exclusive scan results
+            __syncthreads();
+
+            return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
+        }
+        else
+        {
+            return warpScanInclusive(idata, s_Data, tid);
+        }
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_SCAN_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/simd_functions.hpp b/thirdparty/linux/include/opencv2/core/cuda/simd_functions.hpp
new file mode 100644
index 0000000..3d8c2e0
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/simd_functions.hpp
@@ -0,0 +1,869 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ *   Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ *   Neither the name of NVIDIA Corporation nor the names of its contributors
+ *   may be used to endorse or promote products derived from this software
+ *   without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
+#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // 2
+
+    static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = a ^ b;          // sum bits
+        r = a + b;          // actual sum
+        s = s ^ r;          // determine carry-ins for each bit position
+        s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
+        r = r - s;          // subtract out carry-out from low word
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = a ^ b;          // sum bits
+        r = a - b;          // actual sum
+        s = s ^ r;          // determine carry-ins for each bit position
+        s = s & 0x00010000; // borrow to high word
+        r = r + s;          // compensate for borrow from low word
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s, t, u, v;
+        s = a & 0x0000ffff; // extract low halfword
+        r = b & 0x0000ffff; // extract low halfword
+        u = ::max(r, s);    // maximum of low halfwords
+        v = ::min(r, s);    // minimum of low halfwords
+        s = a & 0xffff0000; // extract high halfword
+        r = b & 0xffff0000; // extract high halfword
+        t = ::max(r, s);    // maximum of high halfwords
+        s = ::min(r, s);    // minimum of high halfwords
+        r = u | t;          // maximum of both halfwords
+        s = v | s;          // minimum of both halfwords
+        r = r - s;          // |a - b| = max(a,b) - min(a,b);
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, s;
+
+        // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+        // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+        s = a ^ b;
+        r = a & b;
+        s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
+        s = s >> 1;
+        s = r + s;
+
+        return s;
+    }
+
+    static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+        unsigned int s;
+        s = a ^ b;
+        r = a | b;
+        s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
+        s = s >> 1;
+        r = r - s;
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        unsigned int c;
+        r = a ^ b;          // 0x0000 if a == b
+        c = r | 0x80008000; // set msbs, to catch carry out
+        r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
+        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+        c = r & ~c;         // msb = 1, if r was 0x0000
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vseteq2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        r = a ^ b;          // 0x0000 if a == b
+        c = r | 0x80008000; // set msbs, to catch carry out
+        r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
+        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+        c = r & ~c;         // msb = 1, if r was 0x0000
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetge2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80008000; // msbs = carry-outs
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetgt2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80008000; // msbs = carry-outs
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetle2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetlt2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80008000; // msb = carry-outs
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        unsigned int c;
+        r = a ^ b;          // 0x0000 if a == b
+        c = r | 0x80008000; // set msbs, to catch carry out
+        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+        c = r | c;          // msb = 1, if r was not 0x0000
+        c = c & 0x80008000; // extract msbs
+        r = c >> 15;        // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetne2(a, b);
+        c = r << 16;        // convert bool
+        r = c - r;          //  into mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        r = a ^ b;          // 0x0000 if a == b
+        c = r | 0x80008000; // set msbs, to catch carry out
+        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
+        c = r | c;          // msb = 1, if r was not 0x0000
+        c = c & 0x80008000; // extract msbs
+        r = c >> 15;        // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s, t, u;
+        r = a & 0x0000ffff; // extract low halfword
+        s = b & 0x0000ffff; // extract low halfword
+        t = ::max(r, s);    // maximum of low halfwords
+        r = a & 0xffff0000; // extract high halfword
+        s = b & 0xffff0000; // extract high halfword
+        u = ::max(r, s);    // maximum of high halfwords
+        r = t | u;          // combine halfword maximums
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s, t, u;
+        r = a & 0x0000ffff; // extract low halfword
+        s = b & 0x0000ffff; // extract low halfword
+        t = ::min(r, s);    // minimum of low halfwords
+        r = a & 0xffff0000; // extract high halfword
+        s = b & 0xffff0000; // extract high halfword
+        u = ::min(r, s);    // minimum of high halfwords
+        r = t | u;          // combine halfword minimums
+    #endif
+
+        return r;
+    }
+
+    // 4
+
+    static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s, t;
+        s = a ^ b;          // sum bits
+        r = a & 0x7f7f7f7f; // clear msbs
+        t = b & 0x7f7f7f7f; // clear msbs
+        s = s & 0x80808080; // msb sum bits
+        r = r + t;          // add without msbs, record carry-out in msbs
+        r = r ^ s;          // sum of msb sum and carry-in bits, w/o carry-out
+    #endif /* __CUDA_ARCH__ >= 300 */
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s, t;
+        s = a ^ ~b;         // inverted sum bits
+        r = a | 0x80808080; // set msbs
+        t = b & 0x7f7f7f7f; // clear msbs
+        s = s & 0x80808080; // inverted msb sum bits
+        r = r - t;          // subtract w/o msbs, record inverted borrows in msb
+        r = r ^ s;          // combine inverted msb sum bits and borrows
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, s;
+
+        // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+        // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+        s = a ^ b;
+        r = a & b;
+        s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
+        s = s >> 1;
+        s = r + s;
+
+        return s;
+    }
+
+    static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+        unsigned int c;
+        c = a ^ b;
+        r = a | b;
+        c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
+        c = c >> 1;
+        r = r - c;
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        unsigned int c;
+        r = a ^ b;          // 0x00 if a == b
+        c = r | 0x80808080; // set msbs, to catch carry out
+        r = r ^ c;          // extract msbs, msb = 1 if r < 0x80
+        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+        c = r & ~c;         // msb = 1, if r was 0x00
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, t;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vseteq4(a, b);
+        t = r << 8;         // convert bool
+        r = t - r;          //  to mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        t = a ^ b;          // 0x00 if a == b
+        r = t | 0x80808080; // set msbs, to catch carry out
+        t = t ^ r;          // extract msbs, msb = 1 if t < 0x80
+        r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
+        r = t & ~r;         // msb = 1, if t was 0x00
+        t = r >> 7;         // build mask
+        t = r - t;          //  from
+        r = t | r;          //   msbs
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetle4(a, b);
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80808080; // msbs = carry-outs
+        r = c >> 7;         // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetlt4(a, b);
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));
+        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80808080; // msbs = carry-outs
+        r = c >> 7;         // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavrg4(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, s;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetge4(a, b);
+        s = r << 8;         // convert bool
+        r = s - r;          //  to mask
+    #else
+        asm ("not.b32 %0,%0;" : "+r"(b));
+        r = vavrg4 (a, b);  // (a + ~b + 1) / 2 = (a - b) / 2
+        r = r & 0x80808080; // msb = carry-outs
+        s = r >> 7;         // build mask
+        s = r - s;          //  from
+        r = s | r;          //   msbs
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetgt4(a, b);
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));
+        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        unsigned int c;
+        r = a ^ b;          // 0x00 if a == b
+        c = r | 0x80808080; // set msbs, to catch carry out
+        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+        c = r | c;          // msb = 1, if r was not 0x00
+        c = c & 0x80808080; // extract msbs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetne4(a, b);
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        r = a ^ b;          // 0x00 if a == b
+        c = r | 0x80808080; // set msbs, to catch carry out
+        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+        c = r | c;          // msb = 1, if r was not 0x00
+        c = c & 0x80808080; // extract msbs
+        r = c >> 7;         // convert
+        r = c - r;          //  msbs to
+        r = c | r;          //   mask
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(a, b);  // mask = 0xff if a >= b
+        r = a ^ b;          //
+        s = (r &  s) ^ b;   // select a when a >= b, else select b => max(a,b)
+        r = s ^ r;          // select a when b >= a, else select b => min(a,b)
+        r = s - r;          // |a - b| = max(a,b) - min(a,b);
+    #endif
+
+        return r;
+    }
+
+    static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(a, b);  // mask = 0xff if a >= b
+        r = a & s;          // select a when b >= a
+        s = b & ~s;         // select b when b < a
+        r = r | s;          // combine byte selections
+    #endif
+
+        return r;           // byte-wise unsigned maximum
+    }
+
+    static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(b, a);  // mask = 0xff if a >= b
+        r = a & s;          // select a when b >= a
+        s = b & ~s;         // select b when b < a
+        r = r | s;          // combine byte selections
+    #endif
+
+        return r;
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/transform.hpp b/thirdparty/linux/include/opencv2/core/cuda/transform.hpp
new file mode 100644
index 0000000..42aa6ea
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/transform.hpp
@@ -0,0 +1,75 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TRANSFORM_HPP
+#define OPENCV_CUDA_TRANSFORM_HPP
+
+#include "common.hpp"
+#include "utility.hpp"
+#include "detail/transform_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename T, typename D, typename UnOp, typename Mask>
+    static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
+    {
+        typedef TransformFunctorTraits<UnOp> ft;
+        transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
+    }
+
+    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+    static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
+    {
+        typedef TransformFunctorTraits<BinOp> ft;
+        transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TRANSFORM_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/type_traits.hpp b/thirdparty/linux/include/opencv2/core/cuda/type_traits.hpp
new file mode 100644
index 0000000..8b7a3fd
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/type_traits.hpp
@@ -0,0 +1,90 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_HPP
+
+#include "detail/type_traits_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename T> struct IsSimpleParameter
+    {
+        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
+            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
+    };
+
+    template <typename T> struct TypeTraits
+    {
+        typedef typename type_traits_detail::UnConst<T>::type                                                NonConstType;
+        typedef typename type_traits_detail::UnVolatile<T>::type                                             NonVolatileType;
+        typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
+        typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type                            PointeeType;
+        typedef typename type_traits_detail::ReferenceTraits<T>::type                                        ReferredType;
+
+        enum { isConst          = type_traits_detail::UnConst<T>::value };
+        enum { isVolatile       = type_traits_detail::UnVolatile<T>::value };
+
+        enum { isReference      = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
+        enum { isPointer        = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
+
+        enum { isUnsignedInt    = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
+        enum { isSignedInt      = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
+        enum { isIntegral       = type_traits_detail::IsIntegral<UnqualifiedType>::value };
+        enum { isFloat          = type_traits_detail::IsFloat<UnqualifiedType>::value };
+        enum { isArith          = isIntegral || isFloat };
+        enum { isVec            = type_traits_detail::IsVec<UnqualifiedType>::value };
+
+        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
+            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
+    };
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/utility.hpp b/thirdparty/linux/include/opencv2/core/cuda/utility.hpp
new file mode 100644
index 0000000..7f5db48
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/utility.hpp
@@ -0,0 +1,230 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_UTILITY_HPP
+#define OPENCV_CUDA_UTILITY_HPP
+
+#include "saturate_cast.hpp"
+#include "datamov_utils.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct CV_EXPORTS ThrustAllocator
+    {
+        typedef uchar value_type;
+        virtual ~ThrustAllocator();
+        virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
+        virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
+        static ThrustAllocator& getAllocator();
+        static void setAllocator(ThrustAllocator* allocator);
+    };
+    #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
+    #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
+    #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
+    #define OPENCV_CUDA_MEM_BANKS            (1 << OPENCV_CUDA_LOG_MEM_BANKS)
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // swap
+
+    template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
+    {
+        const T temp = a;
+        a = b;
+        b = temp;
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Mask Reader
+
+    struct SingleMask
+    {
+        explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
+        __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
+
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            return mask.ptr(y)[x] != 0;
+        }
+
+        PtrStepb mask;
+    };
+
+    struct SingleMaskChannels
+    {
+        __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
+        : mask(mask_), channels(channels_) {}
+        __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
+            :mask(mask_.mask), channels(mask_.channels){}
+
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            return mask.ptr(y)[x / channels] != 0;
+        }
+
+        PtrStepb mask;
+        int channels;
+    };
+
+    struct MaskCollection
+    {
+        explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
+            : maskCollection(maskCollection_) {}
+
+        __device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
+            : maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
+
+        __device__ __forceinline__ void next()
+        {
+            curMask = *maskCollection++;
+        }
+        __device__ __forceinline__ void setMask(int z)
+        {
+            curMask = maskCollection[z];
+        }
+
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            uchar val;
+            return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
+        }
+
+        const PtrStepb* maskCollection;
+        PtrStepb curMask;
+    };
+
+    struct WithOutMask
+    {
+        __host__ __device__ __forceinline__ WithOutMask(){}
+        __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
+
+        __device__ __forceinline__ void next() const
+        {
+        }
+        __device__ __forceinline__ void setMask(int) const
+        {
+        }
+
+        __device__ __forceinline__ bool operator()(int, int) const
+        {
+            return true;
+        }
+
+        __device__ __forceinline__ bool operator()(int, int, int) const
+        {
+            return true;
+        }
+
+        static __device__ __forceinline__ bool check(int, int)
+        {
+            return true;
+        }
+
+        static __device__ __forceinline__ bool check(int, int, int)
+        {
+            return true;
+        }
+    };
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Solve linear system
+
+    // solve 2x2 linear system Ax=b
+    template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
+    {
+        T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
+
+        if (det != 0)
+        {
+            double invdet = 1.0 / det;
+
+            x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
+
+            x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
+
+            return true;
+        }
+
+        return false;
+    }
+
+    // solve 3x3 linear system Ax=b
+    template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
+    {
+        T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
+              - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+              + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
+
+        if (det != 0)
+        {
+            double invdet = 1.0 / det;
+
+            x[0] = saturate_cast<T>(invdet *
+                (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
+                 A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
+                 A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   )));
+
+            x[1] = saturate_cast<T>(invdet *
+                (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
+                 b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
+                 A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0])));
+
+            x[2] = saturate_cast<T>(invdet *
+                (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
+                 A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
+                 b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
+
+            return true;
+        }
+
+        return false;
+    }
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_UTILITY_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/vec_distance.hpp b/thirdparty/linux/include/opencv2/core/cuda/vec_distance.hpp
new file mode 100644
index 0000000..ef6e510
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/vec_distance.hpp
@@ -0,0 +1,232 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_HPP
+
+#include "reduce.hpp"
+#include "functional.hpp"
+#include "detail/vec_distance_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename T> struct L1Dist
+    {
+        typedef int value_type;
+        typedef int result_type;
+
+        __device__ __forceinline__ L1Dist() : mySum(0) {}
+
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
+        {
+            mySum = __sad(val1, val2, mySum);
+        }
+
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
+        }
+
+        __device__ __forceinline__ operator int() const
+        {
+            return mySum;
+        }
+
+        int mySum;
+    };
+    template <> struct L1Dist<float>
+    {
+        typedef float value_type;
+        typedef float result_type;
+
+        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}
+
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
+        {
+            mySum += ::fabs(val1 - val2);
+        }
+
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
+        }
+
+        __device__ __forceinline__ operator float() const
+        {
+            return mySum;
+        }
+
+        float mySum;
+    };
+
+    struct L2Dist
+    {
+        typedef float value_type;
+        typedef float result_type;
+
+        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}
+
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
+        {
+            float reg = val1 - val2;
+            mySum += reg * reg;
+        }
+
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
+        }
+
+        __device__ __forceinline__ operator float() const
+        {
+            return sqrtf(mySum);
+        }
+
+        float mySum;
+    };
+
+    struct HammingDist
+    {
+        typedef int value_type;
+        typedef int result_type;
+
+        __device__ __forceinline__ HammingDist() : mySum(0) {}
+
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
+        {
+            mySum += __popc(val1 ^ val2);
+        }
+
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
+        }
+
+        __device__ __forceinline__ operator int() const
+        {
+            return mySum;
+        }
+
+        int mySum;
+    };
+
+    // calc distance between two vectors in global memory
+    template <int THREAD_DIM, typename Dist, typename T1, typename T2>
+    __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
+    {
+        for (int i = tid; i < len; i += THREAD_DIM)
+        {
+            T1 val1;
+            ForceGlob<T1>::Load(vec1, i, val1);
+
+            T2 val2;
+            ForceGlob<T2>::Load(vec2, i, val2);
+
+            dist.reduceIter(val1, val2);
+        }
+
+        dist.reduceAll<THREAD_DIM>(smem, tid);
+    }
+
+    // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
+    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
+    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
+    {
+        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
+
+        dist.reduceAll<THREAD_DIM>(smem, tid);
+    }
+
+    // calc distance between two vectors in global memory
+    template <int THREAD_DIM, typename T1> struct VecDiffGlobal
+    {
+        explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
+        {
+            vec1 = vec1_;
+        }
+
+        template <typename T2, typename Dist>
+        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
+        {
+            calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
+        }
+
+        const T1* vec1;
+    };
+
+    // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
+    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
+    {
+        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
+        {
+            if (glob_tid < len)
+                smem[glob_tid] = vec1[glob_tid];
+            __syncthreads();
+
+            U* vec1ValsPtr = vec1Vals;
+
+            #pragma unroll
+            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
+                *vec1ValsPtr++ = smem[i];
+
+            __syncthreads();
+        }
+
+        template <typename T2, typename Dist>
+        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
+        {
+            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
+        }
+
+        U vec1Vals[MAX_LEN / THREAD_DIM];
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/vec_math.hpp b/thirdparty/linux/include/opencv2/core/cuda/vec_math.hpp
new file mode 100644
index 0000000..9085b92
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/vec_math.hpp
@@ -0,0 +1,930 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VECMATH_HPP
+#define OPENCV_CUDA_VECMATH_HPP
+
+#include "vec_traits.hpp"
+#include "saturate_cast.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+
+// saturate_cast
+
+namespace vec_math_detail
+{
+    template <int cn, typename VecD> struct SatCastHelper;
+    template <typename VecD> struct SatCastHelper<1, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<2, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<3, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<4, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
+        }
+    };
+
+    template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
+    {
+        return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
+    }
+}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+// unary operators
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(op (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
+
+// unary functions
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
+
+// binary operators (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
+
+// binary operators (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op s); \
+    } \
+    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(s op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
+
+// binary function (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
+
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VECMATH_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/vec_traits.hpp b/thirdparty/linux/include/opencv2/core/cuda/vec_traits.hpp
new file mode 100644
index 0000000..b5ff281
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/vec_traits.hpp
@@ -0,0 +1,288 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
+#define OPENCV_CUDA_VEC_TRAITS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<typename T, int N> struct TypeVec;
+
+    struct __align__(8) uchar8
+    {
+        uchar a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
+    {
+        uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(8) char8
+    {
+        schar a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
+    {
+        char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(16) ushort8
+    {
+        ushort a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
+    {
+        ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(16) short8
+    {
+        short a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
+    {
+        short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) uint8
+    {
+        uint a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
+    {
+        uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) int8
+    {
+        int a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
+    {
+        int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) float8
+    {
+        float a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
+    {
+        float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct double8
+    {
+        double a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
+    {
+        double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
+    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
+    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
+    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
+    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
+
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
+
+    #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
+
+    template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
+    template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
+    template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
+    template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
+    template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
+
+    template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
+    template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
+    template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
+    template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
+    template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
+
+    template<typename T> struct VecTraits;
+
+#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
+    template<> struct VecTraits<type> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
+        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
+        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
+    }; \
+    template<> struct VecTraits<type ## 1> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
+    }; \
+    template<> struct VecTraits<type ## 2> \
+    { \
+        typedef type elem_type; \
+        enum {cn=2}; \
+        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
+    }; \
+    template<> struct VecTraits<type ## 3> \
+    { \
+        typedef type elem_type; \
+        enum {cn=3}; \
+        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
+    }; \
+    template<> struct VecTraits<type ## 4> \
+    { \
+        typedef type elem_type; \
+        enum {cn=4}; \
+        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
+    }; \
+    template<> struct VecTraits<type ## 8> \
+    { \
+        typedef type elem_type; \
+        enum {cn=8}; \
+        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
+    };
+
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
+
+    #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
+
+    template<> struct VecTraits<char>
+    {
+        typedef char elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ char all(char v) {return v;}
+        static __device__ __host__ __forceinline__ char make(char x) {return x;}
+        static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
+    };
+    template<> struct VecTraits<schar>
+    {
+        typedef schar elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
+        static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
+        static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
+    };
+    template<> struct VecTraits<char1>
+    {
+        typedef schar elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
+        static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
+        static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
+    };
+    template<> struct VecTraits<char2>
+    {
+        typedef schar elem_type;
+        enum {cn=2};
+        static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
+        static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
+        static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
+    };
+    template<> struct VecTraits<char3>
+    {
+        typedef schar elem_type;
+        enum {cn=3};
+        static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
+        static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
+        static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
+    };
+    template<> struct VecTraits<char4>
+    {
+        typedef schar elem_type;
+        enum {cn=4};
+        static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
+        static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
+        static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
+    };
+    template<> struct VecTraits<char8>
+    {
+        typedef schar elem_type;
+        enum {cn=8};
+        static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
+        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
+        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_TRAITS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda/warp.hpp b/thirdparty/linux/include/opencv2/core/cuda/warp.hpp
new file mode 100644
index 0000000..ae1f8ea
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/warp.hpp
@@ -0,0 +1,139 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
+#define OPENCV_CUDA_DEVICE_WARP_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Warp
+    {
+        enum
+        {
+            LOG_WARP_SIZE = 5,
+            WARP_SIZE     = 1 << LOG_WARP_SIZE,
+            STRIDE        = WARP_SIZE
+        };
+
+        /** \brief Returns the warp lane ID of the calling thread. */
+        static __device__ __forceinline__ unsigned int laneId()
+        {
+            unsigned int ret;
+            asm("mov.u32 %0, %laneid;" : "=r"(ret) );
+            return ret;
+        }
+
+        template<typename It, typename T>
+        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
+        {
+            for(It t = beg + laneId(); t < end; t += STRIDE)
+                *t = value;
+        }
+
+        template<typename InIt, typename OutIt>
+        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
+                *out = *t;
+            return out;
+        }
+
+        template<typename InIt, typename OutIt, class UnOp>
+        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
+                *out = op(*t);
+            return out;
+        }
+
+        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
+        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
+        {
+            unsigned int lane = laneId();
+
+            InIt1 t1 = beg1 + lane;
+            InIt2 t2 = beg2 + lane;
+            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
+                *out = op(*t1, *t2);
+            return out;
+        }
+
+        template <class T, class BinOp>
+        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
+        {
+            const unsigned int lane = laneId();
+
+            if (lane < 16)
+            {
+                T partial = ptr[lane];
+
+                ptr[lane] = partial = op(partial, ptr[lane + 16]);
+                ptr[lane] = partial = op(partial, ptr[lane + 8]);
+                ptr[lane] = partial = op(partial, ptr[lane + 4]);
+                ptr[lane] = partial = op(partial, ptr[lane + 2]);
+                ptr[lane] = partial = op(partial, ptr[lane + 1]);
+            }
+
+            return *ptr;
+        }
+
+        template<typename OutIt, typename T>
+        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
+        {
+            unsigned int lane = laneId();
+            value += lane;
+
+            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
+                *t = value;
+        }
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/warp_reduce.hpp b/thirdparty/linux/include/opencv2/core/cuda/warp_reduce.hpp
new file mode 100644
index 0000000..530303d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/warp_reduce.hpp
@@ -0,0 +1,76 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
+#define OPENCV_CUDA_WARP_REDUCE_HPP__
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <class T>
+    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
+    {
+        const unsigned int lane = tid & 31; // index of thread in warp (0..31)
+
+        if (lane < 16)
+        {
+            T partial = ptr[tid];
+
+            ptr[tid] = partial = partial + ptr[tid + 16];
+            ptr[tid] = partial = partial + ptr[tid + 8];
+            ptr[tid] = partial = partial + ptr[tid + 4];
+            ptr[tid] = partial = partial + ptr[tid + 2];
+            ptr[tid] = partial = partial + ptr[tid + 1];
+        }
+
+        return ptr[tid - lane];
+    }
+}}} // namespace cv { namespace cuda { namespace cudev {
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
diff --git a/thirdparty/linux/include/opencv2/core/cuda/warp_shuffle.hpp b/thirdparty/linux/include/opencv2/core/cuda/warp_shuffle.hpp
new file mode 100644
index 0000000..14a9a4d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda/warp_shuffle.hpp
@@ -0,0 +1,153 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
+#define OPENCV_CUDA_WARP_SHUFFLE_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename T>
+    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl(val, srcLane, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl((int) val, srcLane, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl(lo, srcLane, width);
+        hi = __shfl(hi, srcLane, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl_down(val, delta, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_down((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl_down(lo, delta, width);
+        hi = __shfl_down(hi, delta, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl_up(val, delta, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_up((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl_up(lo, delta, width);
+        hi = __shfl_up(hi, delta, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cuda_stream_accessor.hpp b/thirdparty/linux/include/opencv2/core/cuda_stream_accessor.hpp
new file mode 100644
index 0000000..deaf356
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda_stream_accessor.hpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+
+#ifndef __cplusplus
+#  error cuda_stream_accessor.hpp header must be compiled as C++
+#endif
+
+/** @file cuda_stream_accessor.hpp
+ * This is only header file that depends on CUDA Runtime API. All other headers are independent.
+ */
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda.hpp"
+
+namespace cv
+{
+    namespace cuda
+    {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+        /** @brief Class that enables getting cudaStream_t from cuda::Stream
+         */
+        struct StreamAccessor
+        {
+            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+            CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
+        };
+
+        /** @brief Class that enables getting cudaEvent_t from cuda::Event
+         */
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+            CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
+        };
+
+//! @}
+
+    }
+}
+
+#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cuda_types.hpp b/thirdparty/linux/include/opencv2/core/cuda_types.hpp
new file mode 100644
index 0000000..f13a847
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cuda_types.hpp
@@ -0,0 +1,135 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_TYPES_HPP
+#define OPENCV_CORE_CUDA_TYPES_HPP
+
+#ifndef __cplusplus
+#  error cuda_types.hpp header must be compiled as C++
+#endif
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifdef __CUDACC__
+    #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_CUDA_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace cuda
+    {
+
+        // Simple lightweight structures that encapsulates information about an image on device.
+        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
+
+        template <typename T> struct DevPtr
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_CUDA_HOST_DEVICE__ operator       T*()       { return data; }
+            __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template <typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template <typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            size_t step;
+
+            __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
+            __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
+
+            __CV_CUDA_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
+            __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            template <typename U>
+            explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
+
+            int cols;
+            int rows;
+        };
+
+        typedef PtrStepSz<unsigned char> PtrStepSzb;
+        typedef PtrStepSz<float> PtrStepSzf;
+        typedef PtrStepSz<int> PtrStepSzi;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+    }
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_CUDA_TYPES_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/cvdef.h b/thirdparty/linux/include/opencv2/core/cvdef.h
new file mode 100644
index 0000000..699b166
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cvdef.h
@@ -0,0 +1,481 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVDEF_H
+#define OPENCV_CORE_CVDEF_H
+
+//! @addtogroup core_utils
+//! @{
+
+#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
+#  define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
+#endif
+
+// undef problematic defines sometimes defined by system headers (windows.h in particular)
+#undef small
+#undef min
+#undef max
+#undef abs
+#undef Complex
+
+#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
+#  define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
+#endif
+
+#include <limits.h>
+#include "opencv2/core/hal/interface.h"
+
+#if defined __ICL
+#  define CV_ICC   __ICL
+#elif defined __ICC
+#  define CV_ICC   __ICC
+#elif defined __ECL
+#  define CV_ICC   __ECL
+#elif defined __ECC
+#  define CV_ICC   __ECC
+#elif defined __INTEL_COMPILER
+#  define CV_ICC   __INTEL_COMPILER
+#endif
+
+#ifndef CV_INLINE
+#  if defined __cplusplus
+#    define CV_INLINE static inline
+#  elif defined _MSC_VER
+#    define CV_INLINE __inline
+#  else
+#    define CV_INLINE static
+#  endif
+#endif
+
+#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
+#  define CV_ENABLE_UNROLLED 0
+#else
+#  define CV_ENABLE_UNROLLED 1
+#endif
+
+#ifdef __GNUC__
+#  define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined _MSC_VER
+#  define CV_DECL_ALIGNED(x) __declspec(align(x))
+#else
+#  define CV_DECL_ALIGNED(x)
+#endif
+
+/* CPU features and intrinsics support */
+#define CV_CPU_NONE             0
+#define CV_CPU_MMX              1
+#define CV_CPU_SSE              2
+#define CV_CPU_SSE2             3
+#define CV_CPU_SSE3             4
+#define CV_CPU_SSSE3            5
+#define CV_CPU_SSE4_1           6
+#define CV_CPU_SSE4_2           7
+#define CV_CPU_POPCNT           8
+#define CV_CPU_FP16             9
+#define CV_CPU_AVX              10
+#define CV_CPU_AVX2             11
+#define CV_CPU_FMA3             12
+
+#define CV_CPU_AVX_512F         13
+#define CV_CPU_AVX_512BW        14
+#define CV_CPU_AVX_512CD        15
+#define CV_CPU_AVX_512DQ        16
+#define CV_CPU_AVX_512ER        17
+#define CV_CPU_AVX_512IFMA512   18
+#define CV_CPU_AVX_512PF        19
+#define CV_CPU_AVX_512VBMI      20
+#define CV_CPU_AVX_512VL        21
+
+#define CV_CPU_NEON   100
+
+// when adding to this list remember to update the following enum
+#define CV_HARDWARE_MAX_FEATURE 255
+
+/** @brief Available CPU features.
+*/
+enum CpuFeatures {
+    CPU_MMX             = 1,
+    CPU_SSE             = 2,
+    CPU_SSE2            = 3,
+    CPU_SSE3            = 4,
+    CPU_SSSE3           = 5,
+    CPU_SSE4_1          = 6,
+    CPU_SSE4_2          = 7,
+    CPU_POPCNT          = 8,
+    CPU_FP16            = 9,
+    CPU_AVX             = 10,
+    CPU_AVX2            = 11,
+    CPU_FMA3            = 12,
+
+    CPU_AVX_512F        = 13,
+    CPU_AVX_512BW       = 14,
+    CPU_AVX_512CD       = 15,
+    CPU_AVX_512DQ       = 16,
+    CPU_AVX_512ER       = 17,
+    CPU_AVX_512IFMA512  = 18,
+    CPU_AVX_512PF       = 19,
+    CPU_AVX_512VBMI     = 20,
+    CPU_AVX_512VL       = 21,
+
+    CPU_NEON            = 100
+};
+
+// do not include SSE/AVX/NEON headers for NVCC compiler
+#ifndef __CUDACC__
+
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#  if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <pmmintrin.h>
+#    define CV_SSE3 1
+#  endif
+#  if defined __SSSE3__  || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <tmmintrin.h>
+#    define CV_SSSE3 1
+#  endif
+#  if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <smmintrin.h>
+#    define CV_SSE4_1 1
+#  endif
+#  if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <nmmintrin.h>
+#    define CV_SSE4_2 1
+#  endif
+#  if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    ifdef _MSC_VER
+#      include <nmmintrin.h>
+#    else
+#      include <popcntintrin.h>
+#    endif
+#    define CV_POPCNT 1
+#  endif
+#  if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
+// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
+// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
+#    include <immintrin.h>
+#    define CV_AVX 1
+#    if defined(_XCR_XFEATURE_ENABLED_MASK)
+#      define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
+#    else
+#      define __xgetbv() 0
+#    endif
+#  endif
+#  if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
+#    include <immintrin.h>
+#    define CV_AVX2 1
+#    if defined __FMA__
+#      define CV_FMA3 1
+#    endif
+#  endif
+#endif
+
+#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+# define CPU_HAS_NEON_FEATURE (true)
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#endif
+
+#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
+#  define CV_VFP 1
+#endif
+
+#endif // __CUDACC__
+
+#ifndef CV_POPCNT
+#define CV_POPCNT 0
+#endif
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
+#ifndef CV_SSE
+#  define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+#  define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+#  define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+#  define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+#  define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+#  define CV_SSE4_2 0
+#endif
+#ifndef CV_AVX
+#  define CV_AVX 0
+#endif
+#ifndef CV_AVX2
+#  define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+#  define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+
+#ifndef CV_NEON
+#  define CV_NEON 0
+#endif
+
+#ifndef CV_VFP
+#  define CV_VFP 0
+#endif
+
+/* fundamental constants */
+#define CV_PI   3.1415926535897932384626433832795
+#define CV_2PI 6.283185307179586476925286766559
+#define CV_LOG2 0.69314718055994530941723212145818
+
+#if defined __ARM_FP16_FORMAT_IEEE \
+    && !defined __CUDACC__
+#  define CV_FP16_TYPE 1
+#else
+#  define CV_FP16_TYPE 0
+#endif
+
+typedef union Cv16suf
+{
+    short i;
+#if CV_FP16_TYPE
+    __fp16 h;
+#endif
+    struct _fp16Format
+    {
+        unsigned int significand : 10;
+        unsigned int exponent    : 5;
+        unsigned int sign        : 1;
+    } fmt;
+}
+Cv16suf;
+
+typedef union Cv32suf
+{
+    int i;
+    unsigned u;
+    float f;
+    struct _fp32Format
+    {
+        unsigned int significand : 23;
+        unsigned int exponent    : 8;
+        unsigned int sign        : 1;
+    } fmt;
+}
+Cv32suf;
+
+typedef union Cv64suf
+{
+    int64 i;
+    uint64 u;
+    double f;
+}
+Cv64suf;
+
+#define OPENCV_ABI_COMPATIBILITY 300
+
+#ifdef __OPENCV_BUILD
+#  define DISABLE_OPENCV_24_COMPATIBILITY
+#endif
+
+#if (defined WIN32 || defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined CVAPI_EXPORTS
+#  define CV_EXPORTS __declspec(dllexport)
+#elif defined __GNUC__ && __GNUC__ >= 4
+#  define CV_EXPORTS __attribute__ ((visibility ("default")))
+#else
+#  define CV_EXPORTS
+#endif
+
+#ifndef CV_EXTERN_C
+#  ifdef __cplusplus
+#    define CV_EXTERN_C extern "C"
+#  else
+#    define CV_EXTERN_C
+#  endif
+#endif
+
+/* special informative macros for wrapper generators */
+#define CV_EXPORTS_W CV_EXPORTS
+#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
+#define CV_EXPORTS_AS(synonym) CV_EXPORTS
+#define CV_EXPORTS_W_MAP CV_EXPORTS
+#define CV_IN_OUT
+#define CV_OUT
+#define CV_PROP
+#define CV_PROP_RW
+#define CV_WRAP
+#define CV_WRAP_AS(synonym)
+
+/****************************************************************************************\
+*                                  Matrix type (Mat)                                     *
+\****************************************************************************************/
+
+#define CV_MAT_CN_MASK          ((CV_CN_MAX - 1) << CV_CN_SHIFT)
+#define CV_MAT_CN(flags)        ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
+#define CV_MAT_TYPE_MASK        (CV_DEPTH_MAX*CV_CN_MAX - 1)
+#define CV_MAT_TYPE(flags)      ((flags) & CV_MAT_TYPE_MASK)
+#define CV_MAT_CONT_FLAG_SHIFT  14
+#define CV_MAT_CONT_FLAG        (1 << CV_MAT_CONT_FLAG_SHIFT)
+#define CV_IS_MAT_CONT(flags)   ((flags) & CV_MAT_CONT_FLAG)
+#define CV_IS_CONT_MAT          CV_IS_MAT_CONT
+#define CV_SUBMAT_FLAG_SHIFT    15
+#define CV_SUBMAT_FLAG          (1 << CV_SUBMAT_FLAG_SHIFT)
+#define CV_IS_SUBMAT(flags)     ((flags) & CV_MAT_SUBMAT_FLAG)
+
+/** Size of each channel item,
+   0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
+#define CV_ELEM_SIZE1(type) \
+    ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
+
+/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
+#define CV_ELEM_SIZE(type) \
+    (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
+
+#ifndef MIN
+#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
+#endif
+
+#ifndef MAX
+#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
+#endif
+
+/****************************************************************************************\
+*          exchange-add operation for atomic operations on reference counters            *
+\****************************************************************************************/
+
+#ifdef CV_XADD
+  // allow to use user-defined macro
+#elif defined __GNUC__
+#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
+#    ifdef __ATOMIC_ACQ_REL
+#      define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
+#    endif
+#  else
+#    if defined __ATOMIC_ACQ_REL && !defined __clang__
+       // version for gcc >= 4.7
+#      define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
+#    endif
+#  endif
+#elif defined _MSC_VER && !defined RC_INVOKED
+#  include <intrin.h>
+#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
+#else
+   CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+#endif
+
+
+/****************************************************************************************\
+*                                  CV_NORETURN attribute                                 *
+\****************************************************************************************/
+
+#ifndef CV_NORETURN
+#  if defined(__GNUC__)
+#    define CV_NORETURN __attribute__((__noreturn__))
+#  elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+#    define CV_NORETURN __declspec(noreturn)
+#  else
+#    define CV_NORETURN /* nothing by default */
+#  endif
+#endif
+
+
+/****************************************************************************************\
+*                                    C++ Move semantics                                  *
+\****************************************************************************************/
+
+#ifndef CV_CXX_MOVE_SEMANTICS
+#  if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || defined(_MSC_VER) && _MSC_VER >= 1600
+#    define CV_CXX_MOVE_SEMANTICS 1
+#  elif defined(__clang)
+#    if __has_feature(cxx_rvalue_references)
+#      define CV_CXX_MOVE_SEMANTICS 1
+#    endif
+#  endif
+#else
+#  if CV_CXX_MOVE_SEMANTICS == 0
+#    undef CV_CXX_MOVE_SEMANTICS
+#  endif
+#endif
+
+//! @}
+
+#endif // OPENCV_CORE_CVDEF_H
diff --git a/thirdparty/linux/include/opencv2/core/cvstd.hpp b/thirdparty/linux/include/opencv2/core/cvstd.hpp
new file mode 100644
index 0000000..2d40bd0
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cvstd.hpp
@@ -0,0 +1,1066 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTD_HPP
+#define OPENCV_CORE_CVSTD_HPP
+
+#ifndef __cplusplus
+#  error cvstd.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+#include <cstddef>
+#include <cstring>
+#include <cctype>
+
+#ifndef OPENCV_NOSTL
+#  include <string>
+#endif
+
+// import useful primitives from stl
+#ifndef OPENCV_NOSTL_TRANSITIONAL
+#  include <algorithm>
+#  include <utility>
+#  include <cstdlib> //for abs(int)
+#  include <cmath>
+
+namespace cv
+{
+    static inline uchar abs(uchar a) { return a; }
+    static inline ushort abs(ushort a) { return a; }
+    static inline unsigned abs(unsigned a) { return a; }
+    static inline uint64 abs(uint64 a) { return a; }
+
+    using std::min;
+    using std::max;
+    using std::abs;
+    using std::swap;
+    using std::sqrt;
+    using std::exp;
+    using std::pow;
+    using std::log;
+}
+
+#else
+namespace cv
+{
+    template<typename T> static inline T min(T a, T b) { return a < b ? a : b; }
+    template<typename T> static inline T max(T a, T b) { return a > b ? a : b; }
+    template<typename T> static inline T abs(T a) { return a < 0 ? -a : a; }
+    template<typename T> static inline void swap(T& a, T& b) { T tmp = a; a = b; b = tmp; }
+
+    template<> inline uchar abs(uchar a) { return a; }
+    template<> inline ushort abs(ushort a) { return a; }
+    template<> inline unsigned abs(unsigned a) { return a; }
+    template<> inline uint64 abs(uint64 a) { return a; }
+}
+#endif
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+//////////////////////////// memory management functions ////////////////////////////
+
+/** @brief Allocates an aligned memory buffer.
+
+The function allocates the buffer of the specified size and returns it. When the buffer size is 16
+bytes or more, the returned buffer is aligned to 16 bytes.
+@param bufSize Allocated buffer size.
+ */
+CV_EXPORTS void* fastMalloc(size_t bufSize);
+
+/** @brief Deallocates a memory buffer.
+
+The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the
+function does nothing. C version of the function clears the pointer *pptr* to avoid problems with
+double memory deallocation.
+@param ptr Pointer to the allocated buffer.
+ */
+CV_EXPORTS void fastFree(void* ptr);
+
+/*!
+  The STL-compilant memory Allocator based on cv::fastMalloc() and cv::fastFree()
+*/
+template<typename _Tp> class Allocator
+{
+public:
+    typedef _Tp value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<typename U> class rebind { typedef Allocator<U> other; };
+
+    explicit Allocator() {}
+    ~Allocator() {}
+    explicit Allocator(Allocator const&) {}
+    template<typename U>
+    explicit Allocator(Allocator<U> const&) {}
+
+    // address
+    pointer address(reference r) { return &r; }
+    const_pointer address(const_reference r) { return &r; }
+
+    pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
+    void deallocate(pointer p, size_type) { fastFree(p); }
+
+    void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
+    void destroy(pointer p) { p->~_Tp(); }
+
+    size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); }
+};
+
+//! @} core_utils
+
+//! @cond IGNORED
+
+namespace detail
+{
+
+// Metafunction to avoid taking a reference to void.
+template<typename T>
+struct RefOrVoid { typedef T& type; };
+
+template<>
+struct RefOrVoid<void>{ typedef void type; };
+
+template<>
+struct RefOrVoid<const void>{ typedef const void type; };
+
+template<>
+struct RefOrVoid<volatile void>{ typedef volatile void type; };
+
+template<>
+struct RefOrVoid<const volatile void>{ typedef const volatile void type; };
+
+// This class would be private to Ptr, if it didn't have to be a non-template.
+struct PtrOwner;
+
+}
+
+template<typename Y>
+struct DefaultDeleter
+{
+    void operator () (Y* p) const;
+};
+
+//! @endcond
+
+//! @addtogroup core_basic
+//! @{
+
+/** @brief Template class for smart pointers with shared ownership
+
+A Ptr\<T\> pretends to be a pointer to an object of type T. Unlike an ordinary pointer, however, the
+object will be automatically cleaned up once all Ptr instances pointing to it are destroyed.
+
+Ptr is similar to boost::shared_ptr that is part of the Boost library
+(<http://www.boost.org/doc/libs/release/libs/smart_ptr/shared_ptr.htm>) and std::shared_ptr from
+the [C++11](http://en.wikipedia.org/wiki/C++11) standard.
+
+This class provides the following advantages:
+-   Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or C
+    structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
+    constructor or an assignment operator are difficult to define. For some other objects, like
+    complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
+    some of complex OpenCV and your own data structures may be written in C. However, copy
+    constructors and default constructors can simplify programming a lot. Besides, they are often
+    required (for example, by STL containers). By using a Ptr to such an object instead of the
+    object itself, you automatically get all of the necessary constructors and the assignment
+    operator.
+-   *O(1)* complexity of the above-mentioned operations. While some structures, like std::vector,
+    provide a copy constructor and an assignment operator, the operations may take a considerable
+    amount of time if the data structures are large. But if the structures are put into a Ptr, the
+    overhead is small and independent of the data size.
+-   Automatic and customizable cleanup, even for C structures. See the example below with FILE\*.
+-   Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
+    can store only objects of the same type and the same size. The classical solution to store
+    objects of different types in the same container is to store pointers to the base class (Base\*)
+    instead but then you lose the automatic memory management. Again, by using Ptr\<Base\> instead
+    of raw pointers, you can solve the problem.
+
+A Ptr is said to *own* a pointer - that is, for each Ptr there is a pointer that will be deleted
+once all Ptr instances that own it are destroyed. The owned pointer may be null, in which case
+nothing is deleted. Each Ptr also *stores* a pointer. The stored pointer is the pointer the Ptr
+pretends to be; that is, the one you get when you use Ptr::get or the conversion to T\*. It's
+usually the same as the owned pointer, but if you use casts or the general shared-ownership
+constructor, the two may diverge: the Ptr will still own the original pointer, but will itself point
+to something else.
+
+The owned pointer is treated as a black box. The only thing Ptr needs to know about it is how to
+delete it. This knowledge is encapsulated in the *deleter* - an auxiliary object that is associated
+with the owned pointer and shared between all Ptr instances that own it. The default deleter is an
+instance of DefaultDeleter, which uses the standard C++ delete operator; as such it will work with
+any pointer allocated with the standard new operator.
+
+However, if the pointer must be deleted in a different way, you must specify a custom deleter upon
+Ptr construction. A deleter is simply a callable object that accepts the pointer as its sole
+argument. For example, if you want to wrap FILE, you may do so as follows:
+@code
+    Ptr<FILE> f(fopen("myfile.txt", "w"), fclose);
+    if(!f) throw ...;
+    fprintf(f, ....);
+    ...
+    // the file will be closed automatically by f's destructor.
+@endcode
+Alternatively, if you want all pointers of a particular type to be deleted the same way, you can
+specialize DefaultDeleter<T>::operator() for that type, like this:
+@code
+    namespace cv {
+    template<> void DefaultDeleter<FILE>::operator ()(FILE * obj) const
+    {
+        fclose(obj);
+    }
+    }
+@endcode
+For convenience, the following types from the OpenCV C API already have such a specialization that
+calls the appropriate release function:
+-   CvCapture
+-   CvFileStorage
+-   CvHaarClassifierCascade
+-   CvMat
+-   CvMatND
+-   CvMemStorage
+-   CvSparseMat
+-   CvVideoWriter
+-   IplImage
+@note The shared ownership mechanism is implemented with reference counting. As such, cyclic
+ownership (e.g. when object a contains a Ptr to object b, which contains a Ptr to object a) will
+lead to all involved objects never being cleaned up. Avoid such situations.
+@note It is safe to concurrently read (but not write) a Ptr instance from multiple threads and
+therefore it is normally safe to use it in multi-threaded applications. The same is true for Mat and
+other C++ OpenCV classes that use internal reference counts.
+*/
+template<typename T>
+struct Ptr
+{
+    /** Generic programming support. */
+    typedef T element_type;
+
+    /** The default constructor creates a null Ptr - one that owns and stores a null pointer.
+    */
+    Ptr();
+
+    /**
+    If p is null, these are equivalent to the default constructor.
+    Otherwise, these constructors assume ownership of p - that is, the created Ptr owns and stores p
+    and assumes it is the sole owner of it. Don't use them if p is already owned by another Ptr, or
+    else p will get deleted twice.
+    With the first constructor, DefaultDeleter\<Y\>() becomes the associated deleter (so p will
+    eventually be deleted with the standard delete operator). Y must be a complete type at the point
+    of invocation.
+    With the second constructor, d becomes the associated deleter.
+    Y\* must be convertible to T\*.
+    @param p Pointer to own.
+    @note It is often easier to use makePtr instead.
+     */
+    template<typename Y>
+#ifdef DISABLE_OPENCV_24_COMPATIBILITY
+    explicit
+#endif
+    Ptr(Y* p);
+
+    /** @overload
+    @param d Deleter to use for the owned pointer.
+    @param p Pointer to own.
+    */
+    template<typename Y, typename D>
+    Ptr(Y* p, D d);
+
+    /**
+    These constructors create a Ptr that shares ownership with another Ptr - that is, own the same
+    pointer as o.
+    With the first two, the same pointer is stored, as well; for the second, Y\* must be convertible
+    to T\*.
+    With the third, p is stored, and Y may be any type. This constructor allows to have completely
+    unrelated owned and stored pointers, and should be used with care to avoid confusion. A relatively
+    benign use is to create a non-owning Ptr, like this:
+    @code
+        ptr = Ptr<T>(Ptr<T>(), dont_delete_me); // owns nothing; will not delete the pointer.
+    @endcode
+    @param o Ptr to share ownership with.
+    */
+    Ptr(const Ptr& o);
+
+    /** @overload
+    @param o Ptr to share ownership with.
+    */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o);
+
+    /** @overload
+    @param o Ptr to share ownership with.
+    @param p Pointer to store.
+    */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o, T* p);
+
+    /** The destructor is equivalent to calling Ptr::release. */
+    ~Ptr();
+
+    /**
+    Assignment replaces the current Ptr instance with one that owns and stores same pointers as o and
+    then destroys the old instance.
+    @param o Ptr to share ownership with.
+     */
+    Ptr& operator = (const Ptr& o);
+
+    /** @overload */
+    template<typename Y>
+    Ptr& operator = (const Ptr<Y>& o);
+
+    /** If no other Ptr instance owns the owned pointer, deletes it with the associated deleter. Then sets
+    both the owned and the stored pointers to NULL.
+    */
+    void release();
+
+    /**
+    `ptr.reset(...)` is equivalent to `ptr = Ptr<T>(...)`.
+    @param p Pointer to own.
+    */
+    template<typename Y>
+    void reset(Y* p);
+
+    /** @overload
+    @param d Deleter to use for the owned pointer.
+    @param p Pointer to own.
+    */
+    template<typename Y, typename D>
+    void reset(Y* p, D d);
+
+    /**
+    Swaps the owned and stored pointers (and deleters, if any) of this and o.
+    @param o Ptr to swap with.
+    */
+    void swap(Ptr& o);
+
+    /** Returns the stored pointer. */
+    T* get() const;
+
+    /** Ordinary pointer emulation. */
+    typename detail::RefOrVoid<T>::type operator * () const;
+
+    /** Ordinary pointer emulation. */
+    T* operator -> () const;
+
+    /** Equivalent to get(). */
+    operator T* () const;
+
+    /** ptr.empty() is equivalent to `!ptr.get()`. */
+    bool empty() const;
+
+    /** Returns a Ptr that owns the same pointer as this, and stores the same
+       pointer as this, except converted via static_cast to Y*.
+    */
+    template<typename Y>
+    Ptr<Y> staticCast() const;
+
+    /** Ditto for const_cast. */
+    template<typename Y>
+    Ptr<Y> constCast() const;
+
+    /** Ditto for dynamic_cast. */
+    template<typename Y>
+    Ptr<Y> dynamicCast() const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Ptr(Ptr&& o);
+    Ptr& operator = (Ptr&& o);
+#endif
+
+private:
+    detail::PtrOwner* owner;
+    T* stored;
+
+    template<typename Y>
+    friend struct Ptr; // have to do this for the cross-type copy constructor
+};
+
+/** Equivalent to ptr1.swap(ptr2). Provided to help write generic algorithms. */
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2);
+
+/** Return whether ptr1.get() and ptr2.get() are equal and not equal, respectively. */
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+
+/** `makePtr<T>(...)` is equivalent to `Ptr<T>(new T(...))`. It is shorter than the latter, and it's
+marginally safer than using a constructor or Ptr::reset, since it ensures that the owned pointer
+is new and thus not owned by any other Ptr instance.
+Unfortunately, perfect forwarding is impossible to implement in C++03, and so makePtr is limited
+to constructors of T that have up to 10 arguments, none of which are non-const references.
+ */
+template<typename T>
+Ptr<T> makePtr();
+/** @overload */
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1);
+/** @overload */
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10);
+
+//////////////////////////////// string class ////////////////////////////////
+
+class CV_EXPORTS FileNode; //for string constructor from FileNode
+
+class CV_EXPORTS String
+{
+public:
+    typedef char value_type;
+    typedef char& reference;
+    typedef const char& const_reference;
+    typedef char* pointer;
+    typedef const char* const_pointer;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef char* iterator;
+    typedef const char* const_iterator;
+
+    static const size_t npos = size_t(-1);
+
+    explicit String();
+    String(const String& str);
+    String(const String& str, size_t pos, size_t len = npos);
+    String(const char* s);
+    String(const char* s, size_t n);
+    String(size_t n, char c);
+    String(const char* first, const char* last);
+    template<typename Iterator> String(Iterator first, Iterator last);
+    explicit String(const FileNode& fn);
+    ~String();
+
+    String& operator=(const String& str);
+    String& operator=(const char* s);
+    String& operator=(char c);
+
+    String& operator+=(const String& str);
+    String& operator+=(const char* s);
+    String& operator+=(char c);
+
+    size_t size() const;
+    size_t length() const;
+
+    char operator[](size_t idx) const;
+    char operator[](int idx) const;
+
+    const char* begin() const;
+    const char* end() const;
+
+    const char* c_str() const;
+
+    bool empty() const;
+    void clear();
+
+    int compare(const char* s) const;
+    int compare(const String& str) const;
+
+    void swap(String& str);
+    String substr(size_t pos = 0, size_t len = npos) const;
+
+    size_t find(const char* s, size_t pos, size_t n) const;
+    size_t find(char c, size_t pos = 0) const;
+    size_t find(const String& str, size_t pos = 0) const;
+    size_t find(const char* s, size_t pos = 0) const;
+
+    size_t rfind(const char* s, size_t pos, size_t n) const;
+    size_t rfind(char c, size_t pos = npos) const;
+    size_t rfind(const String& str, size_t pos = npos) const;
+    size_t rfind(const char* s, size_t pos = npos) const;
+
+    size_t find_first_of(const char* s, size_t pos, size_t n) const;
+    size_t find_first_of(char c, size_t pos = 0) const;
+    size_t find_first_of(const String& str, size_t pos = 0) const;
+    size_t find_first_of(const char* s, size_t pos = 0) const;
+
+    size_t find_last_of(const char* s, size_t pos, size_t n) const;
+    size_t find_last_of(char c, size_t pos = npos) const;
+    size_t find_last_of(const String& str, size_t pos = npos) const;
+    size_t find_last_of(const char* s, size_t pos = npos) const;
+
+    friend String operator+ (const String& lhs, const String& rhs);
+    friend String operator+ (const String& lhs, const char*   rhs);
+    friend String operator+ (const char*   lhs, const String& rhs);
+    friend String operator+ (const String& lhs, char          rhs);
+    friend String operator+ (char          lhs, const String& rhs);
+
+    String toLowerCase() const;
+
+#ifndef OPENCV_NOSTL
+    String(const std::string& str);
+    String(const std::string& str, size_t pos, size_t len = npos);
+    String& operator=(const std::string& str);
+    String& operator+=(const std::string& str);
+    operator std::string() const;
+
+    friend String operator+ (const String& lhs, const std::string& rhs);
+    friend String operator+ (const std::string& lhs, const String& rhs);
+#endif
+
+private:
+    char*  cstr_;
+    size_t len_;
+
+    char* allocate(size_t len); // len without trailing 0
+    void deallocate();
+
+    String(int); // disabled and invalid. Catch invalid usages like, commandLineParser.has(0) problem
+};
+
+//! @} core_basic
+
+////////////////////////// cv::String implementation /////////////////////////
+
+//! @cond IGNORED
+
+inline
+String::String()
+    : cstr_(0), len_(0)
+{}
+
+inline
+String::String(const String& str)
+    : cstr_(str.cstr_), len_(str.len_)
+{
+    if (cstr_)
+        CV_XADD(((int*)cstr_)-1, 1);
+}
+
+inline
+String::String(const String& str, size_t pos, size_t len)
+    : cstr_(0), len_(0)
+{
+    pos = min(pos, str.len_);
+    len = min(str.len_ - pos, len);
+    if (!len) return;
+    if (len == str.len_)
+    {
+        CV_XADD(((int*)str.cstr_)-1, 1);
+        cstr_ = str.cstr_;
+        len_ = str.len_;
+        return;
+    }
+    memcpy(allocate(len), str.cstr_ + pos, len);
+}
+
+inline
+String::String(const char* s)
+    : cstr_(0), len_(0)
+{
+    if (!s) return;
+    size_t len = strlen(s);
+    memcpy(allocate(len), s, len);
+}
+
+inline
+String::String(const char* s, size_t n)
+    : cstr_(0), len_(0)
+{
+    if (!n) return;
+    memcpy(allocate(n), s, n);
+}
+
+inline
+String::String(size_t n, char c)
+    : cstr_(0), len_(0)
+{
+    memset(allocate(n), c, n);
+}
+
+inline
+String::String(const char* first, const char* last)
+    : cstr_(0), len_(0)
+{
+    size_t len = (size_t)(last - first);
+    memcpy(allocate(len), first, len);
+}
+
+template<typename Iterator> inline
+String::String(Iterator first, Iterator last)
+    : cstr_(0), len_(0)
+{
+    size_t len = (size_t)(last - first);
+    char* str = allocate(len);
+    while (first != last)
+    {
+        *str++ = *first;
+        ++first;
+    }
+}
+
+inline
+String::~String()
+{
+    deallocate();
+}
+
+inline
+String& String::operator=(const String& str)
+{
+    if (&str == this) return *this;
+
+    deallocate();
+    if (str.cstr_) CV_XADD(((int*)str.cstr_)-1, 1);
+    cstr_ = str.cstr_;
+    len_ = str.len_;
+    return *this;
+}
+
+inline
+String& String::operator=(const char* s)
+{
+    deallocate();
+    if (!s) return *this;
+    size_t len = strlen(s);
+    memcpy(allocate(len), s, len);
+    return *this;
+}
+
+inline
+String& String::operator=(char c)
+{
+    deallocate();
+    allocate(1)[0] = c;
+    return *this;
+}
+
+inline
+String& String::operator+=(const String& str)
+{
+    *this = *this + str;
+    return *this;
+}
+
+inline
+String& String::operator+=(const char* s)
+{
+    *this = *this + s;
+    return *this;
+}
+
+inline
+String& String::operator+=(char c)
+{
+    *this = *this + c;
+    return *this;
+}
+
+inline
+size_t String::size() const
+{
+    return len_;
+}
+
+inline
+size_t String::length() const
+{
+    return len_;
+}
+
+inline
+char String::operator[](size_t idx) const
+{
+    return cstr_[idx];
+}
+
+inline
+char String::operator[](int idx) const
+{
+    return cstr_[idx];
+}
+
+inline
+const char* String::begin() const
+{
+    return cstr_;
+}
+
+inline
+const char* String::end() const
+{
+    return len_ ? cstr_ + 1 : 0;
+}
+
+inline
+bool String::empty() const
+{
+    return len_ == 0;
+}
+
+inline
+const char* String::c_str() const
+{
+    return cstr_ ? cstr_ : "";
+}
+
+inline
+void String::swap(String& str)
+{
+    cv::swap(cstr_, str.cstr_);
+    cv::swap(len_, str.len_);
+}
+
+inline
+void String::clear()
+{
+    deallocate();
+}
+
+inline
+int String::compare(const char* s) const
+{
+    if (cstr_ == s) return 0;
+    return strcmp(c_str(), s);
+}
+
+inline
+int String::compare(const String& str) const
+{
+    if (cstr_ == str.cstr_) return 0;
+    return strcmp(c_str(), str.c_str());
+}
+
+inline
+String String::substr(size_t pos, size_t len) const
+{
+    return String(*this, pos, len);
+}
+
+inline
+size_t String::find(const char* s, size_t pos, size_t n) const
+{
+    if (n == 0 || pos + n > len_) return npos;
+    const char* lmax = cstr_ + len_ - n;
+    for (const char* i = cstr_ + pos; i <= lmax; ++i)
+    {
+        size_t j = 0;
+        while (j < n && s[j] == i[j]) ++j;
+        if (j == n) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find(char c, size_t pos) const
+{
+    return find(&c, pos, 1);
+}
+
+inline
+size_t String::find(const String& str, size_t pos) const
+{
+    return find(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find(const char* s, size_t pos) const
+{
+    if (pos >= len_ || !s[0]) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        size_t j = 0;
+        while (s[j] && s[j] == i[j])
+        {   if(i + j >= lmax) return npos;
+            ++j;
+        }
+        if (!s[j]) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::rfind(const char* s, size_t pos, size_t n) const
+{
+    if (n > len_) return npos;
+    if (pos > len_ - n) pos = len_ - n;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        size_t j = 0;
+        while (j < n && s[j] == i[j]) ++j;
+        if (j == n) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::rfind(char c, size_t pos) const
+{
+    return rfind(&c, pos, 1);
+}
+
+inline
+size_t String::rfind(const String& str, size_t pos) const
+{
+    return rfind(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::rfind(const char* s, size_t pos) const
+{
+    return rfind(s, pos, strlen(s));
+}
+
+inline
+size_t String::find_first_of(const char* s, size_t pos, size_t n) const
+{
+    if (n == 0 || pos + n > len_) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        for (size_t j = 0; j < n; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_first_of(char c, size_t pos) const
+{
+    return find_first_of(&c, pos, 1);
+}
+
+inline
+size_t String::find_first_of(const String& str, size_t pos) const
+{
+    return find_first_of(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find_first_of(const char* s, size_t pos) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_ || !s[0]) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        for (size_t j = 0; s[j]; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_last_of(const char* s, size_t pos, size_t n) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_) pos = len_ - 1;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        for (size_t j = 0; j < n; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_last_of(char c, size_t pos) const
+{
+    return find_last_of(&c, pos, 1);
+}
+
+inline
+size_t String::find_last_of(const String& str, size_t pos) const
+{
+    return find_last_of(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find_last_of(const char* s, size_t pos) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_) pos = len_ - 1;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        for (size_t j = 0; s[j]; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+String String::toLowerCase() const
+{
+    String res(cstr_, len_);
+
+    for (size_t i = 0; i < len_; ++i)
+        res.cstr_[i] = (char) ::tolower(cstr_[i]);
+
+    return res;
+}
+
+//! @endcond
+
+// ************************* cv::String non-member functions *************************
+
+//! @relates cv::String
+//! @{
+
+inline
+String operator + (const String& lhs, const String& rhs)
+{
+    String s;
+    s.allocate(lhs.len_ + rhs.len_);
+    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    memcpy(s.cstr_ + lhs.len_, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+String operator + (const String& lhs, const char* rhs)
+{
+    String s;
+    size_t rhslen = strlen(rhs);
+    s.allocate(lhs.len_ + rhslen);
+    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    memcpy(s.cstr_ + lhs.len_, rhs, rhslen);
+    return s;
+}
+
+inline
+String operator + (const char* lhs, const String& rhs)
+{
+    String s;
+    size_t lhslen = strlen(lhs);
+    s.allocate(lhslen + rhs.len_);
+    memcpy(s.cstr_, lhs, lhslen);
+    memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+String operator + (const String& lhs, char rhs)
+{
+    String s;
+    s.allocate(lhs.len_ + 1);
+    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    s.cstr_[lhs.len_] = rhs;
+    return s;
+}
+
+inline
+String operator + (char lhs, const String& rhs)
+{
+    String s;
+    s.allocate(rhs.len_ + 1);
+    s.cstr_[0] = lhs;
+    memcpy(s.cstr_ + 1, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+static inline bool operator== (const String& lhs, const String& rhs) { return 0 == lhs.compare(rhs); }
+static inline bool operator== (const char*   lhs, const String& rhs) { return 0 == rhs.compare(lhs); }
+static inline bool operator== (const String& lhs, const char*   rhs) { return 0 == lhs.compare(rhs); }
+static inline bool operator!= (const String& lhs, const String& rhs) { return 0 != lhs.compare(rhs); }
+static inline bool operator!= (const char*   lhs, const String& rhs) { return 0 != rhs.compare(lhs); }
+static inline bool operator!= (const String& lhs, const char*   rhs) { return 0 != lhs.compare(rhs); }
+static inline bool operator<  (const String& lhs, const String& rhs) { return lhs.compare(rhs) <  0; }
+static inline bool operator<  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >  0; }
+static inline bool operator<  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <  0; }
+static inline bool operator<= (const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; }
+static inline bool operator<= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >= 0; }
+static inline bool operator<= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <= 0; }
+static inline bool operator>  (const String& lhs, const String& rhs) { return lhs.compare(rhs) >  0; }
+static inline bool operator>  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <  0; }
+static inline bool operator>  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >  0; }
+static inline bool operator>= (const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; }
+static inline bool operator>= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <= 0; }
+static inline bool operator>= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >= 0; }
+
+//! @} relates cv::String
+
+} // cv
+
+#ifndef OPENCV_NOSTL_TRANSITIONAL
+namespace std
+{
+    static inline void swap(cv::String& a, cv::String& b) { a.swap(b); }
+}
+#else
+namespace cv
+{
+    template<> inline
+    void swap<cv::String>(cv::String& a, cv::String& b)
+    {
+        a.swap(b);
+    }
+}
+#endif
+
+#include "opencv2/core/ptr.inl.hpp"
+
+#endif //OPENCV_CORE_CVSTD_HPP
diff --git a/thirdparty/linux/include/opencv2/core/cvstd.inl.hpp b/thirdparty/linux/include/opencv2/core/cvstd.inl.hpp
new file mode 100644
index 0000000..876def8
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/cvstd.inl.hpp
@@ -0,0 +1,267 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTDINL_HPP
+#define OPENCV_CORE_CVSTDINL_HPP
+
+#ifndef OPENCV_NOSTL
+#  include <complex>
+#  include <ostream>
+#endif
+
+//! @cond IGNORED
+
+namespace cv
+{
+#ifndef OPENCV_NOSTL
+
+template<typename _Tp> class DataType< std::complex<_Tp> >
+{
+public:
+    typedef std::complex<_Tp>  value_type;
+    typedef value_type         work_type;
+    typedef _Tp                channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels) };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+inline
+String::String(const std::string& str)
+    : cstr_(0), len_(0)
+{
+    if (!str.empty())
+    {
+        size_t len = str.size();
+        memcpy(allocate(len), str.c_str(), len);
+    }
+}
+
+inline
+String::String(const std::string& str, size_t pos, size_t len)
+    : cstr_(0), len_(0)
+{
+    size_t strlen = str.size();
+    pos = min(pos, strlen);
+    len = min(strlen - pos, len);
+    if (!len) return;
+    memcpy(allocate(len), str.c_str() + pos, len);
+}
+
+inline
+String& String::operator = (const std::string& str)
+{
+    deallocate();
+    if (!str.empty())
+    {
+        size_t len = str.size();
+        memcpy(allocate(len), str.c_str(), len);
+    }
+    return *this;
+}
+
+inline
+String& String::operator += (const std::string& str)
+{
+    *this = *this + str;
+    return *this;
+}
+
+inline
+String::operator std::string() const
+{
+    return std::string(cstr_, len_);
+}
+
+inline
+String operator + (const String& lhs, const std::string& rhs)
+{
+    String s;
+    size_t rhslen = rhs.size();
+    s.allocate(lhs.len_ + rhslen);
+    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
+    return s;
+}
+
+inline
+String operator + (const std::string& lhs, const String& rhs)
+{
+    String s;
+    size_t lhslen = lhs.size();
+    s.allocate(lhslen + rhs.len_);
+    memcpy(s.cstr_, lhs.c_str(), lhslen);
+    memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+FileNode::operator std::string() const
+{
+    String value;
+    read(*this, value, value);
+    return value;
+}
+
+template<> inline
+void operator >> (const FileNode& n, std::string& value)
+{
+    String val;
+    read(n, val, val);
+    value = val;
+}
+
+template<> inline
+FileStorage& operator << (FileStorage& fs, const std::string& value)
+{
+    return fs << cv::String(value);
+}
+
+static inline
+std::ostream& operator << (std::ostream& os, const String& str)
+{
+    return os << str.c_str();
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        out << str;
+    return out;
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, const Mat& mtx)
+{
+    return out << Formatter::get()->format(mtx);
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp, int m, int n> static inline
+std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
+{
+    return out << Formatter::get()->format(Mat(matx));
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << "]";
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
+    return out;
+}
+
+template<typename _Tp, int n> static inline
+std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
+{
+    out << "[";
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+    if(Vec<_Tp, n>::depth < CV_32F)
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << (int)vec[i] << ", ";
+        }
+        out << (int)vec[n-1] << "]";
+    }
+    else
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << vec[i] << ", ";
+        }
+        out << vec[n-1] << "]";
+    }
+
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
+{
+    return out << "[" << size.width << " x " << size.height << "]";
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
+{
+    return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
+}
+
+
+#endif // OPENCV_NOSTL
+} // cv
+
+//! @endcond
+
+#endif // OPENCV_CORE_CVSTDINL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/directx.hpp b/thirdparty/linux/include/opencv2/core/directx.hpp
new file mode 100644
index 0000000..056a85a
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/directx.hpp
@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_DIRECTX_HPP
+#define OPENCV_CORE_DIRECTX_HPP
+
+#include "mat.hpp"
+#include "ocl.hpp"
+
+#if !defined(__d3d11_h__)
+struct ID3D11Device;
+struct ID3D11Texture2D;
+#endif
+
+#if !defined(__d3d10_h__)
+struct ID3D10Device;
+struct ID3D10Texture2D;
+#endif
+
+#if !defined(_D3D9_H_)
+struct IDirect3DDevice9;
+struct IDirect3DDevice9Ex;
+struct IDirect3DSurface9;
+#endif
+
+
+namespace cv { namespace directx {
+
+namespace ocl {
+using namespace cv::ocl;
+
+//! @addtogroup core_directx
+// This section describes OpenCL and DirectX interoperability.
+//
+// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is
+// supported only on Windows.
+//
+// To use OpenCL functionality you should first initialize OpenCL context from DirectX resource.
+//
+//! @{
+
+// TODO static functions in the Context class
+//! @brief Creates OpenCL context from D3D11 device
+//
+//! @param pD3D11Device - pointer to D3D11 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
+
+//! @brief Creates OpenCL context from D3D10 device
+//
+//! @param pD3D10Device - pointer to D3D10 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
+
+//! @brief Creates OpenCL context from Direct3DDevice9Ex device
+//
+//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
+
+//! @brief Creates OpenCL context from Direct3DDevice9 device
+//
+//! @param pDirect3DDevice9 - pointer to Direct3Device9 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
+
+//! @}
+
+} // namespace cv::directx::ocl
+
+//! @addtogroup core_directx
+//! @{
+
+//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then
+//!        input UMat expected to be in BGR format and data will be downsampled and color-converted to NV12.
+//
+//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to
+//!             pD3D11Texture2D
+//
+//! @param src - source InputArray
+//! @param pD3D11Texture2D - destination D3D11 texture
+CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
+
+//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then
+//!        data will be upsampled and color-converted to BGR format.
+//
+//! @note Note: Destination matrix will be re-allocated if it has not enough memory to match texture size.
+//!             function does memory copy from pD3D11Texture2D to dst
+//
+//! @param pD3D11Texture2D - source D3D11 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to ID3D10Texture2D
+//
+//! @note Note: function does memory copy from src to
+//!             pD3D10Texture2D
+//
+//! @param src             - source InputArray
+//! @param pD3D10Texture2D - destination D3D10 texture
+CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
+
+//! @brief Converts ID3D10Texture2D to OutputArray
+//
+//! @note Note: function does memory copy from pD3D10Texture2D
+//!             to dst
+//
+//! @param pD3D10Texture2D - source D3D10 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to IDirect3DSurface9
+//
+//! @note Note: function does memory copy from src to
+//!             pDirect3DSurface9
+//
+//! @param src                 - source InputArray
+//! @param pDirect3DSurface9   - destination D3D10 texture
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
+
+//! @brief Converts IDirect3DSurface9 to OutputArray
+//
+//! @note Note: function does memory copy from pDirect3DSurface9
+//!             to dst
+//
+//! @param pDirect3DSurface9   - source D3D10 texture
+//! @param dst                 - destination OutputArray
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iD3DFORMAT - enum D3DTYPE for D3D9
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
+
+//! @}
+
+} } // namespace cv::directx
+
+#endif // OPENCV_CORE_DIRECTX_HPP
diff --git a/thirdparty/linux/include/opencv2/core/eigen.hpp b/thirdparty/linux/include/opencv2/core/eigen.hpp
new file mode 100644
index 0000000..c2f1ee6
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/eigen.hpp
@@ -0,0 +1,280 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef OPENCV_CORE_EIGEN_HPP
+#define OPENCV_CORE_EIGEN_HPP
+
+#include "opencv2/core.hpp"
+
+#if defined _MSC_VER && _MSC_VER >= 1200
+#pragma warning( disable: 4714 ) //__forceinline is not inlined
+#pragma warning( disable: 4127 ) //conditional expression is constant
+#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core_eigen
+//! @{
+
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
+{
+    if( !(src.Flags & Eigen::RowMajorBit) )
+    {
+        Mat _src(src.cols(), src.rows(), DataType<_Tp>::type,
+              (void*)src.data(), src.stride()*sizeof(_Tp));
+        transpose(_src, dst);
+    }
+    else
+    {
+        Mat _src(src.rows(), src.cols(), DataType<_Tp>::type,
+                 (void*)src.data(), src.stride()*sizeof(_Tp));
+        _src.copyTo(dst);
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
+               Matx<_Tp, _rows, _cols>& dst )
+{
+    if( !(src.Flags & Eigen::RowMajorBit) )
+    {
+        dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
+    }
+    else
+    {
+        dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
+    }
+}
+
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    CV_DbgAssert(src.rows == _rows && src.cols == _cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else if( src.cols == src.rows )
+        {
+            src.convertTo(_dst, _dst.type());
+            transpose(_dst, _dst);
+        }
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, _rows, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, _cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
+
+template<typename _Tp>  static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(src.rows, src.cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
+             dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else if( src.cols == src.rows )
+        {
+            src.convertTo(_dst, _dst.type());
+            transpose(_dst, _dst);
+        }
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(_rows, _cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, _rows, DataType<_Tp>::type,
+             dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, _cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
+
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    CV_Assert(src.cols == 1);
+    dst.resize(src.rows);
+
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows> static inline
+void cv2eigen( const Matx<_Tp, _rows, 1>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    dst.resize(_rows);
+
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(1, _rows, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, 1, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        src.copyTo(_dst);
+    }
+}
+
+
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    CV_Assert(src.rows == 1);
+    dst.resize(src.cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+//Matx
+template<typename _Tp, int _cols> static inline
+void cv2eigen( const Matx<_Tp, 1, _cols>& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    dst.resize(_cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, 1, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(1, _cols, DataType<_Tp>::type,
+                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
+
+//! @}
+
+} // cv
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/fast_math.hpp b/thirdparty/linux/include/opencv2/core/fast_math.hpp
new file mode 100644
index 0000000..c76936a
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/fast_math.hpp
@@ -0,0 +1,303 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_FAST_MATH_HPP
+#define OPENCV_CORE_FAST_MATH_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils
+//! @{
+
+/****************************************************************************************\
+*                                      fast math                                         *
+\****************************************************************************************/
+
+#if defined __BORLANDC__
+#  include <fastmath.h>
+#elif defined __cplusplus
+#  include <cmath>
+#else
+#  include <math.h>
+#endif
+
+#ifdef HAVE_TEGRA_OPTIMIZATION
+#  include "tegra_round.hpp"
+#endif
+
+#if CV_VFP
+    // 1. general scheme
+    #define ARM_ROUND(_value, _asm_string) \
+        int res; \
+        float temp; \
+        (void)temp; \
+        asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
+        return res
+    // 2. version for double
+    #ifdef __clang__
+        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
+    #else
+        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
+    #endif
+    // 3. version for float
+    #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
+#endif // CV_VFP
+
+/** @brief Rounds floating-point number to the nearest integer
+
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int
+cvRound( double value )
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128d t = _mm_set_sd( value );
+    return _mm_cvtsd_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_DBL(value);
+#elif defined CV_ICC || defined __GNUC__
+# if CV_VFP
+    ARM_ROUND_DBL(value);
+# else
+    return (int)lrint(value);
+# endif
+#else
+    /* it's ok if round does not comply with IEEE754 standard;
+       the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5 : -0.5));
+#endif
+}
+
+
+/** @brief Rounds floating-point number to the nearest integer not larger than the original.
+
+ The function computes an integer i such that:
+ \f[i \le \texttt{value} < i+1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvFloor( double value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128d t = _mm_set_sd( value );
+    int i = _mm_cvtsd_si32(t);
+    return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i - (i > value);
+#else
+    int i = cvRound(value);
+    float diff = (float)(value - i);
+    return i - (diff < 0);
+#endif
+}
+
+/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
+
+ The function computes an integer i such that:
+ \f[i \le \texttt{value} < i+1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvCeil( double value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128d t = _mm_set_sd( value );
+    int i = _mm_cvtsd_si32(t);
+    return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i + (i < value);
+#else
+    int i = cvRound(value);
+    float diff = (float)(i - value);
+    return i + (diff < 0);
+#endif
+}
+
+/** @brief Determines if the argument is Not A Number.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
+ otherwise. */
+CV_INLINE int cvIsNaN( double value )
+{
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
+           ((unsigned)ieee754.u != 0) > 0x7ff00000;
+}
+
+/** @brief Determines if the argument is Infinity.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
+ and 0 otherwise. */
+CV_INLINE int cvIsInf( double value )
+{
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
+            (unsigned)ieee754.u == 0;
+}
+
+#ifdef __cplusplus
+
+/** @overload */
+CV_INLINE int cvRound(float value)
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
+      defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128 t = _mm_set_ss( value );
+    return _mm_cvtss_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_FLT(value);
+#elif defined CV_ICC || defined __GNUC__
+# if CV_VFP
+    ARM_ROUND_FLT(value);
+# else
+    return (int)lrintf(value);
+# endif
+#else
+    /* it's ok if round does not comply with IEEE754 standard;
+     the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5f : -0.5f));
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvRound( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvFloor( float value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128 t = _mm_set_ss( value );
+    int i = _mm_cvtss_si32(t);
+    return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i - (i > value);
+#else
+    int i = cvRound(value);
+    float diff = (float)(value - i);
+    return i - (diff < 0);
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvFloor( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvCeil( float value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
+    __m128 t = _mm_set_ss( value );
+    int i = _mm_cvtss_si32(t);
+    return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i + (i < value);
+#else
+    int i = cvRound(value);
+    float diff = (float)(i - value);
+    return i + (diff < 0);
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvCeil( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvIsNaN( float value )
+{
+    Cv32suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffff) > 0x7f800000;
+}
+
+/** @overload */
+CV_INLINE int cvIsInf( float value )
+{
+    Cv32suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffff) == 0x7f800000;
+}
+
+#endif // __cplusplus
+
+//! @} core_utils
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/hal/hal.hpp b/thirdparty/linux/include/opencv2/core/hal/hal.hpp
new file mode 100644
index 0000000..68900ec
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/hal.hpp
@@ -0,0 +1,250 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_HPP
+#define OPENCV_HAL_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/hal/interface.h"
+
+namespace cv { namespace hal {
+
+//! @addtogroup core_hal_functions
+//! @{
+
+CV_EXPORTS int normHamming(const uchar* a, int n);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
+
+CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
+
+CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
+CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
+
+CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+
+CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
+CV_EXPORTS float normL1_(const float* a, const float* b, int n);
+CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
+
+CV_EXPORTS void exp32f(const float* src, float* dst, int n);
+CV_EXPORTS void exp64f(const double* src, double* dst, int n);
+CV_EXPORTS void log32f(const float* src, float* dst, int n);
+CV_EXPORTS void log64f(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
+
+CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
+CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
+CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
+CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
+
+CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
+CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
+CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
+CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
+
+CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+
+CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
+CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
+
+struct CV_EXPORTS DFT1D
+{
+    static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
+    virtual void apply(const uchar *src, uchar *dst) = 0;
+    virtual ~DFT1D() {}
+};
+
+struct CV_EXPORTS DFT2D
+{
+    static Ptr<DFT2D> create(int width, int height, int depth,
+                             int src_channels, int dst_channels,
+                             int flags, int nonzero_rows = 0);
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
+    virtual ~DFT2D() {}
+};
+
+struct CV_EXPORTS DCT2D
+{
+    static Ptr<DCT2D> create(int width, int height, int depth, int flags);
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
+    virtual ~DCT2D() {}
+};
+
+//! @} core_hal
+
+//=============================================================================
+// for binary compatibility with 3.0
+
+//! @cond IGNORED
+
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+CV_EXPORTS void exp(const float* src, float* dst, int n);
+CV_EXPORTS void exp(const double* src, double* dst, int n);
+CV_EXPORTS void log(const float* src, float* dst, int n);
+CV_EXPORTS void log(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
+
+//! @endcond
+
+}} //cv::hal
+
+#endif //OPENCV_HAL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/hal/interface.h b/thirdparty/linux/include/opencv2/core/hal/interface.h
new file mode 100644
index 0000000..4a97e65
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/interface.h
@@ -0,0 +1,178 @@
+#ifndef OPENCV_CORE_HAL_INTERFACE_H
+#define OPENCV_CORE_HAL_INTERFACE_H
+
+//! @addtogroup core_hal_interface
+//! @{
+
+//! @name Return codes
+//! @{
+#define CV_HAL_ERROR_OK 0
+#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
+#define CV_HAL_ERROR_UNKNOWN -1
+//! @}
+
+#ifdef __cplusplus
+#include <cstddef>
+#else
+#include <stddef.h>
+#include <stdbool.h>
+#endif
+
+//! @name Data types
+//! primitive types
+//! - schar  - signed 1 byte integer
+//! - uchar  - unsigned 1 byte integer
+//! - short  - signed 2 byte integer
+//! - ushort - unsigned 2 byte integer
+//! - int    - signed 4 byte integer
+//! - uint   - unsigned 4 byte integer
+//! - int64  - signed 8 byte integer
+//! - uint64 - unsigned 8 byte integer
+//! @{
+#if !defined _MSC_VER && !defined __BORLANDC__
+#  if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
+#    include <cstdint>
+     typedef std::uint32_t uint;
+#  else
+#    include <stdint.h>
+     typedef uint32_t uint;
+#  endif
+#else
+   typedef unsigned uint;
+#endif
+
+typedef signed char schar;
+
+#ifndef __IPL_H__
+   typedef unsigned char uchar;
+   typedef unsigned short ushort;
+#endif
+
+#if defined _MSC_VER || defined __BORLANDC__
+   typedef __int64 int64;
+   typedef unsigned __int64 uint64;
+#  define CV_BIG_INT(n)   n##I64
+#  define CV_BIG_UINT(n)  n##UI64
+#else
+   typedef int64_t int64;
+   typedef uint64_t uint64;
+#  define CV_BIG_INT(n)   n##LL
+#  define CV_BIG_UINT(n)  n##ULL
+#endif
+
+#define CV_CN_MAX     512
+#define CV_CN_SHIFT   3
+#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT)
+
+#define CV_8U   0
+#define CV_8S   1
+#define CV_16U  2
+#define CV_16S  3
+#define CV_32S  4
+#define CV_32F  5
+#define CV_64F  6
+#define CV_USRTYPE1 7
+
+#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1)
+#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK)
+
+#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
+#define CV_MAKE_TYPE CV_MAKETYPE
+
+#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
+#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
+#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
+#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
+#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
+
+#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
+#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
+#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
+#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
+#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
+
+#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
+#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
+#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
+#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
+#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
+
+#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
+#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
+#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
+#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
+#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
+
+#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
+#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
+#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
+#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
+#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
+
+#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
+#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
+#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
+#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
+
+#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
+#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
+#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
+#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
+#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
+//! @}
+
+//! @name Comparison operation
+//! @sa cv::CmpTypes
+//! @{
+#define CV_HAL_CMP_EQ 0
+#define CV_HAL_CMP_GT 1
+#define CV_HAL_CMP_GE 2
+#define CV_HAL_CMP_LT 3
+#define CV_HAL_CMP_LE 4
+#define CV_HAL_CMP_NE 5
+//! @}
+
+//! @name Border processing modes
+//! @sa cv::BorderTypes
+//! @{
+#define CV_HAL_BORDER_CONSTANT 0
+#define CV_HAL_BORDER_REPLICATE 1
+#define CV_HAL_BORDER_REFLECT 2
+#define CV_HAL_BORDER_WRAP 3
+#define CV_HAL_BORDER_REFLECT_101 4
+#define CV_HAL_BORDER_TRANSPARENT 5
+#define CV_HAL_BORDER_ISOLATED 16
+//! @}
+
+//! @name DFT flags
+//! @{
+#define CV_HAL_DFT_INVERSE        1
+#define CV_HAL_DFT_SCALE          2
+#define CV_HAL_DFT_ROWS           4
+#define CV_HAL_DFT_COMPLEX_OUTPUT 16
+#define CV_HAL_DFT_REAL_OUTPUT    32
+#define CV_HAL_DFT_TWO_STAGE      64
+#define CV_HAL_DFT_STAGE_COLS    128
+#define CV_HAL_DFT_IS_CONTINUOUS 512
+#define CV_HAL_DFT_IS_INPLACE 1024
+//! @}
+
+//! @name SVD flags
+//! @{
+#define CV_HAL_SVD_NO_UV    1
+#define CV_HAL_SVD_SHORT_UV 2
+#define CV_HAL_SVD_MODIFY_A 4
+#define CV_HAL_SVD_FULL_UV  8
+//! @}
+
+//! @name Gemm flags
+//! @{
+#define CV_HAL_GEMM_1_T 1
+#define CV_HAL_GEMM_2_T 2
+#define CV_HAL_GEMM_3_T 4
+//! @}
+
+//! @}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/hal/intrin.hpp b/thirdparty/linux/include/opencv2/core/hal/intrin.hpp
new file mode 100644
index 0000000..34075e3
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/intrin.hpp
@@ -0,0 +1,414 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_HPP
+#define OPENCV_HAL_INTRIN_HPP
+
+#include <cmath>
+#include <float.h>
+#include <stdlib.h>
+#include "opencv2/core/cvdef.h"
+
+#define OPENCV_HAL_ADD(a, b) ((a) + (b))
+#define OPENCV_HAL_AND(a, b) ((a) & (b))
+#define OPENCV_HAL_NOP(a) (a)
+#define OPENCV_HAL_1ST(a, b) (a)
+
+// unlike HAL API, which is in cv::hal,
+// we put intrinsics into cv namespace to make its
+// access from within opencv code more accessible
+namespace cv {
+
+//! @addtogroup core_hal_intrin
+//! @{
+
+//! @cond IGNORED
+template<typename _Tp> struct V_TypeTraits
+{
+    typedef _Tp int_type;
+    typedef _Tp uint_type;
+    typedef _Tp abs_type;
+    typedef _Tp sum_type;
+
+    enum { delta = 0, shift = 0 };
+
+    static int_type reinterpret_int(_Tp x) { return x; }
+    static uint_type reinterpet_uint(_Tp x) { return x; }
+    static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
+};
+
+template<> struct V_TypeTraits<uchar>
+{
+    typedef uchar value_type;
+    typedef schar int_type;
+    typedef uchar uint_type;
+    typedef uchar abs_type;
+    typedef int sum_type;
+
+    typedef ushort w_type;
+    typedef unsigned q_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<schar>
+{
+    typedef schar value_type;
+    typedef schar int_type;
+    typedef uchar uint_type;
+    typedef uchar abs_type;
+    typedef int sum_type;
+
+    typedef short w_type;
+    typedef int q_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<ushort>
+{
+    typedef ushort value_type;
+    typedef short int_type;
+    typedef ushort uint_type;
+    typedef ushort abs_type;
+    typedef int sum_type;
+
+    typedef unsigned w_type;
+    typedef uchar nu_type;
+
+    enum { delta = 32768, shift = 16 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<short>
+{
+    typedef short value_type;
+    typedef short int_type;
+    typedef ushort uint_type;
+    typedef ushort abs_type;
+    typedef int sum_type;
+
+    typedef int w_type;
+    typedef uchar nu_type;
+    typedef schar n_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<unsigned>
+{
+    typedef unsigned value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef unsigned abs_type;
+    typedef unsigned sum_type;
+
+    typedef uint64 w_type;
+    typedef ushort nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<int>
+{
+    typedef int value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef unsigned abs_type;
+    typedef int sum_type;
+
+    typedef int64 w_type;
+    typedef short n_type;
+    typedef ushort nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<uint64>
+{
+    typedef uint64 value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef uint64 abs_type;
+    typedef uint64 sum_type;
+
+    typedef unsigned nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<int64>
+{
+    typedef int64 value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef uint64 abs_type;
+    typedef int64 sum_type;
+
+    typedef int nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+
+template<> struct V_TypeTraits<float>
+{
+    typedef float value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef float abs_type;
+    typedef float sum_type;
+
+    typedef double w_type;
+
+    static int_type reinterpret_int(value_type x)
+    {
+        Cv32suf u;
+        u.f = x;
+        return u.i;
+    }
+    static uint_type reinterpet_uint(value_type x)
+    {
+        Cv32suf u;
+        u.f = x;
+        return u.u;
+    }
+    static value_type reinterpret_from_int(int_type x)
+    {
+        Cv32suf u;
+        u.i = x;
+        return u.f;
+    }
+};
+
+template<> struct V_TypeTraits<double>
+{
+    typedef double value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef double abs_type;
+    typedef double sum_type;
+    static int_type reinterpret_int(value_type x)
+    {
+        Cv64suf u;
+        u.f = x;
+        return u.i;
+    }
+    static uint_type reinterpet_uint(value_type x)
+    {
+        Cv64suf u;
+        u.f = x;
+        return u.u;
+    }
+    static value_type reinterpret_from_int(int_type x)
+    {
+        Cv64suf u;
+        u.i = x;
+        return u.f;
+    }
+};
+
+template <typename T> struct V_SIMD128Traits
+{
+    enum { nlanes = 16 / sizeof(T) };
+};
+
+//! @endcond
+
+//! @}
+
+}
+
+#ifdef CV_DOXYGEN
+#   undef CV_SSE2
+#   undef CV_NEON
+#endif
+
+#if CV_SSE2
+
+#include "opencv2/core/hal/intrin_sse.hpp"
+
+#elif CV_NEON
+
+#include "opencv2/core/hal/intrin_neon.hpp"
+
+#else
+
+#include "opencv2/core/hal/intrin_cpp.hpp"
+
+#endif
+
+//! @addtogroup core_hal_intrin
+//! @{
+
+#ifndef CV_SIMD128
+//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
+#define CV_SIMD128 0
+#endif
+
+#ifndef CV_SIMD128_64F
+//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
+#define CV_SIMD128_64F 0
+#endif
+
+//! @}
+
+//==================================================================================================
+
+//! @cond IGNORED
+
+namespace cv {
+
+template <typename R> struct V_RegTrait128;
+
+template <> struct V_RegTrait128<uchar> {
+    typedef v_uint8x16 reg;
+    typedef v_uint16x8 w_reg;
+    typedef v_uint32x4 q_reg;
+    typedef v_uint8x16 u_reg;
+    static v_uint8x16 zero() { return v_setzero_u8(); }
+    static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
+};
+
+template <> struct V_RegTrait128<schar> {
+    typedef v_int8x16 reg;
+    typedef v_int16x8 w_reg;
+    typedef v_int32x4 q_reg;
+    typedef v_uint8x16 u_reg;
+    static v_int8x16 zero() { return v_setzero_s8(); }
+    static v_int8x16 all(schar val) { return v_setall_s8(val); }
+};
+
+template <> struct V_RegTrait128<ushort> {
+    typedef v_uint16x8 reg;
+    typedef v_uint32x4 w_reg;
+    typedef v_int16x8 int_reg;
+    typedef v_uint16x8 u_reg;
+    static v_uint16x8 zero() { return v_setzero_u16(); }
+    static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
+};
+
+template <> struct V_RegTrait128<short> {
+    typedef v_int16x8 reg;
+    typedef v_int32x4 w_reg;
+    typedef v_uint16x8 u_reg;
+    static v_int16x8 zero() { return v_setzero_s16(); }
+    static v_int16x8 all(short val) { return v_setall_s16(val); }
+};
+
+template <> struct V_RegTrait128<unsigned> {
+    typedef v_uint32x4 reg;
+    typedef v_uint64x2 w_reg;
+    typedef v_int32x4 int_reg;
+    typedef v_uint32x4 u_reg;
+    static v_uint32x4 zero() { return v_setzero_u32(); }
+    static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
+};
+
+template <> struct V_RegTrait128<int> {
+    typedef v_int32x4 reg;
+    typedef v_int64x2 w_reg;
+    typedef v_uint32x4 u_reg;
+    static v_int32x4 zero() { return v_setzero_s32(); }
+    static v_int32x4 all(int val) { return v_setall_s32(val); }
+};
+
+template <> struct V_RegTrait128<uint64> {
+    typedef v_uint64x2 reg;
+    static v_uint64x2 zero() { return v_setzero_u64(); }
+    static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
+};
+
+template <> struct V_RegTrait128<int64> {
+    typedef v_int64x2 reg;
+    static v_int64x2 zero() { return v_setzero_s64(); }
+    static v_int64x2 all(int64 val) { return v_setall_s64(val); }
+};
+
+template <> struct V_RegTrait128<float> {
+    typedef v_float32x4 reg;
+    typedef v_int32x4 int_reg;
+    typedef v_float32x4 u_reg;
+    static v_float32x4 zero() { return v_setzero_f32(); }
+    static v_float32x4 all(float val) { return v_setall_f32(val); }
+};
+
+#if CV_SIMD128_64F
+template <> struct V_RegTrait128<double> {
+    typedef v_float64x2 reg;
+    typedef v_int32x4 int_reg;
+    typedef v_float64x2 u_reg;
+    static v_float64x2 zero() { return v_setzero_f64(); }
+    static v_float64x2 all(double val) { return v_setall_f64(val); }
+};
+#endif
+
+} // cv::
+
+//! @endcond
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/hal/intrin_cpp.hpp b/thirdparty/linux/include/opencv2/core/hal/intrin_cpp.hpp
new file mode 100644
index 0000000..93ca397
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/intrin_cpp.hpp
@@ -0,0 +1,1790 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_CPP_HPP
+#define OPENCV_HAL_INTRIN_CPP_HPP
+
+#include <limits>
+#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
+
+namespace cv
+{
+
+/** @addtogroup core_hal_intrin
+
+"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
+different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86
+architectures and __NEON__ on ARM architectures, both allow working with 128 bit registers
+containing packed values of different types. In case when there is no SIMD extension available
+during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as
+expected although it could be slower.
+
+### Types
+
+There are several types representing 128-bit register as a vector of packed values, each type is
+implemented as a structure based on a one SIMD register.
+
+- cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsgined/signed) - int
+- cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x4: four 32-bit floating point values (signed) - float
+- cv::v_float64x2: two 64-bit floating point valies (signed) - double
+
+@note
+cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to
+check the CV_SIMD128_64F preprocessor definition:
+@code
+#if CV_SIMD128_64F
+//...
+#endif
+@endcode
+
+### Load and store operations
+
+These operations allow to set contents of the register explicitly or by loading it from some memory
+block and to save contents of the register to memory block.
+
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
+- Other create methods:
+@ref v_setall_s8, @ref v_setall_u8, ...,
+@ref v_setzero_u8, @ref v_setzero_s8, ...
+- Memory operations:
+@ref v_load, @ref v_load_aligned, @ref v_load_halves,
+@ref v_store, @ref v_store_aligned,
+@ref v_store_high, @ref v_store_low
+
+### Value reordering
+
+These operations allow to reorder or recombine elements in one or multiple vectors.
+
+- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
+- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
+@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
+- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
+- Extract: @ref v_extract
+
+
+### Arithmetic, bitwise and comparison operations
+
+Element-wise binary and unary operations.
+
+- Arithmetics:
+@ref operator +(const v_reg &a, const v_reg &b) "+",
+@ref operator -(const v_reg &a, const v_reg &b) "-",
+@ref operator *(const v_reg &a, const v_reg &b) "*",
+@ref operator /(const v_reg &a, const v_reg &b) "/",
+@ref v_mul_expand
+
+- Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
+
+- Bitwise shifts:
+@ref operator <<(const v_reg &a, int s) "<<",
+@ref operator >>(const v_reg &a, int s) ">>",
+@ref v_shl, @ref v_shr
+
+- Bitwise logic:
+@ref operator&(const v_reg &a, const v_reg &b) "&",
+@ref operator |(const v_reg &a, const v_reg &b) "|",
+@ref operator ^(const v_reg &a, const v_reg &b) "^",
+@ref operator ~(const v_reg &a) "~"
+
+- Comparison:
+@ref operator >(const v_reg &a, const v_reg &b) ">",
+@ref operator >=(const v_reg &a, const v_reg &b) ">=",
+@ref operator <(const v_reg &a, const v_reg &b) "<",
+@ref operator <=(const v_reg &a, const v_reg &b) "<=",
+@ref operator==(const v_reg &a, const v_reg &b) "==",
+@ref operator !=(const v_reg &a, const v_reg &b) "!="
+
+- min/max: @ref v_min, @ref v_max
+
+### Reduce and mask
+
+Most of these operations return only one value.
+
+- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum
+- Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
+
+### Other math
+
+- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
+- Absolute values: @ref v_abs, @ref v_absdiff
+
+### Conversions
+
+Different type conversions and casts:
+
+- Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc,
+- To float: @ref v_cvt_f32, @ref v_cvt_f64
+- Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ...
+
+### Matrix operations
+
+In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4
+
+### Usability
+
+Most operations are implemented only for some subset of the available types, following matrices
+shows the applicability of different operations to the types.
+
+Regular integers:
+
+| Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
+|-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
+|load, store        | x | x | x | x | x | x |
+|interleave         | x | x | x | x | x | x |
+|expand             | x | x | x | x | x | x |
+|expand_q           | x | x |   |   |   |   |
+|add, sub           | x | x | x | x | x | x |
+|add_wrap, sub_wrap | x | x | x | x |   |   |
+|mul                |   |   | x | x | x | x |
+|mul_expand         |   |   | x | x | x |   |
+|compare            | x | x | x | x | x | x |
+|shift              |   |   | x | x | x | x |
+|dotprod            |   |   |   | x |   |   |
+|logical            | x | x | x | x | x | x |
+|min, max           | x | x | x | x | x | x |
+|absdiff            | x | x | x | x | x | x |
+|reduce             |   |   |   |   | x | x |
+|mask               | x | x | x | x | x | x |
+|pack               | x | x | x | x | x | x |
+|pack_u             | x |   | x |   |   |   |
+|unpack             | x | x | x | x | x | x |
+|extract            | x | x | x | x | x | x |
+|cvt_flt32          |   |   |   |   |   | x |
+|cvt_flt64          |   |   |   |   |   | x |
+|transpose4x4       |   |   |   |   | x | x |
+
+Big integers:
+
+| Operations\\Types | uint 64x2 | int 64x2 |
+|-------------------|:-:|:-:|
+|load, store        | x | x |
+|add, sub           | x | x |
+|shift              | x | x |
+|logical            | x | x |
+|extract            | x | x |
+
+Floating point:
+
+| Operations\\Types | float 32x4 | float 64x2 |
+|-------------------|:-:|:-:|
+|load, store        | x | x |
+|interleave         | x |   |
+|add, sub           | x | x |
+|mul                | x | x |
+|div                | x | x |
+|compare            | x | x |
+|min, max           | x | x |
+|absdiff            | x | x |
+|reduce             | x |   |
+|mask               | x | x |
+|unpack             | x | x |
+|cvt_flt32          |   | x |
+|cvt_flt64          | x |   |
+|sqrt, abs          | x | x |
+|float math         | x | x |
+|transpose4x4       | x |   |
+
+
+ @{ */
+
+template<typename _Tp, int n> struct v_reg
+{
+//! @cond IGNORED
+    typedef _Tp lane_type;
+    typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec;
+    typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec;
+    enum { nlanes = n };
+// !@endcond
+
+    /** @brief Constructor
+
+    Initializes register with data from memory
+    @param ptr pointer to memory block with data for register */
+    explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
+
+    /** @brief Constructor
+
+    Initializes register with two 64-bit values */
+    v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
+
+    /** @brief Constructor
+
+    Initializes register with four 32-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
+
+    /** @brief Constructor
+
+    Initializes register with eight 16-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+    }
+
+    /** @brief Constructor
+
+    Initializes register with sixteen 8-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7,
+           _Tp s8, _Tp s9, _Tp s10, _Tp s11,
+           _Tp s12, _Tp s13, _Tp s14, _Tp s15)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+        s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
+        s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
+    }
+
+    /** @brief Default constructor
+
+    Does not initialize anything*/
+    v_reg() {}
+
+    /** @brief Copy constructor */
+    v_reg(const v_reg<_Tp, n> & r)
+    {
+        for( int i = 0; i < n; i++ )
+            s[i] = r.s[i];
+    }
+    /** @brief Access first value
+
+    Returns value of the first lane according to register type, for example:
+    @code{.cpp}
+    v_int32x4 r(1, 2, 3, 4);
+    int v = r.get0(); // returns 1
+    v_uint64x2 r(1, 2);
+    uint64_t v = r.get0(); // returns 1
+    @endcode
+    */
+    _Tp get0() const { return s[0]; }
+
+//! @cond IGNORED
+    _Tp get(const int i) const { return s[i]; }
+    v_reg<_Tp, n> high() const
+    {
+        v_reg<_Tp, n> c;
+        int i;
+        for( i = 0; i < n/2; i++ )
+        {
+            c.s[i] = s[i+(n/2)];
+            c.s[i+(n/2)] = 0;
+        }
+        return c;
+    }
+
+    static v_reg<_Tp, n> zero()
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = (_Tp)0;
+        return c;
+    }
+
+    static v_reg<_Tp, n> all(_Tp s)
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = s;
+        return c;
+    }
+
+    template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
+    {
+        size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n);
+        v_reg<_Tp2, n2> c;
+        std::memcpy(&c.s[0], &s[0], bytes);
+        return c;
+    }
+
+    _Tp s[n];
+//! @endcond
+};
+
+/** @brief Sixteen 8-bit unsigned integer values */
+typedef v_reg<uchar, 16> v_uint8x16;
+/** @brief Sixteen 8-bit signed integer values */
+typedef v_reg<schar, 16> v_int8x16;
+/** @brief Eight 16-bit unsigned integer values */
+typedef v_reg<ushort, 8> v_uint16x8;
+/** @brief Eight 16-bit signed integer values */
+typedef v_reg<short, 8> v_int16x8;
+/** @brief Four 32-bit unsigned integer values */
+typedef v_reg<unsigned, 4> v_uint32x4;
+/** @brief Four 32-bit signed integer values */
+typedef v_reg<int, 4> v_int32x4;
+/** @brief Four 32-bit floating point values (single precision) */
+typedef v_reg<float, 4> v_float32x4;
+/** @brief Two 64-bit floating point values (double precision) */
+typedef v_reg<double, 2> v_float64x2;
+/** @brief Two 64-bit unsigned integer values */
+typedef v_reg<uint64, 2> v_uint64x2;
+/** @brief Two 64-bit signed integer values */
+typedef v_reg<int64, 2> v_int64x2;
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> \
+    operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return c; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& \
+    operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return a; \
+}
+
+/** @brief Add values
+
+For all types. */
+OPENCV_HAL_IMPL_BIN_OP(+)
+
+/** @brief Subtract values
+
+For all types. */
+OPENCV_HAL_IMPL_BIN_OP(-)
+
+/** @brief Multiply values
+
+For 16- and 32-bit integer types and floating types. */
+OPENCV_HAL_IMPL_BIN_OP(*)
+
+/** @brief Divide values
+
+For floating types only. */
+OPENCV_HAL_IMPL_BIN_OP(/)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
+    (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return c; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
+    bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return a; \
+}
+
+/** @brief Bitwise AND
+
+Only for integer types. */
+OPENCV_HAL_IMPL_BIT_OP(&)
+
+/** @brief Bitwise OR
+
+Only for integer types. */
+OPENCV_HAL_IMPL_BIT_OP(|)
+
+/** @brief Bitwise XOR
+
+Only for integer types.*/
+OPENCV_HAL_IMPL_BIT_OP(^)
+
+/** @brief Bitwise NOT
+
+Only for integer types.*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i]));
+    }
+    return c;
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
+template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp2, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i]); \
+    return c; \
+}
+
+/** @brief Square root of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
+
+//! @cond IGNORED
+OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+//! @endcond
+
+/** @brief Absolute value of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
+                          typename V_TypeTraits<_Tp>::abs_type)
+
+/** @brief Round elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int)
+
+/** @brief Floor elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int)
+
+/** @brief Ceil elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int)
+
+/** @brief Truncate elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i], b.s[i]); \
+    return c; \
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
+{ \
+    _Tp c = a.s[0]; \
+    for( int i = 1; i < n; i++ ) \
+        c = cfunc(c, a.s[i]); \
+    return c; \
+}
+
+/** @brief Choose min values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{min(A1,B1) min(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
+
+/** @brief Choose max values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{max(A1,B1) max(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
+
+/** @brief Find one min value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => min(A1,A2,A3,...)
+@endcode
+For 32-bit integer and 32-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
+
+/** @brief Find one max value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => max(A1,A2,A3,...)
+@endcode
+For 32-bit integer and 32-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
+
+//! @cond IGNORED
+template<typename _Tp, int n>
+inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
+{
+    for( int i = 0; i < n; i++ )
+    {
+        minval.s[i] = std::min(a.s[i], b.s[i]);
+        maxval.s[i] = std::max(a.s[i], b.s[i]);
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
+    return c; \
+}
+
+/** @brief Less-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<)
+
+/** @brief Greater-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>)
+
+/** @brief Less-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<=)
+
+/** @brief Greater-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>=)
+
+/** @brief Equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(==)
+
+/** @brief Not equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(!=)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef _Tp2 rtype; \
+    v_reg<rtype, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
+    return c; \
+}
+
+/** @brief Add values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
+
+/** @brief Subtract values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
+
+//! @cond IGNORED
+template<typename T> inline T _absdiff(T a, T b)
+{
+    return a > b ? a - b : b - a;
+}
+//! @endcond
+
+/** @brief Absolute difference
+
+Returns \f$ |a - b| \f$ converted to corresponding unsigned type.
+Example:
+@code{.cpp}
+v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
+v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
+@endcode
+For 8-, 16-, 32-bit integer source types. */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
+{
+    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
+    v_reg<rtype, n> c;
+    const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0;
+    for( int i = 0; i < n; i++ )
+    {
+        rtype ua = a.s[i] ^ mask;
+        rtype ub = b.s[i] ^ mask;
+        c.s[i] = _absdiff(ua, ub);
+    }
+    return c;
+}
+
+/** @overload
+
+For 32-bit floating point values */
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+{
+    v_float32x4 c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @overload
+
+For 64-bit floating point values */
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{
+    v_float64x2 c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @brief Inversed square root
+
+Returns \f$ 1/sqrt(a) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = 1.f/std::sqrt(a.s[i]);
+    return c;
+}
+
+/** @brief Magnitude
+
+Returns \f$ sqrt(a^2 + b^2) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
+    return c;
+}
+
+/** @brief Square of the magnitude
+
+Returns \f$ a^2 + b^2 \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
+    return c;
+}
+
+/** @brief Multiply and add
+
+Returns \f$ a*b + c \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                              const v_reg<_Tp, n>& c)
+{
+    v_reg<_Tp, n> d;
+    for( int i = 0; i < n; i++ )
+        d.s[i] = a.s[i]*b.s[i] + c.s[i];
+    return d;
+}
+
+/** @brief Dot product of elements
+
+Multiply values in two registers and sum adjacent result pairs.
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+{A1B1+A2B2 ...} // 32-bit
+@endcode
+Implemented only for 16-bit signed source type (v_int16x8).
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, n/2> c;
+    for( int i = 0; i < (n/2); i++ )
+        c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
+    return c;
+}
+
+/** @brief Multiply and expand
+
+Multiply values two registers and store results in two registers with wider pack type.
+Scheme:
+@code
+  {A B C D} // 32-bit
+x {E F G H} // 32-bit
+---------------
+{AE BF}         // 64-bit
+        {CG DH} // 64-bit
+@endcode
+Example:
+@code{.cpp}
+v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
+v_uint64x2 c, d; // results
+v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
+@endcode
+Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
+*/
+template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i]*b.s[i];
+        d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
+    }
+}
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
+                                                 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = (_Tp)(a.s[i] shift_op imm); \
+    return c; \
+}
+
+/** @brief Bitwise shift left
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(<<)
+
+/** @brief Bitwise shift right
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(>>)
+
+/** @brief Sum packed values
+
+Scheme:
+@code
+{A1 A2 A3 ...} => sum{A1,A2,A3,...}
+@endcode
+For 32-bit integer and 32-bit floating point types.*/
+template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
+{
+    typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
+    for( int i = 1; i < n; i++ )
+        c += a.s[i];
+    return c;
+}
+
+/** @brief Get negative values mask
+
+Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes.
+Example:
+@code{.cpp}
+v_int32x4 r; // set to {-1, -1, 1, 1}
+int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
+{
+    int mask = 0;
+    for( int i = 0; i < n; i++ )
+        mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
+    return mask;
+}
+
+/** @brief Check if all packed values are less than zero
+
+Unsigned values will be casted to signed: `uchar 254 => char -2`.
+For all types except 64-bit. */
+template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
+            return false;
+    return true;
+}
+
+/** @brief Check if any of packed values is less than zero
+
+Unsigned values will be casted to signed: `uchar 254 => char -2`.
+For all types except 64-bit. */
+template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
+            return true;
+    return false;
+}
+
+/** @brief Bitwise select
+
+Return value will be built by combining values a and b using the following scheme:
+If the i-th bit in _mask_ is 1
+    select i-th bit from _a_
+else
+    select i-th bit from _b_ */
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
+                                                           const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef V_TypeTraits<_Tp> Traits;
+    typedef typename Traits::int_type int_type;
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        int_type m = Traits::reinterpret_int(mask.s[i]);
+        c.s[i] =  Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m)
+                                             | (Traits::reinterpret_int(b.s[i]) & ~m));
+    }
+    return c;
+}
+
+/** @brief Expand values to the wider pack type
+
+Copy contents of register to two registers with 2x wider pack type.
+Scheme:
+@code
+ int32x4     int64x2 int64x2
+{A B C D} ==> {A B} , {C D}
+@endcode */
+template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
+{
+    for( int i = 0; i < (n/2); i++ )
+    {
+        b0.s[i] = a.s[i];
+        b1.s[i] = a.s[i+(n/2)];
+    }
+}
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
+    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
+    return c;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
+    v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
+    return c;
+}
+//! @endcond
+
+/** @brief Interleave two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A1 B1 A2 B2} and {A3 B3 A4 B4}
+@endcode
+For all types except 64-bit.
+*/
+template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
+                                               v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
+{
+    int i;
+    for( i = 0; i < n/2; i++ )
+    {
+        b0.s[i*2] = a0.s[i];
+        b0.s[i*2+1] = a1.s[i];
+    }
+    for( ; i < n; i++ )
+    {
+        b1.s[i*2-n] = a0.s[i];
+        b1.s[i*2-n+1] = a1.s[i];
+    }
+}
+
+/** @brief Load register contents from memory
+
+@param ptr pointer to memory block with data
+@return register object
+
+@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+}
+
+/** @brief Load register contents from memory (aligned)
+
+similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+}
+
+/** @brief Load register contents from two memory blocks
+
+@param loptr memory block containing data for first half (0..n/2)
+@param hiptr memory block containing data for second half (n/2..n)
+
+@code{.cpp}
+int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
+v_int32x4 r = v_load_halves(lo, hi);
+@endcode
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
+{
+    v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
+    for( int i = 0; i < c.nlanes/2; i++ )
+    {
+        c.s[i] = loptr[i];
+        c.s[i+c.nlanes/2] = hiptr[i];
+    }
+    return c;
+}
+
+/** @brief Load register contents from memory with double expand
+
+Same as cv::v_load, but result pack type will be 2x wider than memory type.
+
+@code{.cpp}
+short buf[4] = {1, 2, 3, 4}; // type is int16
+v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
+@endcode
+For 8-, 16-, 32-bit integer source types. */
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2>
+v_load_expand(const _Tp* ptr)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c;
+    for( int i = 0; i < c.nlanes; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+
+/** @brief Load register contents from memory with quad expand
+
+Same as cv::v_load_expand, but result type is 4 times wider than source.
+@code{.cpp}
+char buf[4] = {1, 2, 3, 4}; // type is int8
+v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
+@endcode
+For 8-bit integer source types. */
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4>
+v_load_expand_q(const _Tp* ptr)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type q_type;
+    v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c;
+    for( int i = 0; i < c.nlanes; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+
+/** @brief Load and deinterleave (2 channels)
+
+Load data from memory deinterleave and store to 2 registers.
+Scheme:
+@code
+{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b)
+{
+    int i, i2;
+    for( i = i2 = 0; i < n; i++, i2 += 2 )
+    {
+        a.s[i] = ptr[i2];
+        b.s[i] = ptr[i2+1];
+    }
+}
+
+/** @brief Load and deinterleave (3 channels)
+
+Load data from memory deinterleave and store to 3 registers.
+Scheme:
+@code
+{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
+{
+    int i, i3;
+    for( i = i3 = 0; i < n; i++, i3 += 3 )
+    {
+        a.s[i] = ptr[i3];
+        b.s[i] = ptr[i3+1];
+        c.s[i] = ptr[i3+2];
+    }
+}
+
+/** @brief Load and deinterleave (4 channels)
+
+Load data from memory deinterleave and store to 4 registers.
+Scheme:
+@code
+{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
+                                v_reg<_Tp, n>& d)
+{
+    int i, i4;
+    for( i = i4 = 0; i < n; i++, i4 += 4 )
+    {
+        a.s[i] = ptr[i4];
+        b.s[i] = ptr[i4+1];
+        c.s[i] = ptr[i4+2];
+        d.s[i] = ptr[i4+3];
+    }
+}
+
+/** @brief Interleave and store (2 channels)
+
+Interleave and store data from 2 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                               const v_reg<_Tp, n>& b)
+{
+    int i, i2;
+    for( i = i2 = 0; i < n; i++, i2 += 2 )
+    {
+        ptr[i2] = a.s[i];
+        ptr[i2+1] = b.s[i];
+    }
+}
+
+/** @brief Interleave and store (3 channels)
+
+Interleave and store data from 3 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
+{
+    int i, i3;
+    for( i = i3 = 0; i < n; i++, i3 += 3 )
+    {
+        ptr[i3] = a.s[i];
+        ptr[i3+1] = b.s[i];
+        ptr[i3+2] = c.s[i];
+    }
+}
+
+/** @brief Interleave and store (4 channels)
+
+Interleave and store data from 4 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                                            const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
+                                                            const v_reg<_Tp, n>& d)
+{
+    int i, i4;
+    for( i = i4 = 0; i < n; i++, i4 += 4 )
+    {
+        ptr[i4] = a.s[i];
+        ptr[i4+1] = b.s[i];
+        ptr[i4+2] = c.s[i];
+        ptr[i4+3] = d.s[i];
+    }
+}
+
+/** @brief Store data to memory
+
+Store register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B C D}
+@endcode
+Pointer can be unaligned. */
+template<typename _Tp, int n>
+inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Store data to memory (lower half)
+
+Store lower half of register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B}
+@endcode */
+template<typename _Tp, int n>
+inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < (n/2); i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Store data to memory (higher half)
+
+Store higher half of register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {C D}
+@endcode */
+template<typename _Tp, int n>
+inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < (n/2); i++ )
+        ptr[i] = a.s[i+(n/2)];
+}
+
+/** @brief Store data to memory (aligned)
+
+Store register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B C D}
+@endcode
+Pointer __should__ be aligned by 16-byte boundary. */
+template<typename _Tp, int n>
+inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Combine vector from first elements of two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A1 A2 B1 B2}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = a.s[i];
+        c.s[i+(n/2)] = b.s[i];
+    }
+    return c;
+}
+
+/** @brief Combine vector from last elements of two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A3 A4 B3 B4}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = a.s[i+(n/2)];
+        c.s[i+(n/2)] = b.s[i+(n/2)];
+    }
+    return c;
+}
+
+/** @brief Combine two vectors from lower and higher parts of two other vectors
+
+@code{.cpp}
+low = cv::v_combine_low(a, b);
+high = cv::v_combine_high(a, b);
+@endcode */
+template<typename _Tp, int n>
+inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                        v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
+{
+    for( int i = 0; i < (n/2); i++ )
+    {
+        low.s[i] = a.s[i];
+        low.s[i+(n/2)] = b.s[i];
+        high.s[i] = a.s[i+(n/2)];
+        high.s[i+(n/2)] = b.s[i+(n/2)];
+    }
+}
+
+/** @brief Vector extract
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+========================
+shift = 1  {A2 A3 A4 B1}
+shift = 2  {A3 A4 B1 B2}
+shift = 3  {A4 B1 B2 B3}
+@endcode
+Restriction: 0 <= shift < nlanes
+
+Usage:
+@code
+v_int32x4 a, b, c;
+c = v_extract<2>(a, b);
+@endcode
+For integer types only. */
+template<int s, typename _Tp, int n>
+inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> r;
+    const int shift = n - s;
+    int i = 0;
+    for (; i < shift; ++i)
+        r.s[i] = a.s[i+s];
+    for (; i < n; ++i)
+        r.s[i] = b.s[i-shift];
+    return r;
+}
+
+/** @brief Round
+
+Rounds each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvRound(a.s[i]);
+    return c;
+}
+
+/** @brief Floor
+
+Floor each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvFloor(a.s[i]);
+    return c;
+}
+
+/** @brief Ceil
+
+Ceil each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvCeil(a.s[i]);
+    return c;
+}
+
+/** @brief Trunc
+
+Truncate each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (int)(a.s[i]);
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvRound(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvFloor(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvCeil(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvCeil(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @brief Convert to float
+
+Supported input type is cv::v_int32x4. */
+template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
+{
+    v_reg<float, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (float)a.s[i];
+    return c;
+}
+
+/** @brief Convert to double
+
+Supported input type is cv::v_int32x4. */
+template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<int, n*2>& a)
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i];
+    return c;
+}
+
+/** @brief Convert to double
+
+Supported input type is cv::v_float32x4. */
+template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a)
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i];
+    return c;
+}
+
+/** @brief Transpose 4x4 matrix
+
+Scheme:
+@code
+a0  {A1 A2 A3 A4}
+a1  {B1 B2 B3 B4}
+a2  {C1 C2 C3 C4}
+a3  {D1 D2 D3 D4}
+===============
+b0  {A1 B1 C1 D1}
+b1  {A2 B2 C2 D2}
+b2  {A3 B3 C3 D3}
+b3  {A4 B4 C4 D4}
+@endcode
+*/
+template<typename _Tp>
+inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1,
+                            const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3,
+                            v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1,
+                            v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 )
+{
+    b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]);
+    b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]);
+    b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]);
+    b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]);
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); }
+
+//! @name Init with zero
+//! @{
+//! @brief Create new vector with zero elements
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
+
+//! @name Init with value
+//! @{
+//! @brief Create new vector with elements set to a specific value
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \
+template<typename _Tp0, int n0> inline _Tpvec \
+    v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
+{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); }
+
+//! @name Reinterpret
+//! @{
+//! @brief Convert vector to different type without modifying underlying data.
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return a << n; }
+
+//! @name Left shift
+//! @{
+//! @brief Shift left
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return a >> n; }
+
+//! @name Right shift
+//! @{
+//! @brief Shift right
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ \
+    _Tpvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+    return c; \
+}
+
+//! @name Rounding shift
+//! @{
+//! @brief Rounding shift right
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \
+inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = saturate_cast<_Tpn>(a.s[i]); \
+        c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>(b.s[i]); \
+    } \
+    return c; \
+}
+
+//! @name Pack
+//! @{
+//! @brief Pack values from two vectors to one
+//!
+//! Return vector type have twice more elements than input vector types. Variant with _u_ suffix also
+//! converts to corresponding unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack)
+OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack)
+OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack)
+OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+        c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+    } \
+    return c; \
+}
+
+//! @name Pack with rounding shift
+//! @{
+//! @brief Pack values from two vectors to one with rounding shift
+//!
+//! Values from the input vectors will be shifted right by _n_ bits with rounding, converted to narrower
+//! type and returned in the result vector. Variant with _u_ suffix converts to unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = saturate_cast<_Tpn>(a.s[i]); \
+}
+
+//! @name Pack and store
+//! @{
+//! @brief Store values from the input vector into memory with pack
+//!
+//! Values will be stored into memory with saturating conversion to narrower type.
+//! Variant with _u_ suffix converts to corresponding unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+}
+
+//! @name Pack and store with rounding shift
+//! @{
+//! @brief Store values from the input vector into memory with pack
+//!
+//! Values will be shifted _n_ bits right with rounding, converted to narrower type and stored into
+//! memory. Variant with _u_ suffix converts to unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//! @}
+
+/** @brief Matrix multiplication
+
+Scheme:
+@code
+{A0 A1 A2 A3}   |V0|
+{B0 B1 B2 B3}   |V1|
+{C0 C1 C2 C3}   |V2|
+{D0 D1 D2 D3} x |V3|
+====================
+{R0 R1 R2 R3}, where:
+R0 = A0V0 + A1V1 + A2V2 + A3V3,
+R1 = B0V0 + B1V1 + B2V2 + B3V3
+...
+@endcode
+*/
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0],
+                       v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1],
+                       v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2],
+                       v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]);
+}
+
+//! @}
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability of SIMD operation
+static inline bool hasSIMD128()
+{
+    return false;
+}
+
+//! @}
+
+
+}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/hal/intrin_neon.hpp b/thirdparty/linux/include/opencv2/core/hal/intrin_neon.hpp
new file mode 100644
index 0000000..b000733
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/intrin_neon.hpp
@@ -0,0 +1,1234 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_NEON_HPP
+#define OPENCV_HAL_INTRIN_NEON_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+#define CV_SIMD128 1
+#if defined(__aarch64__)
+#define CV_SIMD128_64F 1
+#else
+#define CV_SIMD128_64F 0
+#endif
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
+template <typename T> static inline \
+_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
+template <typename T> static inline \
+float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint8x16_t, u8)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int8x16_t, s8)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint16x8_t, u16)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int16x8_t, s16)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint32x4_t, u32)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int32x4_t, s32)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint64x2_t, u64)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int64x2_t, s64)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(float32x4_t, f32)
+#endif
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() {}
+    explicit v_uint8x16(uint8x16_t v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+        val = vld1q_u8(v);
+    }
+    uchar get0() const
+    {
+        return vgetq_lane_u8(val, 0);
+    }
+
+    uint8x16_t val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() {}
+    explicit v_int8x16(int8x16_t v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+               schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+        val = vld1q_s8(v);
+    }
+    schar get0() const
+    {
+        return vgetq_lane_s8(val, 0);
+    }
+
+    int8x16_t val;
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() {}
+    explicit v_uint16x8(uint16x8_t v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+        val = vld1q_u16(v);
+    }
+    ushort get0() const
+    {
+        return vgetq_lane_u16(val, 0);
+    }
+
+    uint16x8_t val;
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() {}
+    explicit v_int16x8(int16x8_t v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+        val = vld1q_s16(v);
+    }
+    short get0() const
+    {
+        return vgetq_lane_s16(val, 0);
+    }
+
+    int16x8_t val;
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() {}
+    explicit v_uint32x4(uint32x4_t v) : val(v) {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+    {
+        unsigned v[] = {v0, v1, v2, v3};
+        val = vld1q_u32(v);
+    }
+    unsigned get0() const
+    {
+        return vgetq_lane_u32(val, 0);
+    }
+
+    uint32x4_t val;
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() {}
+    explicit v_int32x4(int32x4_t v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        int v[] = {v0, v1, v2, v3};
+        val = vld1q_s32(v);
+    }
+    int get0() const
+    {
+        return vgetq_lane_s32(val, 0);
+    }
+    int32x4_t val;
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() {}
+    explicit v_float32x4(float32x4_t v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        float v[] = {v0, v1, v2, v3};
+        val = vld1q_f32(v);
+    }
+    float get0() const
+    {
+        return vgetq_lane_f32(val, 0);
+    }
+    float32x4_t val;
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+
+    v_uint64x2() {}
+    explicit v_uint64x2(uint64x2_t v) : val(v) {}
+    v_uint64x2(unsigned v0, unsigned v1)
+    {
+        uint64 v[] = {v0, v1};
+        val = vld1q_u64(v);
+    }
+    uint64 get0() const
+    {
+        return vgetq_lane_u64(val, 0);
+    }
+    uint64x2_t val;
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+
+    v_int64x2() {}
+    explicit v_int64x2(int64x2_t v) : val(v) {}
+    v_int64x2(int v0, int v1)
+    {
+        int64 v[] = {v0, v1};
+        val = vld1q_s64(v);
+    }
+    int64 get0() const
+    {
+        return vgetq_lane_s64(val, 0);
+    }
+    int64x2_t val;
+};
+
+#if CV_SIMD128_64F
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() {}
+    explicit v_float64x2(float64x2_t v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        double v[] = {v0, v1};
+        val = vld1q_f64(v);
+    }
+    double get0() const
+    {
+        return vgetq_lane_f64(val, 0);
+    }
+    float64x2_t val;
+};
+#endif
+
+#if defined (HAVE_FP16)
+// Workaround for old comiplers
+template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
+{ return (int16x4_t)a; }
+template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
+{ return (float16x4_t)a; }
+template <typename T> static inline float16x4_t vld1_f16(const T* ptr)
+{ return vreinterpret_f16_s16(vld1_s16((const short*)ptr)); }
+template <typename T> static inline void vst1_f16(T* ptr, float16x4_t a)
+{ vst1_s16((short*)ptr, vreinterpret_s16_f16(a)); }
+
+struct v_float16x4
+{
+    typedef short lane_type;
+    enum { nlanes = 4 };
+
+    v_float16x4() {}
+    explicit v_float16x4(float16x4_t v) : val(v) {}
+    v_float16x4(short v0, short v1, short v2, short v3)
+    {
+        short v[] = {v0, v1, v2, v3};
+        val = vld1_f16(v);
+    }
+    short get0() const
+    {
+        return vget_lane_s16(vreinterpret_s16_f16(val), 0);
+    }
+    float16x4_t val;
+};
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
+inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
+inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
+inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \
+inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \
+inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }
+
+OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
+OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
+OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = vqmov##op##_##wsuffix(a.val); \
+    vst1_##suffix(ptr, a1); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
+    hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+template<int n> inline \
+void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
+    vst1_##suffix(ptr, a1); \
+}
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n)
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val);
+    float32x4_t res = vmulq_lane_f32(m0.val, vl, 0);
+    res = vmlaq_lane_f32(res, m1.val, vl, 1);
+    res = vmlaq_lane_f32(res, m2.val, vh, 0);
+    res = vmlaq_lane_f32(res, m3.val, vh, 1);
+    return v_float32x4(res);
+}
+
+#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    a.val = intrin(a.val, b.val); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64)
+#else
+inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
+{
+    float32x4_t reciprocal = vrecpeq_f32(b.val);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    return v_float32x4(vmulq_f32(a.val, reciprocal));
+}
+inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
+{
+    float32x4_t reciprocal = vrecpeq_f32(b.val);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    a.val = vmulq_f32(a.val, reciprocal);
+    return a;
+}
+#endif
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
+    d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
+    d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+    int32x4x2_t cd = vuzpq_s32(c, d);
+    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
+}
+
+#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \
+    inline _Tpvec operator ~ (const _Tpvec& a) \
+    { \
+        return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \
+    }
+
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64)
+
+#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \
+inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
+{ \
+    return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \
+} \
+inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
+{ \
+    a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32)
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32)
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32)
+
+inline v_float32x4 operator ~ (const v_float32x4& a)
+{
+    return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val))));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+    return v_float32x4(vsqrtq_f32(x.val));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    v_float32x4 one = v_setall_f32(1.0f);
+    return one / v_sqrt(x);
+}
+#else
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+    float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN));
+    float32x4_t e = vrsqrteq_f32(x1);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    return v_float32x4(vmulq_f32(x.val, e));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    float32x4_t e = vrsqrteq_f32(x.val);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
+    return v_float32x4(e);
+}
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
+
+OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)
+
+inline v_float32x4 v_abs(v_float32x4 x)
+{ return v_float32x4(vabsq_f32(x.val)); }
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
+inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
+{ \
+    return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
+} \
+inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
+{ \
+    a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64)
+
+inline v_float64x2 operator ~ (const v_float64x2& a)
+{
+    return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val))));
+}
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{
+    return v_float64x2(vsqrtq_f64(x.val));
+}
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+    v_float64x2 one = v_setall_f64(1.0f);
+    return one / v_sqrt(x);
+}
+
+inline v_float64x2 v_abs(v_float64x2 x)
+{ return v_float64x2(vabsq_f64(x.val)); }
+#endif
+
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
+#endif
+
+#if CV_SIMD128_64F
+inline int64x2_t vmvnq_s64(int64x2_t a)
+{
+    int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return veorq_s64(a, vx);
+}
+inline uint64x2_t vmvnq_u64(uint64x2_t a)
+{
+    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return veorq_u64(a, vx);
+}
+#endif
+#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }
+
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
+#endif
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
+
+// TODO: absdiff for signed integers
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec2(cast(intrin(a.val, b.val))); \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32)
+
+inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
+    return v_sqrt(x);
+}
+
+inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
+}
+
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
+}
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
+    return v_sqrt(x);
+}
+
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
+}
+
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_float64x2(vaddq_f64(c.val, vmulq_f64(a.val, b.val)));
+}
+#endif
+
+// trade efficiency for convenience
+#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); }
+
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
+
+#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(vld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(vld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ vst1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ vst1q_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); }
+
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
+#endif
+
+#if defined (HAVE_FP16)
+// Workaround for old comiplers
+inline v_float16x4 v_load_f16(const short* ptr)
+{ return v_float16x4(vld1_f16(ptr)); }
+inline void v_store_f16(short* ptr, v_float16x4& a)
+{ vst1_f16(ptr, a.val); }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    a0 = vp##vectorfunc##_##suffix(a0, a0); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
+
+inline int v_signmask(const v_uint8x16& a)
+{
+    int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
+    uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_uint16x8& a)
+{
+    int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
+    uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
+}
+inline int v_signmask(const v_int16x8& a)
+{ return v_signmask(v_reinterpret_as_u16(a)); }
+
+inline int v_signmask(const v_uint32x4& a)
+{
+    int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
+    uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(v0);
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
+}
+inline int v_signmask(const v_int32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+#if CV_SIMD128_64F
+inline int v_signmask(const v_uint64x2& a)
+{
+    int64x1_t m0 = vdup_n_s64(0);
+    uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
+    return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
+}
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
+inline bool v_check_all(const v_##_Tpvec& a) \
+{ \
+    _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
+    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
+} \
+inline bool v_check_any(const v_##_Tpvec& a) \
+{ \
+    _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
+    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
+}
+
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
+#endif
+
+inline bool v_check_all(const v_int8x16& a)
+{ return v_check_all(v_reinterpret_as_u8(a)); }
+inline bool v_check_all(const v_int16x8& a)
+{ return v_check_all(v_reinterpret_as_u16(a)); }
+inline bool v_check_all(const v_int32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+
+inline bool v_check_any(const v_int8x16& a)
+{ return v_check_any(v_reinterpret_as_u8(a)); }
+inline bool v_check_any(const v_int16x8& a)
+{ return v_check_any(v_reinterpret_as_u16(a)); }
+inline bool v_check_any(const v_int32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+
+#if CV_SIMD128_64F
+inline bool v_check_all(const v_int64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_int64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8)
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32)
+OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+{ \
+    b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
+    b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
+} \
+inline _Tpwvec v_load_expand(const _Tp* ptr) \
+{ \
+    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
+}
+
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr);
+    uint16x4_t v1 = vget_low_u16(vmovl_u8(v0));
+    return v_uint32x4(vmovl_u16(v1));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    int8x8_t v0 = vcreate_s8(*(unsigned*)ptr);
+    int16x4_t v1 = vget_low_s16(vmovl_s8(v0));
+    return v_int32x4(vmovl_s16(v1));
+}
+
+#if defined(__aarch64__)
+#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    b0.val = vzip1q_##suffix(a0.val, a1.val); \
+    b1.val = vzip2q_##suffix(a0.val, a1.val); \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
+    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
+}
+#else
+#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \
+    b0.val = p.val[0]; \
+    b1.val = p.val[1]; \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
+    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
+}
+#endif
+
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
+template <int s> \
+inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
+}
+
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
+#endif
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
+        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
+
+    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
+    return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
+}
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    int32x4_t a1 = vcvtq_s32_f32(a.val);
+    uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
+    return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    int32x4_t a1 = vcvtq_s32_f32(a.val);
+    uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1));
+    return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask)));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(vcvtq_s32_f32(a.val)); }
+
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val);
+    uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val);
+    a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask));
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val);
+    uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1));
+    a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask));
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
+}
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
+inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
+                         const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
+                         v_##_Tpvec& b0, v_##_Tpvec& b1, \
+                         v_##_Tpvec& b2, v_##_Tpvec& b3) \
+{ \
+    /* m00 m01 m02 m03 */ \
+    /* m10 m11 m12 m13 */ \
+    /* m20 m21 m22 m23 */ \
+    /* m30 m31 m32 m33 */ \
+    _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \
+    _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \
+    /* m00 m10 m02 m12 */ \
+    /* m01 m11 m03 m13 */ \
+    /* m20 m30 m22 m32 */ \
+    /* m21 m31 m23 m33 */ \
+    b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \
+    b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \
+    b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \
+    b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \
+}
+
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
+
+#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
+{ \
+    _Tpvec##x2_t v = vld2q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
+{ \
+    _Tpvec##x3_t v = vld3q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+    c.val = v.val[2]; \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
+                                v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    _Tpvec##x4_t v = vld4q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+    c.val = v.val[2]; \
+    d.val = v.val[3]; \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    _Tpvec##x2_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    vst2q_##suffix(ptr, v); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
+{ \
+    _Tpvec##x3_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    v.val[2] = c.val; \
+    vst3q_##suffix(ptr, v); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                               const v_##_Tpvec& c, const v_##_Tpvec& d) \
+{ \
+    _Tpvec##x4_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    v.val[2] = c.val; \
+    v.val[3] = d.val; \
+    vst4q_##suffix(ptr, v); \
+}
+
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
+#endif
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+    return v_float32x4(vcvtq_f32_s32(a.val));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    float32x2_t zero = vdup_n_f32(0.0f);
+    return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val)));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
+}
+#endif
+
+#if defined (HAVE_FP16)
+inline v_float32x4 v_cvt_f32(const v_float16x4& a)
+{
+    return v_float32x4(vcvt_f32_f16(a.val));
+}
+
+inline v_float16x4 v_cvt_f16(const v_float32x4& a)
+{
+    return v_float16x4(vcvt_f16_f32(a.val));
+}
+#endif
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability of SIMD operation
+static inline bool hasSIMD128()
+{
+    return checkHardwareSupport(CV_CPU_NEON);
+}
+
+//! @}
+
+//! @endcond
+
+}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/hal/intrin_sse.hpp b/thirdparty/linux/include/opencv2/core/hal/intrin_sse.hpp
new file mode 100644
index 0000000..fc81dac
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/hal/intrin_sse.hpp
@@ -0,0 +1,1744 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_SSE_HPP
+#define OPENCV_HAL_SSE_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() {}
+    explicit v_uint8x16(__m128i v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+    uchar get0() const
+    {
+        return (uchar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() {}
+    explicit v_int8x16(__m128i v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+    schar get0() const
+    {
+        return (schar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() {}
+    explicit v_uint16x8(__m128i v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+    ushort get0() const
+    {
+        return (ushort)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() {}
+    explicit v_int16x8(__m128i v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+    short get0() const
+    {
+        return (short)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() {}
+    explicit v_uint32x4(__m128i v) : val(v) {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+    {
+        val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3);
+    }
+    unsigned get0() const
+    {
+        return (unsigned)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() {}
+    explicit v_int32x4(__m128i v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        val = _mm_setr_epi32(v0, v1, v2, v3);
+    }
+    int get0() const
+    {
+        return _mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() {}
+    explicit v_float32x4(__m128 v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        val = _mm_setr_ps(v0, v1, v2, v3);
+    }
+    float get0() const
+    {
+        return _mm_cvtss_f32(val);
+    }
+    __m128 val;
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+
+    v_uint64x2() {}
+    explicit v_uint64x2(__m128i v) : val(v) {}
+    v_uint64x2(uint64 v0, uint64 v1)
+    {
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+    }
+    uint64 get0() const
+    {
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (unsigned)a | ((uint64)(unsigned)b << 32);
+    }
+    __m128i val;
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+
+    v_int64x2() {}
+    explicit v_int64x2(__m128i v) : val(v) {}
+    v_int64x2(int64 v0, int64 v1)
+    {
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+    }
+    int64 get0() const
+    {
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (int64)((unsigned)a | ((uint64)(unsigned)b << 32));
+    }
+    __m128i val;
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() {}
+    explicit v_float64x2(__m128d v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        val = _mm_setr_pd(v0, v1);
+    }
+    double get0() const
+    {
+        return _mm_cvtsd_f64(val);
+    }
+    __m128d val;
+};
+
+#if defined(HAVE_FP16)
+struct v_float16x4
+{
+    typedef short lane_type;
+    enum { nlanes = 4 };
+
+    v_float16x4() {}
+    explicit v_float16x4(__m128i v) : val(v) {}
+    v_float16x4(short v0, short v1, short v2, short v3)
+    {
+        val = _mm_setr_epi16(v0, v1, v2, v3, 0, 0, 0, 0);
+    }
+    short get0() const
+    {
+        return (short)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+#endif
+
+#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
+{ return _Tpvec(cast(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int32x4, int, s32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float32x4, float, f32, ps, ps, float, _mm_castsi128_ps)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float64x2, double, f64, pd, pd, double, _mm_castsi128_pd)
+
+inline v_uint64x2 v_setzero_u64() { return v_uint64x2(_mm_setzero_si128()); }
+inline v_int64x2 v_setzero_s64() { return v_int64x2(_mm_setzero_si128()); }
+inline v_uint64x2 v_setall_u64(uint64 val) { return v_uint64x2(val, val); }
+inline v_int64x2 v_setall_s64(int64 val) { return v_int64x2(val, val); }
+
+template<typename _Tpvec> inline
+v_uint64x2 v_reinterpret_as_u64(const _Tpvec& a) { return v_uint64x2(a.val); }
+template<typename _Tpvec> inline
+v_int64x2 v_reinterpret_as_s64(const _Tpvec& a) { return v_int64x2(a.val); }
+inline v_float32x4 v_reinterpret_as_f32(const v_uint64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float32x4 v_reinterpret_as_f32(const v_int64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_uint64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_int64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+
+#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix) \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float32x4& a) \
+{ return _Tpvec(_mm_castps_si128(a.val)); } \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float64x2& a) \
+{ return _Tpvec(_mm_castpd_si128(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint8x16, u8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int8x16, s8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint16x8, u16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int16x8, s16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint32x4, u32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint64x2, u64)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int64x2, s64)
+
+inline v_float32x4 v_reinterpret_as_f32(const v_float32x4& a) {return a; }
+inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& a) {return a; }
+inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x4(_mm_castpd_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); }
+
+//////////////// PACK ///////////////
+inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    return v_uint8x16(_mm_packus_epi16(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta)),
+                                       _mm_subs_epu16(b.val, _mm_subs_epu16(b.val, delta))));
+}
+
+inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    __m128i a1 = _mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta));
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
+{ return v_uint8x16(_mm_packus_epi16(a.val, b.val)); }
+
+inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(a.val, delta), n),
+                                       _mm_srli_epi16(_mm_adds_epu16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srli_epi16(_mm_adds_epu16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+template<int n> inline
+v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                       _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
+{ return v_int8x16(_mm_packs_epi16(a.val, b.val)); }
+
+inline void v_pack_store(schar* ptr, v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_int8x16(_mm_packs_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                     _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+template<int n> inline
+void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a1, a1));
+}
+
+
+// bit-wise "mask ? a : b"
+inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
+{
+    return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
+}
+
+inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
+    __m128i b1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, b.val), maxval32, b.val), delta32);
+    __m128i r = _mm_packs_epi32(a1, b1);
+    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
+
+inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
+    __m128i r = _mm_packs_epi32(a1, a1);
+    _mm_storel_epi64((__m128i*)ptr, _mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
+
+template<int n> inline
+v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i b1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(b.val, delta), n), delta32);
+    return v_uint16x8(_mm_sub_epi16(_mm_packs_epi32(a1, b1), _mm_set1_epi16(-32768)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta32 = _mm_set1_epi32(32768);
+    __m128i r = _mm_packs_epi32(_mm_sub_epi32(a.val, delta32), _mm_sub_epi32(b.val, delta32));
+    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
+
+inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    __m128i delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(a.val, delta32);
+    __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, r);
+}
+
+template<int n> inline
+v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32);
+    __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768));
+    return v_uint16x8(_mm_unpacklo_epi64(a2, b2));
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
+{ return v_int16x8(_mm_packs_epi32(a.val, b.val)); }
+
+inline void v_pack_store(short* ptr, const v_int32x4& a)
+{
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a.val, a.val));
+}
+
+template<int n> inline
+v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    return v_int16x8(_mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n),
+                                     _mm_srai_epi32(_mm_add_epi32(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(short* ptr, const v_int32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a1, a1));
+}
+
+
+// [a0 0 | b0 0]  [a1 0 | b1 0]
+inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+// [a0 0 | b0 0]  [a1 0 | b1 0]
+inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(int* ptr, const v_int64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+template<int n> inline
+v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline __m128i v_sign_epi64(__m128i a)
+{
+    return _mm_shuffle_epi32(_mm_srai_epi32(a, 31), _MM_SHUFFLE(3, 3, 1, 1)); // x m0 | x m1
+}
+
+inline __m128i v_srai_epi64(__m128i a, int imm)
+{
+    __m128i smask = v_sign_epi64(a);
+    return _mm_xor_si128(_mm_srli_epi64(_mm_xor_si128(a, smask), imm), smask);
+}
+
+template<int n> inline
+v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    int64 delta = (int64)1 << (n-1);
+    v_int64x2 delta2(delta, delta);
+    __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = v_srai_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(int* ptr, const v_int64x2& a)
+{
+    int64 delta = (int64)1 << (n-1);
+    v_int64x2 delta2(delta, delta);
+    __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val);
+    __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val);
+    __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val);
+    __m128 v3 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(3, 3, 3, 3)), m3.val);
+
+    return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3)));
+}
+
+
+#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return _Tpvec(intrin(a.val, b.val)); \
+    } \
+    inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+    { \
+        a.val = intrin(a.val, b.val); \
+        return a; \
+    }
+
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint8x16, _mm_adds_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint8x16, _mm_subs_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint16x8, _mm_mullo_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int16x8, _mm_mullo_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float32x4, _mm_div_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float64x2, _mm_add_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float64x2, _mm_sub_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float64x2, _mm_mul_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float64x2, _mm_div_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint64x2, _mm_add_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
+
+inline v_uint32x4 operator * (const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
+    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
+    return v_uint32x4(_mm_unpacklo_epi64(d0, d1));
+}
+inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
+    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
+    return v_int32x4(_mm_unpacklo_epi64(d0, d1));
+}
+inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b)
+{
+    a = a * b;
+    return a;
+}
+inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b)
+{
+    a = a * b;
+    return a;
+}
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
+    __m128i v1 = _mm_mulhi_epi16(a.val, b.val);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
+    __m128i v1 = _mm_mulhi_epu16(a.val, b.val);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    c.val = _mm_unpacklo_epi64(c0, c1);
+    d.val = _mm_unpackhi_epi64(c0, c1);
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    return v_int32x4(_mm_madd_epi16(a.val, b.val));
+}
+
+#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(^, _Tpvec, _mm_xor_##suffix) \
+    inline _Tpvec operator ~ (const _Tpvec& a) \
+    { \
+        return _Tpvec(_mm_xor_##suffix(a.val, not_const)); \
+    }
+
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint8x16, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int8x16, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint16x8, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int16x8, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint32x4, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int32x4, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint64x2, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int64x2, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float32x4, ps, _mm_castsi128_ps(_mm_set1_epi32(-1)))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float64x2, pd, _mm_castsi128_pd(_mm_set1_epi32(-1)))
+
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(_mm_sqrt_ps(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
+    __m128 t = x.val;
+    __m128 h = _mm_mul_ps(t, _0_5);
+    t = _mm_rsqrt_ps(t);
+    t = _mm_mul_ps(t, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t, t), h)));
+    return v_float32x4(t);
+}
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(_mm_sqrt_pd(x.val)); }
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+    static const __m128d v_1 = _mm_set1_pd(1.);
+    return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
+}
+
+#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \
+inline _Tpuvec v_abs(const _Tpsvec& x) \
+{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); }
+
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8)
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16)
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{
+    __m128i s = _mm_srli_epi32(x.val, 31);
+    __m128i f = _mm_srai_epi32(x.val, 31);
+    return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s));
+}
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); }
+inline v_float64x2 v_abs(const v_float64x2& x)
+{
+    return v_float64x2(_mm_and_pd(x.val,
+        _mm_castsi128_pd(_mm_srli_epi64(_mm_set1_epi32(-1), 1))));
+}
+
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_min, _mm_min_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_max, _mm_max_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_min, _mm_min_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_max, _mm_max_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_min, _mm_min_ps)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_max, _mm_max_ps)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_min, _mm_min_pd)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd)
+
+inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b)
+{
+    __m128i delta = _mm_set1_epi8((char)-128);
+    return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta),
+                                                       _mm_xor_si128(b.val, delta))));
+}
+inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b)
+{
+    __m128i delta = _mm_set1_epi8((char)-128);
+    return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta),
+                                                       _mm_xor_si128(b.val, delta))));
+}
+inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b)
+{
+    return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val)));
+}
+inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b)
+{
+    return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val));
+}
+inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i delta = _mm_set1_epi32((int)0x80000000);
+    __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
+    return v_uint32x4(v_select_si128(mask, b.val, a.val));
+}
+inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i delta = _mm_set1_epi32((int)0x80000000);
+    __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
+    return v_uint32x4(v_select_si128(mask, a.val, b.val));
+}
+inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b)
+{
+    return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val));
+}
+inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b)
+{
+    return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val));
+}
+
+#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \
+inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \
+{ return _Tpuvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpuvec operator != (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpuvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \
+{ return _Tpsvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpsvec operator != (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpuvec operator < (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask))); \
+} \
+inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask))); \
+} \
+inline _Tpuvec operator <= (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask)); \
+    return _Tpuvec(_mm_xor_si128(res, not_mask)); \
+} \
+inline _Tpuvec operator >= (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask)); \
+    return _Tpuvec(_mm_xor_si128(res, not_mask)); \
+} \
+inline _Tpsvec operator < (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    return _Tpsvec(_mm_cmpgt_##suffix(b.val, a.val)); \
+} \
+inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    return _Tpsvec(_mm_cmpgt_##suffix(a.val, b.val)); \
+} \
+inline _Tpsvec operator <= (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpsvec operator >= (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(b.val, a.val), not_mask)); \
+}
+
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint8x16, v_int8x16, epi8, (char)-128)
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint16x8, v_int16x8, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint32x4, v_int32x4, epi32, (int)0x80000000)
+
+#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpneq_##suffix(a.val, b.val)); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmplt_##suffix(a.val, b.val)); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpgt_##suffix(a.val, b.val)); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmple_##suffix(a.val, b.val)); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpge_##suffix(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
+OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
+
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_add_wrap, _mm_add_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16)
+
+#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
+inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \
+} \
+inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i smask = _mm_set1_epi32(smask32); \
+    __m128i a1 = _mm_xor_si128(a.val, smask); \
+    __m128i b1 = _mm_xor_si128(b.val, smask); \
+    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \
+}
+
+OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
+OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)
+
+inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{
+    return v_max(a, b) - v_min(a, b);
+}
+
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i d = _mm_sub_epi32(a.val, b.val);
+    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
+    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+}
+
+#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
+inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg absmask = _mm_castsi128_##suffix(absmask_vec); \
+    return _Tpvec(_mm_and_##suffix(_mm_sub_##suffix(a.val, b.val), absmask)); \
+} \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
+    return _Tpvec(_mm_sqrt_##suffix(res)); \
+} \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
+    return _Tpvec(res); \
+} \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ \
+    return _Tpvec(_mm_add_##suffix(_mm_mul_##suffix(a.val, b.val), c.val)); \
+}
+
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff))
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float64x2, double, __m128d, pd, _mm_srli_epi64(_mm_set1_epi32(-1), 1))
+
+#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shl(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shl(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shr(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+}
+
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64)
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
+                                     _mm_loadl_epi64((const __m128i*)ptr1))); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ _mm_store_si128((__m128i*)ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, a.val); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a.val, a.val)); }
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int8x16, schar)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int16x8, short)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int32x4, int)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int64x2, int64)
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadu_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(_mm_load_##suffix(ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    return _Tpvec(_mm_castsi128_##suffix( \
+        _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
+                           _mm_loadl_epi64((const __m128i*)ptr1)))); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storeu_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ _mm_store_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    __m128i a1 = _mm_cast##suffix##_si128(a.val); \
+    _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a1, a1)); \
+}
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd)
+
+#if defined(HAVE_FP16)
+inline v_float16x4 v_load_f16(const short* ptr)
+{ return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); }
+inline void v_store_f16(short* ptr, v_float16x4& a)
+{ _mm_storel_epi64((__m128i*)ptr, a.val); }
+#endif
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \
+inline scalartype v_reduce_##func(const v_##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
+    return (scalartype)_mm_cvtsi128_si32(val); \
+} \
+inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    __m128i smask = _mm_set1_epi16(sbit); \
+    val = _mm_xor_si128(val, smask); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
+    return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^  sbit); \
+}
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix) \
+inline scalartype v_reduce_sum(const v_##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 8)); \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 4)); \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 2)); \
+    return (scalartype)_mm_cvtsi128_si32(val); \
+} \
+inline unsigned scalartype v_reduce_sum(const v_u##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 8)); \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 4)); \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 2)); \
+    return (unsigned scalartype)_mm_cvtsi128_si32(val); \
+}
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(int16x8, short, 16)
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    scalartype CV_DECL_ALIGNED(16) buf[4]; \
+    v_store_aligned(buf, a); \
+    scalartype s0 = scalar_func(buf[0], buf[1]); \
+    scalartype s1 = scalar_func(buf[2], buf[3]); \
+    return scalar_func(s0, s1); \
+}
+
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, sum, OPENCV_HAL_ADD)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, sum, OPENCV_HAL_ADD)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, sum, OPENCV_HAL_ADD)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
+
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
+inline int v_signmask(const _Tpvec& a) \
+{ \
+    return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
+} \
+inline bool v_check_all(const _Tpvec& a) \
+{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
+inline bool v_check_any(const _Tpvec& a) \
+{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }
+
+#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
+inline __m128i v_packq_epi32(__m128i a)
+{
+    __m128i b = _mm_packs_epi32(a, a);
+    return _mm_packs_epi16(b, b);
+}
+
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
+
+#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(_mm_xor_##suffix(b.val, _mm_and_##suffix(_mm_xor_##suffix(b.val, a.val), mask.val))); \
+}
+
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
+
+#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
+inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
+{ \
+    __m128i z = _mm_setzero_si128(); \
+    b0.val = _mm_unpacklo_##suffix(a.val, z); \
+    b1.val = _mm_unpackhi_##suffix(a.val, z); \
+} \
+inline _Tpwuvec v_load_expand(const _Tpu* ptr) \
+{ \
+    __m128i z = _mm_setzero_si128(); \
+    return _Tpwuvec(_mm_unpacklo_##suffix(_mm_loadl_epi64((const __m128i*)ptr), z)); \
+} \
+inline void v_expand(const _Tpsvec& a, _Tpwsvec& b0, _Tpwsvec& b1) \
+{ \
+    b0.val = _mm_srai_##wsuffix(_mm_unpacklo_##suffix(a.val, a.val), shift); \
+    b1.val = _mm_srai_##wsuffix(_mm_unpackhi_##suffix(a.val, a.val), shift); \
+} \
+inline _Tpwsvec v_load_expand(const _Tps* ptr) \
+{ \
+    __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
+    return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \
+}
+
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16)
+
+inline void v_expand(const v_uint32x4& a, v_uint64x2& b0, v_uint64x2& b1)
+{
+    __m128i z = _mm_setzero_si128();
+    b0.val = _mm_unpacklo_epi32(a.val, z);
+    b1.val = _mm_unpackhi_epi32(a.val, z);
+}
+inline v_uint64x2 v_load_expand(const unsigned* ptr)
+{
+    __m128i z = _mm_setzero_si128();
+    return v_uint64x2(_mm_unpacklo_epi32(_mm_loadl_epi64((const __m128i*)ptr), z));
+}
+inline void v_expand(const v_int32x4& a, v_int64x2& b0, v_int64x2& b1)
+{
+    __m128i s = _mm_srai_epi32(a.val, 31);
+    b0.val = _mm_unpacklo_epi32(a.val, s);
+    b1.val = _mm_unpackhi_epi32(a.val, s);
+}
+inline v_int64x2 v_load_expand(const int* ptr)
+{
+    __m128i a = _mm_loadl_epi64((const __m128i*)ptr);
+    __m128i s = _mm_srai_epi32(a, 31);
+    return v_int64x2(_mm_unpacklo_epi32(a, s));
+}
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    __m128i z = _mm_setzero_si128();
+    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
+    return v_uint32x4(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
+    a = _mm_unpacklo_epi8(a, a);
+    a = _mm_unpacklo_epi8(a, a);
+    return v_int32x4(_mm_srai_epi32(a, 24));
+}
+
+#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = _mm_unpacklo_##suffix(a0.val, a1.val); \
+    b1.val = _mm_unpackhi_##suffix(a0.val, a1.val); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    return _Tpvec(cast_to(_mm_unpacklo_epi64(a1, b1))); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    return _Tpvec(cast_to(_mm_unpackhi_epi64(a1, b1))); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    c.val = cast_to(_mm_unpacklo_epi64(a1, b1)); \
+    d.val = cast_to(_mm_unpackhi_epi64(a1, b1)); \
+}
+
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
+
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    const int w = sizeof(typename _Tpvec::lane_type);
+    const int n = _Tpvec::nlanes;
+    __m128i ra, rb;
+    ra = _mm_srli_si128(a.val, s*w);
+    rb = _mm_slli_si128(b.val, (n-s)*w);
+    return _Tpvec(_mm_or_si128(ra, rb));
+}
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{ return v_int32x4(_mm_cvtps_epi32(a.val)); }
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(a1), a.val));
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(a.val, _mm_cvtepi32_ps(a1)));
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(_mm_cvttps_epi32(a.val)); }
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{ return v_int32x4(_mm_cvtpd_epi32(a.val)); }
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(_mm_cvtepi32_pd(a1), a.val));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(a.val, _mm_cvtepi32_pd(a1)));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{ return v_int32x4(_mm_cvttpd_epi32(a.val)); }
+
+#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, \
+                           _Tpvec& b2, _Tpvec& b3) \
+{ \
+    __m128i t0 = cast_from(_mm_unpacklo_##suffix(a0.val, a1.val)); \
+    __m128i t1 = cast_from(_mm_unpacklo_##suffix(a2.val, a3.val)); \
+    __m128i t2 = cast_from(_mm_unpackhi_##suffix(a0.val, a1.val)); \
+    __m128i t3 = cast_from(_mm_unpackhi_##suffix(a2.val, a3.val)); \
+\
+    b0.val = cast_to(_mm_unpacklo_epi64(t0, t1)); \
+    b1.val = cast_to(_mm_unpackhi_epi64(t0, t1)); \
+    b2.val = cast_to(_mm_unpacklo_epi64(t2, t3)); \
+    b3.val = cast_to(_mm_unpackhi_epi64(t2, t3)); \
+}
+
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
+
+// adopted from sse_utils.hpp
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+
+    __m128i t10 = _mm_unpacklo_epi8(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi8(t01, _mm_unpackhi_epi64(t02, t02));
+
+    __m128i t20 = _mm_unpacklo_epi8(t10, _mm_unpackhi_epi64(t11, t11));
+    __m128i t21 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t10, t10), t12);
+    __m128i t22 = _mm_unpacklo_epi8(t11, _mm_unpackhi_epi64(t12, t12));
+
+    __m128i t30 = _mm_unpacklo_epi8(t20, _mm_unpackhi_epi64(t21, t21));
+    __m128i t31 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t20, t20), t22);
+    __m128i t32 = _mm_unpacklo_epi8(t21, _mm_unpackhi_epi64(t22, t22));
+
+    a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31));
+    b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32);
+    c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32));
+}
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
+{
+    __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ...
+    __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ...
+    __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); // a8 b8 c8 d8 ...
+    __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 48)); // a12 b12 c12 d12 ...
+
+    __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 a8 b0 b8 ...
+    __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a2 a10 b2 b10 ...
+    __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a4 a12 b4 b12 ...
+    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b6 b14 ...
+
+    u0 = _mm_unpacklo_epi8(v0, v2); // a0 a4 a8 a12 ...
+    u1 = _mm_unpacklo_epi8(v1, v3); // a2 a6 a10 a14 ...
+    u2 = _mm_unpackhi_epi8(v0, v2); // a1 a5 a9 a13 ...
+    u3 = _mm_unpackhi_epi8(v1, v3); // a3 a7 a11 a15 ...
+
+    v0 = _mm_unpacklo_epi8(u0, u1); // a0 a2 a4 a6 ...
+    v1 = _mm_unpacklo_epi8(u2, u3); // a1 a3 a5 a7 ...
+    v2 = _mm_unpackhi_epi8(u0, u1); // c0 c2 c4 c6 ...
+    v3 = _mm_unpackhi_epi8(u2, u3); // c1 c3 c5 c7 ...
+
+    a.val = _mm_unpacklo_epi8(v0, v1);
+    b.val = _mm_unpackhi_epi8(v0, v1);
+    c.val = _mm_unpacklo_epi8(v2, v3);
+    d.val = _mm_unpackhi_epi8(v2, v3);
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+
+    __m128i t10 = _mm_unpacklo_epi16(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi16(t01, _mm_unpackhi_epi64(t02, t02));
+
+    __m128i t20 = _mm_unpacklo_epi16(t10, _mm_unpackhi_epi64(t11, t11));
+    __m128i t21 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t10, t10), t12);
+    __m128i t22 = _mm_unpacklo_epi16(t11, _mm_unpackhi_epi64(t12, t12));
+
+    a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
+    b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
+    c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
+{
+    __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1
+    __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 8)); // a2 b2 c2 d2 ...
+    __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ...
+    __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 24)); // a6 b6 c6 d6 ...
+
+    __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 a4 b0 b4 ...
+    __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a1 a5 b1 b5 ...
+    __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a2 a6 b2 b6 ...
+    __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a3 a7 b3 b7 ...
+
+    u0 = _mm_unpacklo_epi16(v0, v2); // a0 a2 a4 a6 ...
+    u1 = _mm_unpacklo_epi16(v1, v3); // a1 a3 a5 a7 ...
+    u2 = _mm_unpackhi_epi16(v0, v2); // c0 c2 c4 c6 ...
+    u3 = _mm_unpackhi_epi16(v1, v3); // c1 c3 c5 c7 ...
+
+    a.val = _mm_unpacklo_epi16(u0, u1);
+    b.val = _mm_unpackhi_epi16(u0, u1);
+    c.val = _mm_unpacklo_epi16(u2, u3);
+    d.val = _mm_unpackhi_epi16(u2, u3);
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 4));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+
+    __m128i t10 = _mm_unpacklo_epi32(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi32(t01, _mm_unpackhi_epi64(t02, t02));
+
+    a.val = _mm_unpacklo_epi32(t10, _mm_unpackhi_epi64(t11, t11));
+    b.val = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t10, t10), t12);
+    c.val = _mm_unpacklo_epi32(t11, _mm_unpackhi_epi64(t12, t12));
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
+{
+    v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr));        // a0 b0 c0 d0
+    v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
+    v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
+    v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
+
+    v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
+}
+
+// 2-channel, float only
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 u0 = _mm_loadu_ps(ptr);       // a0 b0 a1 b1
+    __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
+
+    a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
+    b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 ab b3
+}
+
+inline void v_store_interleave( short* ptr, const v_int16x8& a, const v_int16x8& b )
+{
+    __m128i t0, t1;
+    t0 = _mm_unpacklo_epi16(a.val, b.val);
+    t1 = _mm_unpackhi_epi16(a.val, b.val);
+    _mm_storeu_si128((__m128i*)(ptr), t0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), t1);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c )
+{
+    __m128i z = _mm_setzero_si128();
+    __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val);
+    __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val);
+    __m128i c0 = _mm_unpacklo_epi8(c.val, z);
+    __m128i c1 = _mm_unpackhi_epi8(c.val, z);
+
+    __m128i p00 = _mm_unpacklo_epi16(ab0, c0);
+    __m128i p01 = _mm_unpackhi_epi16(ab0, c0);
+    __m128i p02 = _mm_unpacklo_epi16(ab1, c1);
+    __m128i p03 = _mm_unpackhi_epi16(ab1, c1);
+
+    __m128i p10 = _mm_unpacklo_epi32(p00, p01);
+    __m128i p11 = _mm_unpackhi_epi32(p00, p01);
+    __m128i p12 = _mm_unpacklo_epi32(p02, p03);
+    __m128i p13 = _mm_unpackhi_epi32(p02, p03);
+
+    __m128i p20 = _mm_unpacklo_epi64(p10, p11);
+    __m128i p21 = _mm_unpackhi_epi64(p10, p11);
+    __m128i p22 = _mm_unpacklo_epi64(p12, p13);
+    __m128i p23 = _mm_unpackhi_epi64(p12, p13);
+
+    p20 = _mm_slli_si128(p20, 1);
+    p22 = _mm_slli_si128(p22, 1);
+
+    __m128i p30 = _mm_slli_epi64(_mm_unpacklo_epi32(p20, p21), 8);
+    __m128i p31 = _mm_srli_epi64(_mm_unpackhi_epi32(p20, p21), 8);
+    __m128i p32 = _mm_slli_epi64(_mm_unpacklo_epi32(p22, p23), 8);
+    __m128i p33 = _mm_srli_epi64(_mm_unpackhi_epi32(p22, p23), 8);
+
+    __m128i p40 = _mm_unpacklo_epi64(p30, p31);
+    __m128i p41 = _mm_unpackhi_epi64(p30, p31);
+    __m128i p42 = _mm_unpacklo_epi64(p32, p33);
+    __m128i p43 = _mm_unpackhi_epi64(p32, p33);
+
+    __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2));
+
+    _mm_storeu_si128((__m128i*)(ptr), v0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 32), v2);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c, const v_uint8x16& d)
+{
+    // a0 a1 a2 a3 ....
+    // b0 b1 b2 b3 ....
+    // c0 c1 c2 c3 ....
+    // d0 d1 d2 d3 ....
+    __m128i u0 = _mm_unpacklo_epi8(a.val, c.val); // a0 c0 a1 c1 ...
+    __m128i u1 = _mm_unpackhi_epi8(a.val, c.val); // a8 c8 a9 c9 ...
+    __m128i u2 = _mm_unpacklo_epi8(b.val, d.val); // b0 d0 b1 d1 ...
+    __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ...
+
+    __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ...
+    __m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
+    __m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
+    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ...
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+    _mm_storeu_si128((__m128i*)(ptr + 32), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 48), v3);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
+                                const v_uint16x8& b,
+                                const v_uint16x8& c )
+{
+    __m128i z = _mm_setzero_si128();
+    __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
+    __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val);
+    __m128i c0 = _mm_unpacklo_epi16(c.val, z);
+    __m128i c1 = _mm_unpackhi_epi16(c.val, z);
+
+    __m128i p10 = _mm_unpacklo_epi32(ab0, c0);
+    __m128i p11 = _mm_unpackhi_epi32(ab0, c0);
+    __m128i p12 = _mm_unpacklo_epi32(ab1, c1);
+    __m128i p13 = _mm_unpackhi_epi32(ab1, c1);
+
+    __m128i p20 = _mm_unpacklo_epi64(p10, p11);
+    __m128i p21 = _mm_unpackhi_epi64(p10, p11);
+    __m128i p22 = _mm_unpacklo_epi64(p12, p13);
+    __m128i p23 = _mm_unpackhi_epi64(p12, p13);
+
+    p20 = _mm_slli_si128(p20, 2);
+    p22 = _mm_slli_si128(p22, 2);
+
+    __m128i p30 = _mm_unpacklo_epi64(p20, p21);
+    __m128i p31 = _mm_unpackhi_epi64(p20, p21);
+    __m128i p32 = _mm_unpacklo_epi64(p22, p23);
+    __m128i p33 = _mm_unpackhi_epi64(p22, p23);
+
+    __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2));
+
+    _mm_storeu_si128((__m128i*)(ptr), v0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
+                                const v_uint16x8& c, const v_uint16x8& d)
+{
+    // a0 a1 a2 a3 ....
+    // b0 b1 b2 b3 ....
+    // c0 c1 c2 c3 ....
+    // d0 d1 d2 d3 ....
+    __m128i u0 = _mm_unpacklo_epi16(a.val, c.val); // a0 c0 a1 c1 ...
+    __m128i u1 = _mm_unpackhi_epi16(a.val, c.val); // a4 c4 a5 c5 ...
+    __m128i u2 = _mm_unpacklo_epi16(b.val, d.val); // b0 d0 b1 d1 ...
+    __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ...
+
+    __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ...
+    __m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
+    __m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
+    __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ...
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 24), v3);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                                const v_uint32x4& c )
+{
+    v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3;
+    v_transpose4x4(a, b, c, z, u0, u1, u2, u3);
+
+    __m128i v0 = _mm_or_si128(u0.val, _mm_slli_si128(u1.val, 12));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4));
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 4), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+}
+
+inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                               const v_uint32x4& c, const v_uint32x4& d)
+{
+    v_uint32x4 t0, t1, t2, t3;
+    v_transpose4x4(a, b, c, d, t0, t1, t2, t3);
+    v_store(ptr, t0);
+    v_store(ptr + 4, t1);
+    v_store(ptr + 8, t2);
+    v_store(ptr + 12, t3);
+}
+
+// 2-channel, float only
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b)
+{
+    // a0 a1 a2 a3 ...
+    // b0 b1 b2 b3 ...
+    __m128 u0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1
+    __m128 u1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3
+
+    _mm_storeu_ps(ptr, u0);
+    _mm_storeu_ps((ptr + 4), u1);
+}
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
+inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
+                                 _Tpvec& b0, _Tpvec& c0 ) \
+{ \
+    _Tpuvec a1, b1, c1; \
+    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix(a1); \
+    b0 = v_reinterpret_as_##suffix(b1); \
+    c0 = v_reinterpret_as_##suffix(c1); \
+} \
+inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
+                                 _Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \
+{ \
+    _Tpuvec a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix(a1); \
+    b0 = v_reinterpret_as_##suffix(b1); \
+    c0 = v_reinterpret_as_##suffix(c1); \
+    d0 = v_reinterpret_as_##suffix(d1); \
+} \
+inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \
+                               const _Tpvec& b0, const _Tpvec& c0 ) \
+{ \
+    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
+    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
+    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
+    v_store_interleave((_Tpu*)ptr, a1, b1, c1); \
+} \
+inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
+                               const _Tpvec& c0, const _Tpvec& d0 ) \
+{ \
+    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
+    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
+    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
+    _Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \
+    v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \
+}
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+    return v_float32x4(_mm_cvtepi32_ps(a.val));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    return v_float32x4(_mm_cvtpd_ps(a.val));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(_mm_cvtepi32_pd(a.val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8)));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(_mm_cvtps_pd(a.val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8))));
+}
+
+#if defined(HAVE_FP16)
+inline v_float32x4 v_cvt_f32(const v_float16x4& a)
+{
+    return v_float32x4(_mm_cvtph_ps(a.val));
+}
+
+inline v_float16x4 v_cvt_f16(const v_float32x4& a)
+{
+    return v_float16x4(_mm_cvtps_ph(a.val, 0));
+}
+#endif
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability of SIMD operation
+static inline bool hasSIMD128()
+{
+    return checkHardwareSupport(CV_CPU_SSE2);
+}
+
+//! @}
+
+//! @endcond
+
+}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/ippasync.hpp b/thirdparty/linux/include/opencv2/core/ippasync.hpp
new file mode 100644
index 0000000..0ed8264
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/ippasync.hpp
@@ -0,0 +1,195 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_IPPASYNC_HPP
+#define OPENCV_CORE_IPPASYNC_HPP
+
+#ifdef HAVE_IPP_A
+
+#include "opencv2/core.hpp"
+#include <ipp_async_op.h>
+#include <ipp_async_accel.h>
+
+namespace cv
+{
+
+namespace hpp
+{
+
+/** @addtogroup core_ipp
+This section describes conversion between OpenCV and [Intel&reg; IPP Asynchronous
+C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. [Getting Started
+Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) help you to
+install the library, configure header and library build paths.
+ */
+//! @{
+
+    //! convert OpenCV data type to hppDataType
+    inline int toHppType(const int cvType)
+    {
+        int depth = CV_MAT_DEPTH(cvType);
+        int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
+                     depth == CV_16U ? HPP_DATA_TYPE_16U :
+                     depth == CV_16S ? HPP_DATA_TYPE_16S :
+                     depth == CV_32S ? HPP_DATA_TYPE_32S :
+                     depth == CV_32F ? HPP_DATA_TYPE_32F :
+                     depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
+        CV_Assert( hppType >= 0 );
+        return hppType;
+    }
+
+    //! convert hppDataType to OpenCV data type
+    inline int toCvType(const int hppType)
+    {
+        int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
+                    hppType == HPP_DATA_TYPE_16U ? CV_16U :
+                    hppType == HPP_DATA_TYPE_16S ? CV_16S :
+                    hppType == HPP_DATA_TYPE_32S ? CV_32S :
+                    hppType == HPP_DATA_TYPE_32F ? CV_32F :
+                    hppType == HPP_DATA_TYPE_64F ? CV_64F : -1;
+        CV_Assert( cvType >= 0 );
+        return cvType;
+    }
+
+    /** @brief Convert hppiMatrix to Mat.
+
+    This function allocates and initializes new matrix (if needed) that has the same size and type as
+    input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+    @param src input hppiMatrix.
+    @param dst output matrix.
+    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
+    @param cn number of channels.
+     */
+    inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
+    {
+        hppDataType type;
+        hpp32u width, height;
+        hppStatus sts;
+
+        if (src == NULL)
+            return dst.release();
+
+        sts = hppiInquireMatrix(src, &type, &width, &height);
+
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+
+        int matType = CV_MAKETYPE(toCvType(type), cn);
+
+        CV_Assert(width%cn == 0);
+
+        width /= cn;
+
+        dst.create((int)height, (int)width, (int)matType);
+
+        size_t newSize = (size_t)(height*(hpp32u)(dst.step));
+
+        sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
+
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+    }
+
+    /** @brief Create Mat from hppiMatrix.
+
+    This function allocates and initializes the Mat that has the same size and type as input matrix.
+    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+    @param src input hppiMatrix.
+    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
+    @param cn number of channels.
+    @sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
+     */
+    inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
+    {
+        Mat dst;
+        copyHppToMat(src, dst, accel, cn);
+        return dst;
+    }
+
+    /** @brief Create hppiMatrix from Mat.
+
+    This function allocates and initializes the hppiMatrix that has the same size and type as input
+    matrix, returns the hppiMatrix*.
+
+    If you want to use zero-copy for GPU you should to have 4KB aligned matrix data. See details
+    [hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
+
+    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+
+    @note The hppiMatrix pointer to the image buffer in system memory refers to the src.data. Control
+    the lifetime of the matrix and don't change its data, if there is no special need.
+    @param src input matrix.
+    @param accel accelerator instance. Supports type:
+    -   **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
+    -   **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
+        accelerators.
+    -   **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
+    @sa howToUseIPPAconversion, hpp::getMat
+     */
+    inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
+    {
+        int htype = toHppType(src.type());
+        int cn = src.channels();
+
+        CV_Assert(src.data);
+        hppAccelType accelType = hppQueryAccelType(accel);
+
+        if (accelType!=HPP_ACCEL_TYPE_CPU)
+        {
+            hpp32u pitch, size;
+            hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
+            if (pitch!=0 && size!=0)
+                if ((int)(src.data)%4096==0 && pitch==(hpp32u)(src.step))
+                {
+                    return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
+                }
+        }
+
+        return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));;
+    }
+
+//! @}
+}}
+
+#endif
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/mat.hpp b/thirdparty/linux/include/opencv2/core/mat.hpp
new file mode 100644
index 0000000..39c197e
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/mat.hpp
@@ -0,0 +1,3520 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MAT_HPP
+#define OPENCV_CORE_MAT_HPP
+
+#ifndef __cplusplus
+#  error mat.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/matx.hpp"
+#include "opencv2/core/types.hpp"
+
+#include "opencv2/core/bufferpool.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+    ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
+
+class CV_EXPORTS _OutputArray;
+
+//////////////////////// Input/Output Array Arguments /////////////////////////////////
+
+/** @brief This is the proxy class for passing read-only input arrays into OpenCV functions.
+
+It is defined as:
+@code
+    typedef const _InputArray& InputArray;
+@endcode
+where _InputArray is a class that can be constructed from `Mat`, `Mat_<T>`, `Matx<T, m, n>`,
+`std::vector<T>`, `std::vector<std::vector<T> >` or `std::vector<Mat>`. It can also be constructed
+from a matrix expression.
+
+Since this is mostly implementation-level class, and its interface may change in future versions, we
+do not describe it in details. There are a few key things, though, that should be kept in mind:
+
+-   When you see in the reference manual or in OpenCV source code a function that takes
+    InputArray, it means that you can actually pass `Mat`, `Matx`, `vector<T>` etc. (see above the
+    complete list).
+-   Optional input arguments: If some of the input arrays may be empty, pass cv::noArray() (or
+    simply cv::Mat() as you probably did before).
+-   The class is designed solely for passing parameters. That is, normally you *should not*
+    declare class members, local and global variables of this type.
+-   If you want to design your own function or a class method that can operate of arrays of
+    multiple types, you can use InputArray (or OutputArray) for the respective parameters. Inside
+    a function you should use _InputArray::getMat() method to construct a matrix header for the
+    array (without copying data). _InputArray::kind() can be used to distinguish Mat from
+    `vector<>` etc., but normally it is not needed.
+
+Here is how you can use a function that takes InputArray :
+@code
+    std::vector<Point2f> vec;
+    // points or a circle
+    for( int i = 0; i < 30; i++ )
+        vec.push_back(Point2f((float)(100 + 30*cos(i*CV_PI*2/5)),
+                              (float)(100 - 30*sin(i*CV_PI*2/5))));
+    cv::transform(vec, vec, cv::Matx23f(0.707, -0.707, 10, 0.707, 0.707, 20));
+@endcode
+That is, we form an STL vector containing points, and apply in-place affine transformation to the
+vector using the 2x3 matrix created inline as `Matx<float, 2, 3>` instance.
+
+Here is how such a function can be implemented (for simplicity, we implement a very specific case of
+it, according to the assertion statement inside) :
+@code
+    void myAffineTransform(InputArray _src, OutputArray _dst, InputArray _m)
+    {
+        // get Mat headers for input arrays. This is O(1) operation,
+        // unless _src and/or _m are matrix expressions.
+        Mat src = _src.getMat(), m = _m.getMat();
+        CV_Assert( src.type() == CV_32FC2 && m.type() == CV_32F && m.size() == Size(3, 2) );
+
+        // [re]create the output array so that it has the proper size and type.
+        // In case of Mat it calls Mat::create, in case of STL vector it calls vector::resize.
+        _dst.create(src.size(), src.type());
+        Mat dst = _dst.getMat();
+
+        for( int i = 0; i < src.rows; i++ )
+            for( int j = 0; j < src.cols; j++ )
+            {
+                Point2f pt = src.at<Point2f>(i, j);
+                dst.at<Point2f>(i, j) = Point2f(m.at<float>(0, 0)*pt.x +
+                                                m.at<float>(0, 1)*pt.y +
+                                                m.at<float>(0, 2),
+                                                m.at<float>(1, 0)*pt.x +
+                                                m.at<float>(1, 1)*pt.y +
+                                                m.at<float>(1, 2));
+            }
+    }
+@endcode
+There is another related type, InputArrayOfArrays, which is currently defined as a synonym for
+InputArray:
+@code
+    typedef InputArray InputArrayOfArrays;
+@endcode
+It denotes function arguments that are either vectors of vectors or vectors of matrices. A separate
+synonym is needed to generate Python/Java etc. wrappers properly. At the function implementation
+level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
+idx-th component of the outer vector and _InputArray::size().area() should be used to find the
+number of components (vectors/matrices) of the outer vector.
+ */
+class CV_EXPORTS _InputArray
+{
+public:
+    enum {
+        KIND_SHIFT = 16,
+        FIXED_TYPE = 0x8000 << KIND_SHIFT,
+        FIXED_SIZE = 0x4000 << KIND_SHIFT,
+        KIND_MASK = 31 << KIND_SHIFT,
+
+        NONE              = 0 << KIND_SHIFT,
+        MAT               = 1 << KIND_SHIFT,
+        MATX              = 2 << KIND_SHIFT,
+        STD_VECTOR        = 3 << KIND_SHIFT,
+        STD_VECTOR_VECTOR = 4 << KIND_SHIFT,
+        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
+        EXPR              = 6 << KIND_SHIFT,
+        OPENGL_BUFFER     = 7 << KIND_SHIFT,
+        CUDA_HOST_MEM     = 8 << KIND_SHIFT,
+        CUDA_GPU_MAT      = 9 << KIND_SHIFT,
+        UMAT              =10 << KIND_SHIFT,
+        STD_VECTOR_UMAT   =11 << KIND_SHIFT,
+        STD_BOOL_VECTOR   =12 << KIND_SHIFT,
+        STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT
+    };
+
+    _InputArray();
+    _InputArray(int _flags, void* _obj);
+    _InputArray(const Mat& m);
+    _InputArray(const MatExpr& expr);
+    _InputArray(const std::vector<Mat>& vec);
+    template<typename _Tp> _InputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
+    _InputArray(const std::vector<bool>& vec);
+    template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
+    _InputArray(const double& val);
+    _InputArray(const cuda::GpuMat& d_mat);
+    _InputArray(const std::vector<cuda::GpuMat>& d_mat_array);
+    _InputArray(const ogl::Buffer& buf);
+    _InputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
+    _InputArray(const UMat& um);
+    _InputArray(const std::vector<UMat>& umv);
+
+    Mat getMat(int idx=-1) const;
+    Mat getMat_(int idx=-1) const;
+    UMat getUMat(int idx=-1) const;
+    void getMatVector(std::vector<Mat>& mv) const;
+    void getUMatVector(std::vector<UMat>& umv) const;
+    void getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const;
+    cuda::GpuMat getGpuMat() const;
+    ogl::Buffer getOGlBuffer() const;
+
+    int getFlags() const;
+    void* getObj() const;
+    Size getSz() const;
+
+    int kind() const;
+    int dims(int i=-1) const;
+    int cols(int i=-1) const;
+    int rows(int i=-1) const;
+    Size size(int i=-1) const;
+    int sizend(int* sz, int i=-1) const;
+    bool sameSize(const _InputArray& arr) const;
+    size_t total(int i=-1) const;
+    int type(int i=-1) const;
+    int depth(int i=-1) const;
+    int channels(int i=-1) const;
+    bool isContinuous(int i=-1) const;
+    bool isSubmatrix(int i=-1) const;
+    bool empty() const;
+    void copyTo(const _OutputArray& arr) const;
+    void copyTo(const _OutputArray& arr, const _InputArray & mask) const;
+    size_t offset(int i=-1) const;
+    size_t step(int i=-1) const;
+    bool isMat() const;
+    bool isUMat() const;
+    bool isMatVector() const;
+    bool isUMatVector() const;
+    bool isMatx() const;
+    bool isVector() const;
+    bool isGpuMatVector() const;
+    ~_InputArray();
+
+protected:
+    int flags;
+    void* obj;
+    Size sz;
+
+    void init(int _flags, const void* _obj);
+    void init(int _flags, const void* _obj, Size _sz);
+};
+
+
+/** @brief This type is very similar to InputArray except that it is used for input/output and output function
+parameters.
+
+Just like with InputArray, OpenCV users should not care about OutputArray, they just pass `Mat`,
+`vector<T>` etc. to the functions. The same limitation as for `InputArray`: *Do not explicitly
+create OutputArray instances* applies here too.
+
+If you want to make your function polymorphic (i.e. accept different arrays as output parameters),
+it is also not very difficult. Take the sample above as the reference. Note that
+_OutputArray::create() needs to be called before _OutputArray::getMat(). This way you guarantee
+that the output array is properly allocated.
+
+Optional output parameters. If you do not need certain output array to be computed and returned to
+you, pass cv::noArray(), just like you would in the case of optional input array. At the
+implementation level, use _OutputArray::needed() to check if certain output array needs to be
+computed or not.
+
+There are several synonyms for OutputArray that are used to assist automatic Python/Java/... wrapper
+generators:
+@code
+    typedef OutputArray OutputArrayOfArrays;
+    typedef OutputArray InputOutputArray;
+    typedef OutputArray InputOutputArrayOfArrays;
+@endcode
+ */
+class CV_EXPORTS _OutputArray : public _InputArray
+{
+public:
+    enum
+    {
+        DEPTH_MASK_8U = 1 << CV_8U,
+        DEPTH_MASK_8S = 1 << CV_8S,
+        DEPTH_MASK_16U = 1 << CV_16U,
+        DEPTH_MASK_16S = 1 << CV_16S,
+        DEPTH_MASK_32S = 1 << CV_32S,
+        DEPTH_MASK_32F = 1 << CV_32F,
+        DEPTH_MASK_64F = 1 << CV_64F,
+        DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
+        DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
+        DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
+    };
+
+    _OutputArray();
+    _OutputArray(int _flags, void* _obj);
+    _OutputArray(Mat& m);
+    _OutputArray(std::vector<Mat>& vec);
+    _OutputArray(cuda::GpuMat& d_mat);
+    _OutputArray(std::vector<cuda::GpuMat>& d_mat);
+    _OutputArray(ogl::Buffer& buf);
+    _OutputArray(cuda::HostMem& cuda_mem);
+    template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
+    _OutputArray(std::vector<bool>& vec);
+    template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(_Tp* vec, int n);
+    template<typename _Tp, int m, int n> _OutputArray(Matx<_Tp, m, n>& matx);
+    _OutputArray(UMat& m);
+    _OutputArray(std::vector<UMat>& vec);
+
+    _OutputArray(const Mat& m);
+    _OutputArray(const std::vector<Mat>& vec);
+    _OutputArray(const cuda::GpuMat& d_mat);
+    _OutputArray(const std::vector<cuda::GpuMat>& d_mat);
+    _OutputArray(const ogl::Buffer& buf);
+    _OutputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _OutputArray(const cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
+    template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _OutputArray(const Matx<_Tp, m, n>& matx);
+    _OutputArray(const UMat& m);
+    _OutputArray(const std::vector<UMat>& vec);
+
+    bool fixedSize() const;
+    bool fixedType() const;
+    bool needed() const;
+    Mat& getMatRef(int i=-1) const;
+    UMat& getUMatRef(int i=-1) const;
+    cuda::GpuMat& getGpuMatRef() const;
+    std::vector<cuda::GpuMat>& getGpuMatVecRef() const;
+    ogl::Buffer& getOGlBufferRef() const;
+    cuda::HostMem& getHostMemRef() const;
+    void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void createSameSize(const _InputArray& arr, int mtype) const;
+    void release() const;
+    void clear() const;
+    void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;
+
+    void assign(const UMat& u) const;
+    void assign(const Mat& m) const;
+};
+
+
+class CV_EXPORTS _InputOutputArray : public _OutputArray
+{
+public:
+    _InputOutputArray();
+    _InputOutputArray(int _flags, void* _obj);
+    _InputOutputArray(Mat& m);
+    _InputOutputArray(std::vector<Mat>& vec);
+    _InputOutputArray(cuda::GpuMat& d_mat);
+    _InputOutputArray(ogl::Buffer& buf);
+    _InputOutputArray(cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
+    _InputOutputArray(std::vector<bool>& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(_Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(Matx<_Tp, m, n>& matx);
+    _InputOutputArray(UMat& m);
+    _InputOutputArray(std::vector<UMat>& vec);
+
+    _InputOutputArray(const Mat& m);
+    _InputOutputArray(const std::vector<Mat>& vec);
+    _InputOutputArray(const cuda::GpuMat& d_mat);
+    _InputOutputArray(const std::vector<cuda::GpuMat>& d_mat);
+    _InputOutputArray(const ogl::Buffer& buf);
+    _InputOutputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(const cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const std::vector<_Tp>& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(const Matx<_Tp, m, n>& matx);
+    _InputOutputArray(const UMat& m);
+    _InputOutputArray(const std::vector<UMat>& vec);
+};
+
+typedef const _InputArray& InputArray;
+typedef InputArray InputArrayOfArrays;
+typedef const _OutputArray& OutputArray;
+typedef OutputArray OutputArrayOfArrays;
+typedef const _InputOutputArray& InputOutputArray;
+typedef InputOutputArray InputOutputArrayOfArrays;
+
+CV_EXPORTS InputOutputArray noArray();
+
+/////////////////////////////////// MatAllocator //////////////////////////////////////
+
+//! Usage flags for allocator
+enum UMatUsageFlags
+{
+    USAGE_DEFAULT = 0,
+
+    // buffer allocation policy is platform and usage specific
+    USAGE_ALLOCATE_HOST_MEMORY = 1 << 0,
+    USAGE_ALLOCATE_DEVICE_MEMORY = 1 << 1,
+    USAGE_ALLOCATE_SHARED_MEMORY = 1 << 2, // It is not equal to: USAGE_ALLOCATE_HOST_MEMORY | USAGE_ALLOCATE_DEVICE_MEMORY
+
+    __UMAT_USAGE_FLAGS_32BIT = 0x7fffffff // Binary compatibility hint
+};
+
+struct CV_EXPORTS UMatData;
+
+/** @brief  Custom array allocator
+*/
+class CV_EXPORTS MatAllocator
+{
+public:
+    MatAllocator() {}
+    virtual ~MatAllocator() {}
+
+    // let's comment it off for now to detect and fix all the uses of allocator
+    //virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
+    //                      uchar*& datastart, uchar*& data, size_t* step) = 0;
+    //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
+    virtual UMatData* allocate(int dims, const int* sizes, int type,
+                               void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const = 0;
+    virtual bool allocate(UMatData* data, int accessflags, UMatUsageFlags usageFlags) const = 0;
+    virtual void deallocate(UMatData* data) const = 0;
+    virtual void map(UMatData* data, int accessflags) const;
+    virtual void unmap(UMatData* data) const;
+    virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
+                          const size_t srcofs[], const size_t srcstep[],
+                          const size_t dststep[]) const;
+    virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
+                        const size_t dstofs[], const size_t dststep[],
+                        const size_t srcstep[]) const;
+    virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
+                      const size_t srcofs[], const size_t srcstep[],
+                      const size_t dstofs[], const size_t dststep[], bool sync) const;
+
+    // default implementation returns DummyBufferPoolController
+    virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
+};
+
+
+//////////////////////////////// MatCommaInitializer //////////////////////////////////
+
+/** @brief  Comma-separated Matrix Initializer
+
+ The class instances are usually not created explicitly.
+ Instead, they are created on "matrix << firstValue" operator.
+
+ The sample below initializes 2x2 rotation matrix:
+
+ \code
+ double angle = 30, a = cos(angle*CV_PI/180), b = sin(angle*CV_PI/180);
+ Mat R = (Mat_<double>(2,2) << a, -b, b, a);
+ \endcode
+*/
+template<typename _Tp> class MatCommaInitializer_
+{
+public:
+    //! the constructor, created by "matrix << firstValue" operator, where matrix is cv::Mat
+    MatCommaInitializer_(Mat_<_Tp>* _m);
+    //! the operator that takes the next value and put it to the matrix
+    template<typename T2> MatCommaInitializer_<_Tp>& operator , (T2 v);
+    //! another form of conversion operator
+    operator Mat_<_Tp>() const;
+protected:
+    MatIterator_<_Tp> it;
+};
+
+
+/////////////////////////////////////// Mat ///////////////////////////////////////////
+
+// note that umatdata might be allocated together
+// with the matrix data, not as a separate object.
+// therefore, it does not have constructor or destructor;
+// it should be explicitly initialized using init().
+struct CV_EXPORTS UMatData
+{
+    enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+        DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
+        USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64};
+    UMatData(const MatAllocator* allocator);
+    ~UMatData();
+
+    // provide atomic access to the structure
+    void lock();
+    void unlock();
+
+    bool hostCopyObsolete() const;
+    bool deviceCopyObsolete() const;
+    bool deviceMemMapped() const;
+    bool copyOnMap() const;
+    bool tempUMat() const;
+    bool tempCopiedUMat() const;
+    void markHostCopyObsolete(bool flag);
+    void markDeviceCopyObsolete(bool flag);
+    void markDeviceMemMapped(bool flag);
+
+    const MatAllocator* prevAllocator;
+    const MatAllocator* currAllocator;
+    int urefcount;
+    int refcount;
+    uchar* data;
+    uchar* origdata;
+    size_t size;
+
+    int flags;
+    void* handle;
+    void* userdata;
+    int allocatorFlags_;
+    int mapcount;
+    UMatData* originalUMatData;
+};
+
+
+struct CV_EXPORTS UMatDataAutoLock
+{
+    explicit UMatDataAutoLock(UMatData* u);
+    ~UMatDataAutoLock();
+    UMatData* u;
+};
+
+
+struct CV_EXPORTS MatSize
+{
+    explicit MatSize(int* _p);
+    Size operator()() const;
+    const int& operator[](int i) const;
+    int& operator[](int i);
+    operator const int*() const;
+    bool operator == (const MatSize& sz) const;
+    bool operator != (const MatSize& sz) const;
+
+    int* p;
+};
+
+struct CV_EXPORTS MatStep
+{
+    MatStep();
+    explicit MatStep(size_t s);
+    const size_t& operator[](int i) const;
+    size_t& operator[](int i);
+    operator size_t() const;
+    MatStep& operator = (size_t s);
+
+    size_t* p;
+    size_t buf[2];
+protected:
+    MatStep& operator = (const MatStep&);
+};
+
+/** @example cout_mat.cpp
+An example demonstrating the serial out capabilities of cv::Mat
+*/
+
+ /** @brief n-dimensional dense array class
+
+The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It
+can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel
+volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms
+may be better stored in a SparseMat ). The data layout of the array `M` is defined by the array
+`M.step[]`, so that the address of element \f$(i_0,...,i_{M.dims-1})\f$, where \f$0\leq i_k<M.size[k]\f$, is
+computed as:
+\f[addr(M_{i_0,...,i_{M.dims-1}}) = M.data + M.step[0]*i_0 + M.step[1]*i_1 + ... + M.step[M.dims-1]*i_{M.dims-1}\f]
+In case of a 2-dimensional array, the above formula is reduced to:
+\f[addr(M_{i,j}) = M.data + M.step[0]*i + M.step[1]*j\f]
+Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[i+1]` ). This means
+that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane,
+and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() .
+
+So, the data layout in Mat is fully compatible with CvMat, IplImage, and CvMatND types from OpenCV
+1.x. It is also compatible with the majority of dense array types from the standard toolkits and
+SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, that is, with any
+array that uses *steps* (or *strides*) to compute the position of a pixel. Due to this
+compatibility, it is possible to make a Mat header for user-allocated data and process it in-place
+using OpenCV functions.
+
+There are many different ways to create a Mat object. The most popular options are listed below:
+
+- Use the create(nrows, ncols, type) method or the similar Mat(nrows, ncols, type[, fillValue])
+constructor. A new array of the specified size and type is allocated. type has the same meaning as
+in the cvCreateMat method. For example, CV_8UC1 means a 8-bit single-channel array, CV_32FC2
+means a 2-channel (complex) floating-point array, and so on.
+@code
+    // make a 7x7 complex matrix filled with 1+3j.
+    Mat M(7,7,CV_32FC2,Scalar(1,3));
+    // and now turn M to a 100x60 15-channel 8-bit matrix.
+    // The old content will be deallocated
+    M.create(100,60,CV_8UC(15));
+@endcode
+As noted in the introduction to this chapter, create() allocates only a new array when the shape
+or type of the current array are different from the specified ones.
+
+- Create a multi-dimensional array:
+@code
+    // create a 100x100x100 8-bit array
+    int sz[] = {100, 100, 100};
+    Mat bigCube(3, sz, CV_8U, Scalar::all(0));
+@endcode
+It passes the number of dimensions =1 to the Mat constructor but the created array will be
+2-dimensional with the number of columns set to 1. So, Mat::dims is always \>= 2 (can also be 0
+when the array is empty).
+
+- Use a copy constructor or assignment operator where there can be an array or expression on the
+right side (see below). As noted in the introduction, the array assignment is an O(1) operation
+because it only copies the header and increases the reference counter. The Mat::clone() method can
+be used to get a full (deep) copy of the array when you need it.
+
+- Construct a header for a part of another array. It can be a single row, single column, several
+rows, several columns, rectangular region in the array (called a *minor* in algebra) or a
+diagonal. Such operations are also O(1) because the new header references the same data. You can
+actually modify a part of the array using this feature, for example:
+@code
+    // add the 5-th row, multiplied by 3 to the 3rd row
+    M.row(3) = M.row(3) + M.row(5)*3;
+    // now copy the 7-th column to the 1-st column
+    // M.col(1) = M.col(7); // this will not work
+    Mat M1 = M.col(1);
+    M.col(7).copyTo(M1);
+    // create a new 320x240 image
+    Mat img(Size(320,240),CV_8UC3);
+    // select a ROI
+    Mat roi(img, Rect(10,10,100,100));
+    // fill the ROI with (0,255,0) (which is green in RGB space);
+    // the original 320x240 image will be modified
+    roi = Scalar(0,255,0);
+@endcode
+Due to the additional datastart and dataend members, it is possible to compute a relative
+sub-array position in the main *container* array using locateROI():
+@code
+    Mat A = Mat::eye(10, 10, CV_32S);
+    // extracts A columns, 1 (inclusive) to 3 (exclusive).
+    Mat B = A(Range::all(), Range(1, 3));
+    // extracts B rows, 5 (inclusive) to 9 (exclusive).
+    // that is, C \~ A(Range(5, 9), Range(1, 3))
+    Mat C = B(Range(5, 9), Range::all());
+    Size size; Point ofs;
+    C.locateROI(size, ofs);
+    // size will be (width=10,height=10) and the ofs will be (x=1, y=5)
+@endcode
+As in case of whole matrices, if you need a deep copy, use the `clone()` method of the extracted
+sub-matrices.
+
+- Make a header for user-allocated data. It can be useful to do the following:
+    -# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or
+    a processing module for gstreamer, and so on). For example:
+    @code
+        void process_video_frame(const unsigned char* pixels,
+                                 int width, int height, int step)
+        {
+            Mat img(height, width, CV_8UC3, pixels, step);
+            GaussianBlur(img, img, Size(7,7), 1.5, 1.5);
+        }
+    @endcode
+    -# Quickly initialize small matrices and/or get a super-fast element access.
+    @code
+        double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}};
+        Mat M = Mat(3, 3, CV_64F, m).inv();
+    @endcode
+    .
+    Partial yet very common cases of this *user-allocated data* case are conversions from CvMat and
+    IplImage to Mat. For this purpose, there is function cv::cvarrToMat taking pointers to CvMat or
+    IplImage and the optional flag indicating whether to copy the data or not.
+    @snippet samples/cpp/image.cpp iplimage
+
+- Use MATLAB-style array initializers, zeros(), ones(), eye(), for example:
+@code
+    // create a double-precision identity martix and add it to M.
+    M += Mat::eye(M.rows, M.cols, CV_64F);
+@endcode
+
+- Use a comma-separated initializer:
+@code
+    // create a 3x3 double-precision identity matrix
+    Mat M = (Mat_<double>(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1);
+@endcode
+With this approach, you first call a constructor of the Mat class with the proper parameters, and
+then you just put `<< operator` followed by comma-separated values that can be constants,
+variables, expressions, and so on. Also, note the extra parentheses required to avoid compilation
+errors.
+
+Once the array is created, it is automatically managed via a reference-counting mechanism. If the
+array header is built on top of user-allocated data, you should handle the data by yourself. The
+array data is deallocated when no one points to it. If you want to release the data pointed by a
+array header before the array destructor is called, use Mat::release().
+
+The next important thing to learn about the array class is element access. This manual already
+described how to compute an address of each array element. Normally, you are not required to use the
+formula directly in the code. If you know the array element type (which can be retrieved using the
+method Mat::type() ), you can access the element \f$M_{ij}\f$ of a 2-dimensional array as:
+@code
+    M.at<double>(i,j) += 1.f;
+@endcode
+assuming that `M` is a double-precision floating-point array. There are several variants of the method
+at for a different number of dimensions.
+
+If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to
+the row first, and then just use the plain C operator [] :
+@code
+    // compute sum of positive matrix elements
+    // (assuming that M isa double-precision matrix)
+    double sum=0;
+    for(int i = 0; i < M.rows; i++)
+    {
+        const double* Mi = M.ptr<double>(i);
+        for(int j = 0; j < M.cols; j++)
+            sum += std::max(Mi[j], 0.);
+    }
+@endcode
+Some operations, like the one above, do not actually depend on the array shape. They just process
+elements of an array one by one (or elements from multiple arrays that have the same coordinates,
+for example, array addition). Such operations are called *element-wise*. It makes sense to check
+whether all the input/output arrays are continuous, namely, have no gaps at the end of each row. If
+yes, process them as a long single row:
+@code
+    // compute the sum of positive matrix elements, optimized variant
+    double sum=0;
+    int cols = M.cols, rows = M.rows;
+    if(M.isContinuous())
+    {
+        cols *= rows;
+        rows = 1;
+    }
+    for(int i = 0; i < rows; i++)
+    {
+        const double* Mi = M.ptr<double>(i);
+        for(int j = 0; j < cols; j++)
+            sum += std::max(Mi[j], 0.);
+    }
+@endcode
+In case of the continuous matrix, the outer loop body is executed just once. So, the overhead is
+smaller, which is especially noticeable in case of small matrices.
+
+Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows:
+@code
+    // compute sum of positive matrix elements, iterator-based variant
+    double sum=0;
+    MatConstIterator_<double> it = M.begin<double>(), it_end = M.end<double>();
+    for(; it != it_end; ++it)
+        sum += std::max(*it, 0.);
+@endcode
+The matrix iterators are random-access iterators, so they can be passed to any STL algorithm,
+including std::sort().
+*/
+class CV_EXPORTS Mat
+{
+public:
+    /**
+    These are various constructors that form a matrix. As noted in the AutomaticAllocation, often
+    the default constructor is enough, and the proper matrix will be allocated by an OpenCV function.
+    The constructed matrix can further be assigned to another matrix or matrix expression or can be
+    allocated with Mat::create . In the former case, the old content is de-referenced.
+     */
+    Mat();
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(int rows, int cols, int type);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+      */
+    Mat(Size size, int type);
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(int rows, int cols, int type, const Scalar& s);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+      */
+    Mat(Size size, int type, const Scalar& s);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(int ndims, const int* sizes, int type);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(const std::vector<int>& sizes, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(int ndims, const int* sizes, int type, const Scalar& s);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(const std::vector<int>& sizes, int type, const Scalar& s);
+
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    */
+    Mat(const Mat& m);
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Number of bytes each matrix row occupies. The value should include the padding bytes at
+    the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed
+    and the actual step is calculated as cols*elemSize(). See Mat::elemSize.
+    */
+    Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Number of bytes each matrix row occupies. The value should include the padding bytes at
+    the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed
+    and the actual step is calculated as cols*elemSize(). See Mat::elemSize.
+    */
+    Mat(Size size, int type, void* data, size_t step=AUTO_STEP);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    Mat(const std::vector<int>& sizes, int type, void* data, const size_t* steps=0);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param rowRange Range of the m rows to take. As usual, the range start is inclusive and the range
+    end is exclusive. Use Range::all() to take all the rows.
+    @param colRange Range of the m columns to take. Use Range::all() to take all the columns.
+    */
+    Mat(const Mat& m, const Range& rowRange, const Range& colRange=Range::all());
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param roi Region of interest.
+    */
+    Mat(const Mat& m, const Rect& roi);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param ranges Array of selected ranges of m along each dimensionality.
+    */
+    Mat(const Mat& m, const Range* ranges);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param ranges Array of selected ranges of m along each dimensionality.
+    */
+    Mat(const Mat& m, const std::vector<Range>& ranges);
+
+    /** @overload
+    @param vec STL vector whose elements form the matrix. The matrix has a single column and the number
+    of rows equal to the number of vector elements. Type of the matrix matches the type of vector
+    elements. The constructor can handle arbitrary types, for which there is a properly declared
+    DataType . This means that the vector elements must be primitive numbers or uni-type numerical
+    tuples of numbers. Mixed-type structures are not supported. The corresponding constructor is
+    explicit. Since STL vectors are not automatically converted to Mat instances, you should write
+    Mat(vec) explicitly. Unless you copy the data into the matrix ( copyData=true ), no new elements
+    will be added to the vector because it can potentially yield vector data reallocation, and, thus,
+    the matrix data pointer will be invalid.
+    @param copyData Flag to specify whether the underlying data of the STL vector should be copied
+    to (true) or shared with (false) the newly constructed matrix. When the data is copied, the
+    allocated buffer is managed using Mat reference counting mechanism. While the data is shared,
+    the reference counter is NULL, and you should not deallocate the data until the matrix is not
+    destructed.
+    */
+    template<typename _Tp> explicit Mat(const std::vector<_Tp>& vec, bool copyData=false);
+
+    /** @overload
+    */
+    template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const Point_<_Tp>& pt, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! download data from GpuMat
+    explicit Mat(const cuda::GpuMat& m);
+
+    //! destructor - calls release()
+    ~Mat();
+
+    /** @brief assignment operators
+
+    These are available assignment operators. Since they all are very different, make sure to read the
+    operator parameters description.
+    @param m Assigned, right-hand-side matrix. Matrix assignment is an O(1) operation. This means that
+    no data is copied but the data is shared and the reference counter, if any, is incremented. Before
+    assigning new data, the old data is de-referenced via Mat::release .
+     */
+    Mat& operator = (const Mat& m);
+
+    /** @overload
+    @param expr Assigned matrix expression object. As opposite to the first form of the assignment
+    operation, the second form can reuse already allocated matrix if it has the right size and type to
+    fit the matrix expression result. It is automatically handled by the real function that the matrix
+    expressions is expanded to. For example, C=A+B is expanded to add(A, B, C), and add takes care of
+    automatic C reallocation.
+    */
+    Mat& operator = (const MatExpr& expr);
+
+    //! retrieve UMat from Mat
+    UMat getUMat(int accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
+
+    /** @brief Creates a matrix header for the specified matrix row.
+
+    The method makes a new header for the specified matrix row and returns it. This is an O(1)
+    operation, regardless of the matrix size. The underlying data of the new matrix is shared with the
+    original matrix. Here is the example of one of the classical basic matrix processing operations,
+    axpy, used by LU and many other algorithms:
+    @code
+        inline void matrix_axpy(Mat& A, int i, int j, double alpha)
+        {
+            A.row(i) += A.row(j)*alpha;
+        }
+    @endcode
+    @note In the current implementation, the following code does not work as expected:
+    @code
+        Mat A;
+        ...
+        A.row(i) = A.row(j); // will not work
+    @endcode
+    This happens because A.row(i) forms a temporary header that is further assigned to another header.
+    Remember that each of these operations is O(1), that is, no data is copied. Thus, the above
+    assignment is not true if you may have expected the j-th row to be copied to the i-th row. To
+    achieve that, you should either turn this simple assignment into an expression or use the
+    Mat::copyTo method:
+    @code
+        Mat A;
+        ...
+        // works, but looks a bit obscure.
+        A.row(i) = A.row(j) + 0;
+        // this is a bit longer, but the recommended method.
+        A.row(j).copyTo(A.row(i));
+    @endcode
+    @param y A 0-based row index.
+     */
+    Mat row(int y) const;
+
+    /** @brief Creates a matrix header for the specified matrix column.
+
+    The method makes a new header for the specified matrix column and returns it. This is an O(1)
+    operation, regardless of the matrix size. The underlying data of the new matrix is shared with the
+    original matrix. See also the Mat::row description.
+    @param x A 0-based column index.
+     */
+    Mat col(int x) const;
+
+    /** @brief Creates a matrix header for the specified row span.
+
+    The method makes a new header for the specified row span of the matrix. Similarly to Mat::row and
+    Mat::col , this is an O(1) operation.
+    @param startrow An inclusive 0-based start index of the row span.
+    @param endrow An exclusive 0-based ending index of the row span.
+     */
+    Mat rowRange(int startrow, int endrow) const;
+
+    /** @overload
+    @param r Range structure containing both the start and the end indices.
+    */
+    Mat rowRange(const Range& r) const;
+
+    /** @brief Creates a matrix header for the specified column span.
+
+    The method makes a new header for the specified column span of the matrix. Similarly to Mat::row and
+    Mat::col , this is an O(1) operation.
+    @param startcol An inclusive 0-based start index of the column span.
+    @param endcol An exclusive 0-based ending index of the column span.
+     */
+    Mat colRange(int startcol, int endcol) const;
+
+    /** @overload
+    @param r Range structure containing both the start and the end indices.
+    */
+    Mat colRange(const Range& r) const;
+
+    /** @brief Extracts a diagonal from a matrix
+
+    The method makes a new header for the specified matrix diagonal. The new matrix is represented as a
+    single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation.
+    @param d index of the diagonal, with the following values:
+    - `d=0` is the main diagonal.
+    - `d>0` is a diagonal from the lower half. For example, d=1 means the diagonal is set
+      immediately below the main one.
+    - `d<0` is a diagonal from the upper half. For example, d=-1 means the diagonal is set
+      immediately above the main one.
+     */
+    Mat diag(int d=0) const;
+
+    /** @brief creates a diagonal matrix
+
+    The method creates a square diagonal matrix from specified main diagonal.
+    @param d One-dimensional matrix that represents the main diagonal.
+     */
+    static Mat diag(const Mat& d);
+
+    /** @brief Creates a full copy of the array and the underlying data.
+
+    The method creates a full copy of the array. The original step[] is not taken into account. So, the
+    array copy is a continuous array occupying total()*elemSize() bytes.
+     */
+    Mat clone() const;
+
+    /** @brief Copies the matrix to another one.
+
+    The method copies the matrix data to another matrix. Before copying the data, the method invokes :
+    @code
+        m.create(this->size(), this->type());
+    @endcode
+    so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the
+    function does not handle the case of a partial overlap between the source and the destination
+    matrices.
+
+    When the operation mask is specified, if the Mat::create call shown above reallocates the matrix,
+    the newly allocated matrix is initialized with all zeros before copying the data.
+    @param m Destination matrix. If it does not have a proper size or type before the operation, it is
+    reallocated.
+     */
+    void copyTo( OutputArray m ) const;
+
+    /** @overload
+    @param m Destination matrix. If it does not have a proper size or type before the operation, it is
+    reallocated.
+    @param mask Operation mask. Its non-zero elements indicate which matrix elements need to be copied.
+    The mask has to be of type CV_8U and can have 1 or multiple channels.
+    */
+    void copyTo( OutputArray m, InputArray mask ) const;
+
+    /** @brief Converts an array to another data type with optional scaling.
+
+    The method converts source pixel values to the target data type. saturate_cast\<\> is applied at
+    the end to avoid possible overflows:
+
+    \f[m(x,y) = saturate \_ cast<rType>( \alpha (*this)(x,y) +  \beta )\f]
+    @param m output matrix; if it does not have a proper size or type before the operation, it is
+    reallocated.
+    @param rtype desired output matrix type or, rather, the depth since the number of channels are the
+    same as the input has; if rtype is negative, the output matrix will have the same type as the input.
+    @param alpha optional scale factor.
+    @param beta optional delta added to the scaled values.
+     */
+    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+    /** @brief Provides a functional form of convertTo.
+
+    This is an internally used method called by the @ref MatrixExpressions engine.
+    @param m Destination array.
+    @param type Desired destination array depth (or -1 if it should be the same as the source type).
+     */
+    void assignTo( Mat& m, int type=-1 ) const;
+
+    /** @brief Sets all or some of the array elements to the specified value.
+    @param s Assigned scalar converted to the actual array type.
+    */
+    Mat& operator = (const Scalar& s);
+
+    /** @brief Sets all or some of the array elements to the specified value.
+
+    This is an advanced variant of the Mat::operator=(const Scalar& s) operator.
+    @param value Assigned scalar converted to the actual array type.
+    @param mask Operation mask of the same size as \*this.
+     */
+    Mat& setTo(InputArray value, InputArray mask=noArray());
+
+    /** @brief Changes the shape and/or the number of channels of a 2D matrix without copying the data.
+
+    The method makes a new matrix header for \*this elements. The new matrix may have a different size
+    and/or different number of channels. Any combination is possible if:
+    -   No extra elements are included into the new matrix and no elements are excluded. Consequently,
+        the product rows\*cols\*channels() must stay the same after the transformation.
+    -   No data is copied. That is, this is an O(1) operation. Consequently, if you change the number of
+        rows, or the operation changes the indices of elements row in some other way, the matrix must be
+        continuous. See Mat::isContinuous .
+
+    For example, if there is a set of 3D points stored as an STL vector, and you want to represent the
+    points as a 3xN matrix, do the following:
+    @code
+        std::vector<Point3f> vec;
+        ...
+        Mat pointMat = Mat(vec). // convert vector to Mat, O(1) operation
+                          reshape(1). // make Nx3 1-channel matrix out of Nx1 3-channel.
+                                      // Also, an O(1) operation
+                             t(); // finally, transpose the Nx3 matrix.
+                                  // This involves copying all the elements
+    @endcode
+    @param cn New number of channels. If the parameter is 0, the number of channels remains the same.
+    @param rows New number of rows. If the parameter is 0, the number of rows remains the same.
+     */
+    Mat reshape(int cn, int rows=0) const;
+
+    /** @overload */
+    Mat reshape(int cn, int newndims, const int* newsz) const;
+
+    /** @brief Transposes a matrix.
+
+    The method performs matrix transposition by means of matrix expressions. It does not perform the
+    actual transposition but returns a temporary matrix transposition object that can be further used as
+    a part of more complex matrix expressions or can be assigned to a matrix:
+    @code
+        Mat A1 = A + Mat::eye(A.size(), A.type())*lambda;
+        Mat C = A1.t()*A1; // compute (A + lambda*I)^t * (A + lamda*I)
+    @endcode
+     */
+    MatExpr t() const;
+
+    /** @brief Inverses a matrix.
+
+    The method performs a matrix inversion by means of matrix expressions. This means that a temporary
+    matrix inversion object is returned by the method and can be used further as a part of more complex
+    matrix expressions or can be assigned to a matrix.
+    @param method Matrix inversion method. One of cv::DecompTypes
+     */
+    MatExpr inv(int method=DECOMP_LU) const;
+
+    /** @brief Performs an element-wise multiplication or division of the two matrices.
+
+    The method returns a temporary object encoding per-element array multiplication, with optional
+    scale. Note that this is not a matrix multiplication that corresponds to a simpler "\*" operator.
+
+    Example:
+    @code
+        Mat C = A.mul(5/B); // equivalent to divide(A, B, C, 5)
+    @endcode
+    @param m Another array of the same type and the same size as \*this, or a matrix expression.
+    @param scale Optional scale factor.
+     */
+    MatExpr mul(InputArray m, double scale=1) const;
+
+    /** @brief Computes a cross-product of two 3-element vectors.
+
+    The method computes a cross-product of two 3-element vectors. The vectors must be 3-element
+    floating-point vectors of the same shape and size. The result is another 3-element vector of the
+    same shape and type as operands.
+    @param m Another cross-product operand.
+     */
+    Mat cross(InputArray m) const;
+
+    /** @brief Computes a dot-product of two vectors.
+
+    The method computes a dot-product of two matrices. If the matrices are not single-column or
+    single-row vectors, the top-to-bottom left-to-right scan ordering is used to treat them as 1D
+    vectors. The vectors must have the same size and type. If the matrices have more than one channel,
+    the dot products from all the channels are summed together.
+    @param m another dot-product operand.
+     */
+    double dot(InputArray m) const;
+
+    /** @brief Returns a zero array of the specified size and type.
+
+    The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
+    array as a function parameter, part of a matrix expression, or as a matrix initializer. :
+    @code
+        Mat A;
+        A = Mat::zeros(3, 3, CV_32F);
+    @endcode
+    In the example above, a new matrix is allocated only if A is not a 3x3 floating-point matrix.
+    Otherwise, the existing matrix A is filled with zeros.
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr zeros(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative to the matrix size specification Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr zeros(Size size, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sz Array of integers specifying the array shape.
+    @param type Created matrix type.
+    */
+    static MatExpr zeros(int ndims, const int* sz, int type);
+
+    /** @brief Returns an array of all 1's of the specified size and type.
+
+    The method returns a Matlab-style 1's array initializer, similarly to Mat::zeros. Note that using
+    this method you can initialize an array with an arbitrary value, using the following Matlab idiom:
+    @code
+        Mat A = Mat::ones(100, 100, CV_8U)*3; // make 100x100 matrix filled with 3.
+    @endcode
+    The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
+    just remembers the scale factor (3 in this case) and use it when actually invoking the matrix
+    initializer.
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr ones(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative to the matrix size specification Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr ones(Size size, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sz Array of integers specifying the array shape.
+    @param type Created matrix type.
+    */
+    static MatExpr ones(int ndims, const int* sz, int type);
+
+    /** @brief Returns an identity matrix of the specified size and type.
+
+    The method returns a Matlab-style identity matrix initializer, similarly to Mat::zeros. Similarly to
+    Mat::ones, you can use a scale operation to create a scaled identity matrix efficiently:
+    @code
+        // make a 4x4 diagonal matrix with 0.1's on the diagonal.
+        Mat A = Mat::eye(4, 4, CV_32F)*0.1;
+    @endcode
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr eye(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative matrix size specification as Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr eye(Size size, int type);
+
+    /** @brief Allocates new array data if needed.
+
+    This is one of the key Mat methods. Most new-style OpenCV functions and methods that produce arrays
+    call this method for each output array. The method uses the following algorithm:
+
+    -# If the current array shape and the type match the new ones, return immediately. Otherwise,
+       de-reference the previous data by calling Mat::release.
+    -# Initialize the new header.
+    -# Allocate the new data of total()\*elemSize() bytes.
+    -# Allocate the new, associated with the data, reference counter and set it to 1.
+
+    Such a scheme makes the memory management robust and efficient at the same time and helps avoid
+    extra typing for you. This means that usually there is no need to explicitly allocate output arrays.
+    That is, instead of writing:
+    @code
+        Mat color;
+        ...
+        Mat gray(color.rows, color.cols, color.depth());
+        cvtColor(color, gray, COLOR_BGR2GRAY);
+    @endcode
+    you can simply write:
+    @code
+        Mat color;
+        ...
+        Mat gray;
+        cvtColor(color, gray, COLOR_BGR2GRAY);
+    @endcode
+    because cvtColor, as well as the most of OpenCV functions, calls Mat::create() for the output array
+    internally.
+    @param rows New number of rows.
+    @param cols New number of columns.
+    @param type New matrix type.
+     */
+    void create(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative new matrix size specification: Size(cols, rows)
+    @param type New matrix type.
+    */
+    void create(Size size, int type);
+
+    /** @overload
+    @param ndims New array dimensionality.
+    @param sizes Array of integers specifying a new array shape.
+    @param type New matrix type.
+    */
+    void create(int ndims, const int* sizes, int type);
+
+    /** @overload
+    @param sizes Array of integers specifying a new array shape.
+    @param type New matrix type.
+    */
+    void create(const std::vector<int>& sizes, int type);
+
+    /** @brief Increments the reference counter.
+
+    The method increments the reference counter associated with the matrix data. If the matrix header
+    points to an external data set (see Mat::Mat ), the reference counter is NULL, and the method has no
+    effect in this case. Normally, to avoid memory leaks, the method should not be called explicitly. It
+    is called implicitly by the matrix assignment operator. The reference counter increment is an atomic
+    operation on the platforms that support it. Thus, it is safe to operate on the same matrices
+    asynchronously in different threads.
+     */
+    void addref();
+
+    /** @brief Decrements the reference counter and deallocates the matrix if needed.
+
+    The method decrements the reference counter associated with the matrix data. When the reference
+    counter reaches 0, the matrix data is deallocated and the data and the reference counter pointers
+    are set to NULL's. If the matrix header points to an external data set (see Mat::Mat ), the
+    reference counter is NULL, and the method has no effect in this case.
+
+    This method can be called manually to force the matrix data deallocation. But since this method is
+    automatically called in the destructor, or by any other method that changes the data pointer, it is
+    usually not needed. The reference counter decrement and check for 0 is an atomic operation on the
+    platforms that support it. Thus, it is safe to operate on the same matrices asynchronously in
+    different threads.
+     */
+    void release();
+
+    //! deallocates the matrix data
+    void deallocate();
+    //! internal use function; properly re-allocates _size, _step arrays
+    void copySize(const Mat& m);
+
+    /** @brief Reserves space for the certain number of rows.
+
+    The method reserves space for sz rows. If the matrix already has enough space to store sz rows,
+    nothing happens. If the matrix is reallocated, the first Mat::rows rows are preserved. The method
+    emulates the corresponding method of the STL vector class.
+    @param sz Number of rows.
+     */
+    void reserve(size_t sz);
+
+    /** @brief Changes the number of matrix rows.
+
+    The methods change the number of matrix rows. If the matrix is reallocated, the first
+    min(Mat::rows, sz) rows are preserved. The methods emulate the corresponding methods of the STL
+    vector class.
+    @param sz New number of rows.
+     */
+    void resize(size_t sz);
+
+    /** @overload
+    @param sz New number of rows.
+    @param s Value assigned to the newly added elements.
+     */
+    void resize(size_t sz, const Scalar& s);
+
+    //! internal function
+    void push_back_(const void* elem);
+
+    /** @brief Adds elements to the bottom of the matrix.
+
+    The methods add one or more elements to the bottom of the matrix. They emulate the corresponding
+    method of the STL vector class. When elem is Mat , its type and the number of columns must be the
+    same as in the container matrix.
+    @param elem Added element(s).
+     */
+    template<typename _Tp> void push_back(const _Tp& elem);
+
+    /** @overload
+    @param elem Added element(s).
+    */
+    template<typename _Tp> void push_back(const Mat_<_Tp>& elem);
+
+    /** @overload
+    @param m Added line(s).
+    */
+    void push_back(const Mat& m);
+
+    /** @brief Removes elements from the bottom of the matrix.
+
+    The method removes one or more rows from the bottom of the matrix.
+    @param nelems Number of removed rows. If it is greater than the total number of rows, an exception
+    is thrown.
+     */
+    void pop_back(size_t nelems=1);
+
+    /** @brief Locates the matrix header within a parent matrix.
+
+    After you extracted a submatrix from a matrix using Mat::row, Mat::col, Mat::rowRange,
+    Mat::colRange, and others, the resultant submatrix points just to the part of the original big
+    matrix. However, each submatrix contains information (represented by datastart and dataend
+    fields) that helps reconstruct the original matrix size and the position of the extracted
+    submatrix within the original matrix. The method locateROI does exactly that.
+    @param wholeSize Output parameter that contains the size of the whole matrix containing *this*
+    as a part.
+    @param ofs Output parameter that contains an offset of *this* inside the whole matrix.
+     */
+    void locateROI( Size& wholeSize, Point& ofs ) const;
+
+    /** @brief Adjusts a submatrix size and position within the parent matrix.
+
+    The method is complimentary to Mat::locateROI . The typical use of these functions is to determine
+    the submatrix position within the parent matrix and then shift the position somehow. Typically, it
+    can be required for filtering operations when pixels outside of the ROI should be taken into
+    account. When all the method parameters are positive, the ROI needs to grow in all directions by the
+    specified amount, for example:
+    @code
+        A.adjustROI(2, 2, 2, 2);
+    @endcode
+    In this example, the matrix size is increased by 4 elements in each direction. The matrix is shifted
+    by 2 elements to the left and 2 elements up, which brings in all the necessary pixels for the
+    filtering with the 5x5 kernel.
+
+    adjustROI forces the adjusted ROI to be inside of the parent matrix that is boundaries of the
+    adjusted ROI are constrained by boundaries of the parent matrix. For example, if the submatrix A is
+    located in the first row of a parent matrix and you called A.adjustROI(2, 2, 2, 2) then A will not
+    be increased in the upward direction.
+
+    The function is used internally by the OpenCV filtering functions, like filter2D , morphological
+    operations, and so on.
+    @param dtop Shift of the top submatrix boundary upwards.
+    @param dbottom Shift of the bottom submatrix boundary downwards.
+    @param dleft Shift of the left submatrix boundary to the left.
+    @param dright Shift of the right submatrix boundary to the right.
+    @sa copyMakeBorder
+     */
+    Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+
+    /** @brief Extracts a rectangular submatrix.
+
+    The operators make a new header for the specified sub-array of \*this . They are the most
+    generalized forms of Mat::row, Mat::col, Mat::rowRange, and Mat::colRange . For example,
+    `A(Range(0, 10), Range::all())` is equivalent to `A.rowRange(0, 10)`. Similarly to all of the above,
+    the operators are O(1) operations, that is, no matrix data is copied.
+    @param rowRange Start and end row of the extracted submatrix. The upper boundary is not included. To
+    select all the rows, use Range::all().
+    @param colRange Start and end column of the extracted submatrix. The upper boundary is not included.
+    To select all the columns, use Range::all().
+     */
+    Mat operator()( Range rowRange, Range colRange ) const;
+
+    /** @overload
+    @param roi Extracted submatrix specified as a rectangle.
+    */
+    Mat operator()( const Rect& roi ) const;
+
+    /** @overload
+    @param ranges Array of selected ranges along each array dimension.
+    */
+    Mat operator()( const Range* ranges ) const;
+
+    /** @overload
+    @param ranges Array of selected ranges along each array dimension.
+    */
+    Mat operator()(const std::vector<Range>& ranges) const;
+
+    // //! converts header to CvMat; no data is copied
+    // operator CvMat() const;
+    // //! converts header to CvMatND; no data is copied
+    // operator CvMatND() const;
+    // //! converts header to IplImage; no data is copied
+    // operator IplImage() const;
+
+    template<typename _Tp> operator std::vector<_Tp>() const;
+    template<typename _Tp, int n> operator Vec<_Tp, n>() const;
+    template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
+
+    /** @brief Reports whether the matrix is continuous or not.
+
+    The method returns true if the matrix elements are stored continuously without gaps at the end of
+    each row. Otherwise, it returns false. Obviously, 1x1 or 1xN matrices are always continuous.
+    Matrices created with Mat::create are always continuous. But if you extract a part of the matrix
+    using Mat::col, Mat::diag, and so on, or constructed a matrix header for externally allocated data,
+    such matrices may no longer have this property.
+
+    The continuity flag is stored as a bit in the Mat::flags field and is computed automatically when
+    you construct a matrix header. Thus, the continuity check is a very fast operation, though
+    theoretically it could be done as follows:
+    @code
+        // alternative implementation of Mat::isContinuous()
+        bool myCheckMatContinuity(const Mat& m)
+        {
+            //return (m.flags & Mat::CONTINUOUS_FLAG) != 0;
+            return m.rows == 1 || m.step == m.cols*m.elemSize();
+        }
+    @endcode
+    The method is used in quite a few of OpenCV functions. The point is that element-wise operations
+    (such as arithmetic and logical operations, math functions, alpha blending, color space
+    transformations, and others) do not depend on the image geometry. Thus, if all the input and output
+    arrays are continuous, the functions can process them as very long single-row vectors. The example
+    below illustrates how an alpha-blending function can be implemented:
+    @code
+        template<typename T>
+        void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+        {
+            const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                        inv_scale = 1.f/alpha_scale;
+
+            CV_Assert( src1.type() == src2.type() &&
+                       src1.type() == CV_MAKETYPE(DataType<T>::depth, 4) &&
+                       src1.size() == src2.size());
+            Size size = src1.size();
+            dst.create(size, src1.type());
+
+            // here is the idiom: check the arrays for continuity and,
+            // if this is the case,
+            // treat the arrays as 1D vectors
+            if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+            {
+                size.width *= size.height;
+                size.height = 1;
+            }
+            size.width *= 4;
+
+            for( int i = 0; i < size.height; i++ )
+            {
+                // when the arrays are continuous,
+                // the outer loop is executed only once
+                const T* ptr1 = src1.ptr<T>(i);
+                const T* ptr2 = src2.ptr<T>(i);
+                T* dptr = dst.ptr<T>(i);
+
+                for( int j = 0; j < size.width; j += 4 )
+                {
+                    float alpha = ptr1[j+3]*inv_scale, beta = ptr2[j+3]*inv_scale;
+                    dptr[j] = saturate_cast<T>(ptr1[j]*alpha + ptr2[j]*beta);
+                    dptr[j+1] = saturate_cast<T>(ptr1[j+1]*alpha + ptr2[j+1]*beta);
+                    dptr[j+2] = saturate_cast<T>(ptr1[j+2]*alpha + ptr2[j+2]*beta);
+                    dptr[j+3] = saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale);
+                }
+            }
+        }
+    @endcode
+    This approach, while being very simple, can boost the performance of a simple element-operation by
+    10-20 percents, especially if the image is rather small and the operation is quite simple.
+
+    Another OpenCV idiom in this function, a call of Mat::create for the destination array, that
+    allocates the destination array unless it already has the proper size and type. And while the newly
+    allocated arrays are always continuous, you still need to check the destination array because
+    Mat::create does not always allocate a new matrix.
+     */
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    /** @brief Returns the matrix element size in bytes.
+
+    The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 ,
+    the method returns 3\*sizeof(short) or 6.
+     */
+    size_t elemSize() const;
+
+    /** @brief Returns the size of each matrix element channel in bytes.
+
+    The method returns the matrix element channel size in bytes, that is, it ignores the number of
+    channels. For example, if the matrix type is CV_16SC3 , the method returns sizeof(short) or 2.
+     */
+    size_t elemSize1() const;
+
+    /** @brief Returns the type of a matrix element.
+
+    The method returns a matrix element type. This is an identifier compatible with the CvMat type
+    system, like CV_16SC3 or 16-bit signed 3-channel array, and so on.
+     */
+    int type() const;
+
+    /** @brief Returns the depth of a matrix element.
+
+    The method returns the identifier of the matrix element depth (the type of each individual channel).
+    For example, for a 16-bit signed element array, the method returns CV_16S . A complete list of
+    matrix types contains the following values:
+    -   CV_8U - 8-bit unsigned integers ( 0..255 )
+    -   CV_8S - 8-bit signed integers ( -128..127 )
+    -   CV_16U - 16-bit unsigned integers ( 0..65535 )
+    -   CV_16S - 16-bit signed integers ( -32768..32767 )
+    -   CV_32S - 32-bit signed integers ( -2147483648..2147483647 )
+    -   CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN )
+    -   CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN )
+     */
+    int depth() const;
+
+    /** @brief Returns the number of matrix channels.
+
+    The method returns the number of matrix channels.
+     */
+    int channels() const;
+
+    /** @brief Returns a normalized step.
+
+    The method returns a matrix step divided by Mat::elemSize1() . It can be useful to quickly access an
+    arbitrary matrix element.
+     */
+    size_t step1(int i=0) const;
+
+    /** @brief Returns true if the array has no elements.
+
+    The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of pop_back() and
+    resize() methods `M.total() == 0` does not imply that `M.data == NULL`.
+     */
+    bool empty() const;
+
+    /** @brief Returns the total number of array elements.
+
+    The method returns the number of array elements (a number of pixels if the array represents an
+    image).
+     */
+    size_t total() const;
+
+    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+
+    /** @brief Returns a pointer to the specified matrix row.
+
+    The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in
+    Mat::isContinuous to know how to use these methods.
+    @param i0 A 0-based row index.
+     */
+    uchar* ptr(int i0=0);
+    /** @overload */
+    const uchar* ptr(int i0=0) const;
+
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    uchar* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    const uchar* ptr(int row, int col) const;
+
+    /** @overload */
+    uchar* ptr(int i0, int i1, int i2);
+    /** @overload */
+    const uchar* ptr(int i0, int i1, int i2) const;
+
+    /** @overload */
+    uchar* ptr(const int* idx);
+    /** @overload */
+    const uchar* ptr(const int* idx) const;
+    /** @overload */
+    template<int n> uchar* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<int n> const uchar* ptr(const Vec<int, n>& idx) const;
+
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0=0);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp* ptr(int row, int col) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0, int i1, int i2);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0, int i1, int i2) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(const int* idx);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(const int* idx) const;
+    /** @overload */
+    template<typename _Tp, int n> _Tp* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp* ptr(const Vec<int, n>& idx) const;
+
+    /** @brief Returns a reference to the specified array element.
+
+    The template methods return a reference to the specified array element. For the sake of higher
+    performance, the index range checks are only performed in the Debug configuration.
+
+    Note that the variants with a single index (i) can be used to access elements of single-row or
+    single-column 2-dimensional arrays. That is, if, for example, A is a 1 x N floating-point matrix and
+    B is an M x 1 integer matrix, you can simply write `A.at<float>(k+4)` and `B.at<int>(2*i+1)`
+    instead of `A.at<float>(0,k+4)` and `B.at<int>(2*i+1,0)`, respectively.
+
+    The example below initializes a Hilbert matrix:
+    @code
+        Mat H(100, 100, CV_64F);
+        for(int i = 0; i < H.rows; i++)
+            for(int j = 0; j < H.cols; j++)
+                H.at<double>(i,j)=1./(i+j+1);
+    @endcode
+
+    Keep in mind that the size identifier used in the at operator cannot be chosen at random. It depends
+    on the image from which you are trying to retrieve the data. The table below gives a better insight in this:
+     - If matrix is of type `CV_8U` then use `Mat.at<uchar>(y,x)`.
+     - If matrix is of type `CV_8S` then use `Mat.at<schar>(y,x)`.
+     - If matrix is of type `CV_16U` then use `Mat.at<ushort>(y,x)`.
+     - If matrix is of type `CV_16S` then use `Mat.at<short>(y,x)`.
+     - If matrix is of type `CV_32S`  then use `Mat.at<int>(y,x)`.
+     - If matrix is of type `CV_32F`  then use `Mat.at<float>(y,x)`.
+     - If matrix is of type `CV_64F` then use `Mat.at<double>(y,x)`.
+
+    @param i0 Index along the dimension 0
+     */
+    template<typename _Tp> _Tp& at(int i0=0);
+    /** @overload
+    @param i0 Index along the dimension 0
+    */
+    template<typename _Tp> const _Tp& at(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp& at(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp& at(int row, int col) const;
+
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> _Tp& at(int i0, int i1, int i2);
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> const _Tp& at(int i0, int i1, int i2) const;
+
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> _Tp& at(const int* idx);
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> const _Tp& at(const int* idx) const;
+
+    /** @overload */
+    template<typename _Tp, int n> _Tp& at(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp& at(const Vec<int, n>& idx) const;
+
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> _Tp& at(Point pt);
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> const _Tp& at(Point pt) const;
+
+    /** @brief Returns the matrix iterator and sets it to the first matrix element.
+
+    The methods return the matrix read-only or read-write iterators. The use of matrix iterators is very
+    similar to the use of bi-directional STL iterators. In the example below, the alpha blending
+    function is rewritten using the matrix iterators:
+    @code
+        template<typename T>
+        void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+        {
+            typedef Vec<T, 4> VT;
+
+            const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                        inv_scale = 1.f/alpha_scale;
+
+            CV_Assert( src1.type() == src2.type() &&
+                       src1.type() == DataType<VT>::type &&
+                       src1.size() == src2.size());
+            Size size = src1.size();
+            dst.create(size, src1.type());
+
+            MatConstIterator_<VT> it1 = src1.begin<VT>(), it1_end = src1.end<VT>();
+            MatConstIterator_<VT> it2 = src2.begin<VT>();
+            MatIterator_<VT> dst_it = dst.begin<VT>();
+
+            for( ; it1 != it1_end; ++it1, ++it2, ++dst_it )
+            {
+                VT pix1 = *it1, pix2 = *it2;
+                float alpha = pix1[3]*inv_scale, beta = pix2[3]*inv_scale;
+                *dst_it = VT(saturate_cast<T>(pix1[0]*alpha + pix2[0]*beta),
+                             saturate_cast<T>(pix1[1]*alpha + pix2[1]*beta),
+                             saturate_cast<T>(pix1[2]*alpha + pix2[2]*beta),
+                             saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale));
+            }
+        }
+    @endcode
+     */
+    template<typename _Tp> MatIterator_<_Tp> begin();
+    template<typename _Tp> MatConstIterator_<_Tp> begin() const;
+
+    /** @brief Returns the matrix iterator and sets it to the after-last matrix element.
+
+    The methods return the matrix read-only or read-write iterators, set to the point following the last
+    matrix element.
+     */
+    template<typename _Tp> MatIterator_<_Tp> end();
+    template<typename _Tp> MatConstIterator_<_Tp> end() const;
+
+    /** @brief Runs the given functor over all matrix elements in parallel.
+
+    The operation passed as argument has to be a function pointer, a function object or a lambda(C++11).
+
+    Example 1. All of the operations below put 0xFF the first channel of all matrix elements:
+    @code
+        Mat image(1920, 1080, CV_8UC3);
+        typedef cv::Point3_<uint8_t> Pixel;
+
+        // first. raw pointer access.
+        for (int r = 0; r < image.rows; ++r) {
+            Pixel* ptr = image.ptr<Pixel>(0, r);
+            const Pixel* ptr_end = ptr + image.cols;
+            for (; ptr != ptr_end; ++ptr) {
+                ptr->x = 255;
+            }
+        }
+
+        // Using MatIterator. (Simple but there are a Iterator's overhead)
+        for (Pixel &p : cv::Mat_<Pixel>(image)) {
+            p.x = 255;
+        }
+
+        // Parallel execution with function object.
+        struct Operator {
+            void operator ()(Pixel &pixel, const int * position) {
+                pixel.x = 255;
+            }
+        };
+        image.forEach<Pixel>(Operator());
+
+        // Parallel execution using C++11 lambda.
+        image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
+            p.x = 255;
+        });
+    @endcode
+    Example 2. Using the pixel's position:
+    @code
+        // Creating 3D matrix (255 x 255 x 255) typed uint8_t
+        // and initialize all elements by the value which equals elements position.
+        // i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3).
+
+        int sizes[] = { 255, 255, 255 };
+        typedef cv::Point3_<uint8_t> Pixel;
+
+        Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
+
+        image.forEach<Pixel>([&](Pixel& pixel, const int position[]) -> void {
+            pixel.x = position[0];
+            pixel.y = position[1];
+            pixel.z = position[2];
+        });
+    @endcode
+     */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation);
+    /** @overload */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Mat(Mat&& m);
+    Mat& operator = (Mat&& m);
+#endif
+
+    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+    //! pointer to the data
+    uchar* data;
+
+    //! helper fields used in locateROI and adjustROI
+    const uchar* datastart;
+    const uchar* dataend;
+    const uchar* datalimit;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+    static MatAllocator* getDefaultAllocator();
+    static void setDefaultAllocator(MatAllocator* allocator);
+
+    //! interaction with UMat
+    UMatData* u;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+    template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
+};
+
+
+///////////////////////////////// Mat_<_Tp> ////////////////////////////////////
+
+/** @brief Template matrix class derived from Mat
+
+@code
+    template<typename _Tp> class Mat_ : public Mat
+    {
+    public:
+        // ... some specific methods
+        //         and
+        // no new extra fields
+    };
+@endcode
+The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any
+extra data fields. Nor this class nor Mat has any virtual methods. Thus, references or pointers to
+these two classes can be freely but carefully converted one to another. For example:
+@code
+    // create a 100x100 8-bit matrix
+    Mat M(100,100,CV_8U);
+    // this will be compiled fine. no any data conversion will be done.
+    Mat_<float>& M1 = (Mat_<float>&)M;
+    // the program is likely to crash at the statement below
+    M1(99,99) = 1.f;
+@endcode
+While Mat is sufficient in most cases, Mat_ can be more convenient if you use a lot of element
+access operations and if you know matrix type at the compilation time. Note that
+`Mat::at(int y,int x)` and `Mat_::operator()(int y,int x)` do absolutely the same
+and run at the same speed, but the latter is certainly shorter:
+@code
+    Mat_<double> M(20,20);
+    for(int i = 0; i < M.rows; i++)
+        for(int j = 0; j < M.cols; j++)
+            M(i,j) = 1./(i+j+1);
+    Mat E, V;
+    eigen(M,E,V);
+    cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
+@endcode
+To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter:
+@code
+    // allocate a 320x240 color image and fill it with green (in RGB space)
+    Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
+    // now draw a diagonal white line
+    for(int i = 0; i < 100; i++)
+        img(i,i)=Vec3b(255,255,255);
+    // and now scramble the 2nd (red) channel of each pixel
+    for(int i = 0; i < img.rows; i++)
+        for(int j = 0; j < img.cols; j++)
+            img(i,j)[2] ^= (uchar)(i ^ j);
+@endcode
+ */
+template<typename _Tp> class Mat_ : public Mat
+{
+public:
+    typedef _Tp value_type;
+    typedef typename DataType<_Tp>::channel_type channel_type;
+    typedef MatIterator_<_Tp> iterator;
+    typedef MatConstIterator_<_Tp> const_iterator;
+
+    //! default constructor
+    Mat_();
+    //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type)
+    Mat_(int _rows, int _cols);
+    //! constructor that sets each matrix element to specified value
+    Mat_(int _rows, int _cols, const _Tp& value);
+    //! equivalent to Mat(_size, DataType<_Tp>::type)
+    explicit Mat_(Size _size);
+    //! constructor that sets each matrix element to specified value
+    Mat_(Size _size, const _Tp& value);
+    //! n-dim array constructor
+    Mat_(int _ndims, const int* _sizes);
+    //! n-dim array constructor that sets each matrix element to specified value
+    Mat_(int _ndims, const int* _sizes, const _Tp& value);
+    //! copy/conversion contructor. If m is of different type, it's converted
+    Mat_(const Mat& m);
+    //! copy constructor
+    Mat_(const Mat_& m);
+    //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
+    Mat_(int _rows, int _cols, _Tp* _data, size_t _step=AUTO_STEP);
+    //! constructs n-dim matrix on top of user-allocated data. steps are in bytes(!!!), regardless of the type
+    Mat_(int _ndims, const int* _sizes, _Tp* _data, const size_t* _steps=0);
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Range& rowRange, const Range& colRange=Range::all());
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Rect& roi);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const Range* ranges);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const std::vector<Range>& ranges);
+    //! from a matrix expression
+    explicit Mat_(const MatExpr& e);
+    //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column
+    explicit Mat_(const std::vector<_Tp>& vec, bool copyData=false);
+    template<int n> explicit Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData=true);
+    template<int m, int n> explicit Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& mtx, bool copyData=true);
+    explicit Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    Mat_& operator = (const Mat& m);
+    Mat_& operator = (const Mat_& m);
+    //! set all the elements to s.
+    Mat_& operator = (const _Tp& s);
+    //! assign a matrix expression
+    Mat_& operator = (const MatExpr& e);
+
+    //! iterators; they are smart enough to skip gaps in the end of rows
+    iterator begin();
+    iterator end();
+    const_iterator begin() const;
+    const_iterator end() const;
+
+    //! template methods for for operation over all matrix elements.
+    // the operations take care of skipping gaps in the end of rows (if any)
+    template<typename Functor> void forEach(const Functor& operation);
+    template<typename Functor> void forEach(const Functor& operation) const;
+
+    //! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
+    void create(int _rows, int _cols);
+    //! equivalent to Mat::create(_size, DataType<_Tp>::type)
+    void create(Size _size);
+    //! equivalent to Mat::create(_ndims, _sizes, DatType<_Tp>::type)
+    void create(int _ndims, const int* _sizes);
+    //! cross-product
+    Mat_ cross(const Mat_& m) const;
+    //! data type conversion
+    template<typename T2> operator Mat_<T2>() const;
+    //! overridden forms of Mat::row() etc.
+    Mat_ row(int y) const;
+    Mat_ col(int x) const;
+    Mat_ diag(int d=0) const;
+    Mat_ clone() const;
+
+    //! overridden forms of Mat::elemSize() etc.
+    size_t elemSize() const;
+    size_t elemSize1() const;
+    int type() const;
+    int depth() const;
+    int channels() const;
+    size_t step1(int i=0) const;
+    //! returns step()/sizeof(_Tp)
+    size_t stepT(int i=0) const;
+
+    //! overridden forms of Mat::zeros() etc. Data type is omitted, of course
+    static MatExpr zeros(int rows, int cols);
+    static MatExpr zeros(Size size);
+    static MatExpr zeros(int _ndims, const int* _sizes);
+    static MatExpr ones(int rows, int cols);
+    static MatExpr ones(Size size);
+    static MatExpr ones(int _ndims, const int* _sizes);
+    static MatExpr eye(int rows, int cols);
+    static MatExpr eye(Size size);
+
+    //! some more overriden methods
+    Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
+    Mat_ operator()( const Rect& roi ) const;
+    Mat_ operator()( const Range* ranges ) const;
+    Mat_ operator()(const std::vector<Range>& ranges) const;
+
+    //! more convenient forms of row and element access operators
+    _Tp* operator [](int y);
+    const _Tp* operator [](int y) const;
+
+    //! returns reference to the specified element
+    _Tp& operator ()(const int* idx);
+    //! returns read-only reference to the specified element
+    const _Tp& operator ()(const int* idx) const;
+
+    //! returns reference to the specified element
+    template<int n> _Tp& operator ()(const Vec<int, n>& idx);
+    //! returns read-only reference to the specified element
+    template<int n> const _Tp& operator ()(const Vec<int, n>& idx) const;
+
+    //! returns reference to the specified element (1D case)
+    _Tp& operator ()(int idx0);
+    //! returns read-only reference to the specified element (1D case)
+    const _Tp& operator ()(int idx0) const;
+    //! returns reference to the specified element (2D case)
+    _Tp& operator ()(int row, int col);
+    //! returns read-only reference to the specified element (2D case)
+    const _Tp& operator ()(int row, int col) const;
+    //! returns reference to the specified element (3D case)
+    _Tp& operator ()(int idx0, int idx1, int idx2);
+    //! returns read-only reference to the specified element (3D case)
+    const _Tp& operator ()(int idx0, int idx1, int idx2) const;
+
+    _Tp& operator ()(Point pt);
+    const _Tp& operator ()(Point pt) const;
+
+    //! conversion to vector.
+    operator std::vector<_Tp>() const;
+    //! conversion to Vec
+    template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
+    //! conversion to Matx
+    template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Mat_(Mat_&& m);
+    Mat_& operator = (Mat_&& m);
+
+    Mat_(Mat&& m);
+    Mat_& operator = (Mat&& m);
+
+    Mat_(MatExpr&& e);
+#endif
+};
+
+typedef Mat_<uchar> Mat1b;
+typedef Mat_<Vec2b> Mat2b;
+typedef Mat_<Vec3b> Mat3b;
+typedef Mat_<Vec4b> Mat4b;
+
+typedef Mat_<short> Mat1s;
+typedef Mat_<Vec2s> Mat2s;
+typedef Mat_<Vec3s> Mat3s;
+typedef Mat_<Vec4s> Mat4s;
+
+typedef Mat_<ushort> Mat1w;
+typedef Mat_<Vec2w> Mat2w;
+typedef Mat_<Vec3w> Mat3w;
+typedef Mat_<Vec4w> Mat4w;
+
+typedef Mat_<int>   Mat1i;
+typedef Mat_<Vec2i> Mat2i;
+typedef Mat_<Vec3i> Mat3i;
+typedef Mat_<Vec4i> Mat4i;
+
+typedef Mat_<float> Mat1f;
+typedef Mat_<Vec2f> Mat2f;
+typedef Mat_<Vec3f> Mat3f;
+typedef Mat_<Vec4f> Mat4f;
+
+typedef Mat_<double> Mat1d;
+typedef Mat_<Vec2d> Mat2d;
+typedef Mat_<Vec3d> Mat3d;
+typedef Mat_<Vec4d> Mat4d;
+
+/** @todo document */
+class CV_EXPORTS UMat
+{
+public:
+    //! default constructor
+    UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    //! constructs 2D matrix of the specified size and type
+    // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+    UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    //! constucts 2D matrix and fills it with the specified value _s.
+    UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! constructs n-dimensional matrix
+    UMat(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(int ndims, const int* sizes, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! copy constructor
+    UMat(const UMat& m);
+
+    //! creates a matrix header for a part of the bigger matrix
+    UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
+    UMat(const UMat& m, const Rect& roi);
+    UMat(const UMat& m, const Range* ranges);
+    UMat(const UMat& m, const std::vector<Range>& ranges);
+    //! builds matrix from std::vector with or without copying the data
+    template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+    //! builds matrix from cv::Vec; the data is copied by default
+    template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
+    //! builds matrix from cv::Matx; the data is copied by default
+    template<typename _Tp, int m, int n> explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+    //! builds matrix from a 2D point
+    template<typename _Tp> explicit UMat(const Point_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from a 3D point
+    template<typename _Tp> explicit UMat(const Point3_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from comma initializer
+    template<typename _Tp> explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! destructor - calls release()
+    ~UMat();
+    //! assignment operators
+    UMat& operator = (const UMat& m);
+
+    Mat getMat(int flags) const;
+
+    //! returns a new matrix header for the specified row
+    UMat row(int y) const;
+    //! returns a new matrix header for the specified column
+    UMat col(int x) const;
+    //! ... for the specified row span
+    UMat rowRange(int startrow, int endrow) const;
+    UMat rowRange(const Range& r) const;
+    //! ... for the specified column span
+    UMat colRange(int startcol, int endcol) const;
+    UMat colRange(const Range& r) const;
+    //! ... for the specified diagonal
+    // (d=0 - the main diagonal,
+    //  >0 - a diagonal from the lower half,
+    //  <0 - a diagonal from the upper half)
+    UMat diag(int d=0) const;
+    //! constructs a square diagonal matrix which main diagonal is vector "d"
+    static UMat diag(const UMat& d);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    UMat clone() const;
+    //! copies the matrix content to "m".
+    // It calls m.create(this->size(), this->type()).
+    void copyTo( OutputArray m ) const;
+    //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+    void copyTo( OutputArray m, InputArray mask ) const;
+    //! converts matrix to another datatype with optional scalng. See cvConvertScale.
+    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+    void assignTo( UMat& m, int type=-1 ) const;
+
+    //! sets every matrix element to s
+    UMat& operator = (const Scalar& s);
+    //! sets some of the matrix elements to s, according to the mask
+    UMat& setTo(InputArray value, InputArray mask=noArray());
+    //! creates alternative matrix header for the same data, with different
+    // number of channels and/or different number of rows. see cvReshape.
+    UMat reshape(int cn, int rows=0) const;
+    UMat reshape(int cn, int newndims, const int* newsz) const;
+
+    //! matrix transposition by means of matrix expressions
+    UMat t() const;
+    //! matrix inversion by means of matrix expressions
+    UMat inv(int method=DECOMP_LU) const;
+    //! per-element matrix multiplication by means of matrix expressions
+    UMat mul(InputArray m, double scale=1) const;
+
+    //! computes dot-product
+    double dot(InputArray m) const;
+
+    //! Matlab-style matrix initialization
+    static UMat zeros(int rows, int cols, int type);
+    static UMat zeros(Size size, int type);
+    static UMat zeros(int ndims, const int* sz, int type);
+    static UMat ones(int rows, int cols, int type);
+    static UMat ones(Size size, int type);
+    static UMat ones(int ndims, const int* sz, int type);
+    static UMat eye(int rows, int cols, int type);
+    static UMat eye(Size size, int type);
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    // previous data is unreferenced if needed.
+    void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(const std::vector<int>& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! increases the reference counter; use with care to avoid memleaks
+    void addref();
+    //! decreases reference counter;
+    // deallocates the data when reference counter reaches 0.
+    void release();
+
+    //! deallocates the matrix data
+    void deallocate();
+    //! internal use function; properly re-allocates _size, _step arrays
+    void copySize(const UMat& m);
+
+    //! locates matrix header within a parent matrix. See below
+    void locateROI( Size& wholeSize, Point& ofs ) const;
+    //! moves/resizes the current matrix ROI inside the parent matrix.
+    UMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    //! extracts a rectangular sub-matrix
+    // (this is a generalized form of row, rowRange etc.)
+    UMat operator()( Range rowRange, Range colRange ) const;
+    UMat operator()( const Rect& roi ) const;
+    UMat operator()( const Range* ranges ) const;
+    UMat operator()(const std::vector<Range>& ranges) const;
+
+    //! returns true iff the matrix data is continuous
+    // (i.e. when there are no gaps between successive rows).
+    // similar to CV_IS_MAT_CONT(cvmat->type)
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    //! returns element size in bytes,
+    // similar to CV_ELEM_SIZE(cvmat->type)
+    size_t elemSize() const;
+    //! returns the size of element channel in bytes.
+    size_t elemSize1() const;
+    //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+    int type() const;
+    //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)
+    int depth() const;
+    //! returns element type, similar to CV_MAT_CN(cvmat->type)
+    int channels() const;
+    //! returns step/elemSize1()
+    size_t step1(int i=0) const;
+    //! returns true if matrix data is NULL
+    bool empty() const;
+    //! returns the total number of matrix elements
+    size_t total() const;
+
+    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    UMat(UMat&& m);
+    UMat& operator = (UMat&& m);
+#endif
+
+    void* handle(int accessFlags) const;
+    void ndoffset(size_t* ofs) const;
+
+    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    UMatUsageFlags usageFlags; // usage flags for allocator
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+
+    // black-box container of UMat data
+    UMatData* u;
+
+    // offset of the submatrix (or 0)
+    size_t offset;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+};
+
+
+/////////////////////////// multi-dimensional sparse matrix //////////////////////////
+
+/** @brief The class SparseMat represents multi-dimensional sparse numerical arrays.
+
+Such a sparse array can store elements of any type that Mat can store. *Sparse* means that only
+non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its
+stored elements can actually become 0. It is up to you to detect such elements and delete them
+using SparseMat::erase ). The non-zero elements are stored in a hash table that grows when it is
+filled so that the search time is O(1) in average (regardless of whether element is there or not).
+Elements can be accessed using the following methods:
+-   Query operations (SparseMat::ptr and the higher-level SparseMat::ref, SparseMat::value and
+    SparseMat::find), for example:
+    @code
+        const int dims = 5;
+        int size[5] = {10, 10, 10, 10, 10};
+        SparseMat sparse_mat(dims, size, CV_32F);
+        for(int i = 0; i < 1000; i++)
+        {
+            int idx[dims];
+            for(int k = 0; k < dims; k++)
+                idx[k] = rand() % size[k];
+            sparse_mat.ref<float>(idx) += 1.f;
+        }
+        cout << "nnz = " << sparse_mat.nzcount() << endl;
+    @endcode
+-   Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator.
+    That is, the iteration loop is familiar to STL users:
+    @code
+        // prints elements of a sparse floating-point matrix
+        // and the sum of elements.
+        SparseMatConstIterator_<float>
+            it = sparse_mat.begin<float>(),
+            it_end = sparse_mat.end<float>();
+        double s = 0;
+        int dims = sparse_mat.dims();
+        for(; it != it_end; ++it)
+        {
+            // print element indices and the element value
+            const SparseMat::Node* n = it.node();
+            printf("(");
+            for(int i = 0; i < dims; i++)
+                printf("%d%s", n->idx[i], i < dims-1 ? ", " : ")");
+            printf(": %g\n", it.value<float>());
+            s += *it;
+        }
+        printf("Element sum is %g\n", s);
+    @endcode
+    If you run this loop, you will notice that elements are not enumerated in a logical order
+    (lexicographical, and so on). They come in the same order as they are stored in the hash table
+    (semi-randomly). You may collect pointers to the nodes and sort them to get the proper ordering.
+    Note, however, that pointers to the nodes may become invalid when you add more elements to the
+    matrix. This may happen due to possible buffer reallocation.
+-   Combination of the above 2 methods when you need to process 2 or more sparse matrices
+    simultaneously. For example, this is how you can compute unnormalized cross-correlation of the 2
+    floating-point sparse matrices:
+    @code
+        double cross_corr(const SparseMat& a, const SparseMat& b)
+        {
+            const SparseMat *_a = &a, *_b = &b;
+            // if b contains less elements than a,
+            // it is faster to iterate through b
+            if(_a->nzcount() > _b->nzcount())
+                std::swap(_a, _b);
+            SparseMatConstIterator_<float> it = _a->begin<float>(),
+                                           it_end = _a->end<float>();
+            double ccorr = 0;
+            for(; it != it_end; ++it)
+            {
+                // take the next element from the first matrix
+                float avalue = *it;
+                const Node* anode = it.node();
+                // and try to find an element with the same index in the second matrix.
+                // since the hash value depends only on the element index,
+                // reuse the hash value stored in the node
+                float bvalue = _b->value<float>(anode->idx,&anode->hashval);
+                ccorr += avalue*bvalue;
+            }
+            return ccorr;
+        }
+    @endcode
+ */
+class CV_EXPORTS SparseMat
+{
+public:
+    typedef SparseMatIterator iterator;
+    typedef SparseMatConstIterator const_iterator;
+
+    enum { MAGIC_VAL=0x42FD0000, MAX_DIM=32, HASH_SCALE=0x5bd1e995, HASH_BIT=0x80000000 };
+
+    //! the sparse matrix header
+    struct CV_EXPORTS Hdr
+    {
+        Hdr(int _dims, const int* _sizes, int _type);
+        void clear();
+        int refcount;
+        int dims;
+        int valueOffset;
+        size_t nodeSize;
+        size_t nodeCount;
+        size_t freeList;
+        std::vector<uchar> pool;
+        std::vector<size_t> hashtab;
+        int size[MAX_DIM];
+    };
+
+    //! sparse matrix node - element of a hash table
+    struct CV_EXPORTS Node
+    {
+        //! hash value
+        size_t hashval;
+        //! index of the next node in the same hash table entry
+        size_t next;
+        //! index of the matrix element
+        int idx[MAX_DIM];
+    };
+
+    /** @brief Various SparseMat constructors.
+     */
+    SparseMat();
+
+    /** @overload
+    @param dims Array dimensionality.
+    @param _sizes Sparce matrix size on all dementions.
+    @param _type Sparse matrix data type.
+    */
+    SparseMat(int dims, const int* _sizes, int _type);
+
+    /** @overload
+    @param m Source matrix for copy constructor. If m is dense matrix (ocvMat) then it will be converted
+    to sparse representation.
+    */
+    SparseMat(const SparseMat& m);
+
+    /** @overload
+    @param m Source matrix for copy constructor. If m is dense matrix (ocvMat) then it will be converted
+    to sparse representation.
+    */
+    explicit SparseMat(const Mat& m);
+
+    //! the destructor
+    ~SparseMat();
+
+    //! assignment operator. This is O(1) operation, i.e. no data is copied
+    SparseMat& operator = (const SparseMat& m);
+    //! equivalent to the corresponding constructor
+    SparseMat& operator = (const Mat& m);
+
+    //! creates full copy of the matrix
+    SparseMat clone() const;
+
+    //! copies all the data to the destination matrix. All the previous content of m is erased
+    void copyTo( SparseMat& m ) const;
+    //! converts sparse matrix to dense matrix.
+    void copyTo( Mat& m ) const;
+    //! multiplies all the matrix elements by the specified scale factor alpha and converts the results to the specified data type
+    void convertTo( SparseMat& m, int rtype, double alpha=1 ) const;
+    //! converts sparse matrix to dense n-dim matrix with optional type conversion and scaling.
+    /*!
+        @param [out] m - output matrix; if it does not have a proper size or type before the operation,
+            it is reallocated
+        @param [in] rtype – desired output matrix type or, rather, the depth since the number of channels
+            are the same as the input has; if rtype is negative, the output matrix will have the
+            same type as the input.
+        @param [in] alpha – optional scale factor
+        @param [in] beta – optional delta added to the scaled values
+    */
+    void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+    // not used now
+    void assignTo( SparseMat& m, int type=-1 ) const;
+
+    //! reallocates sparse matrix.
+    /*!
+        If the matrix already had the proper size and type,
+        it is simply cleared with clear(), otherwise,
+        the old matrix is released (using release()) and the new one is allocated.
+    */
+    void create(int dims, const int* _sizes, int _type);
+    //! sets all the sparse matrix elements to 0, which means clearing the hash table.
+    void clear();
+    //! manually increments the reference counter to the header.
+    void addref();
+    // decrements the header reference counter. When the counter reaches 0, the header and all the underlying data are deallocated.
+    void release();
+
+    //! converts sparse matrix to the old-style representation; all the elements are copied.
+    //operator CvSparseMat*() const;
+    //! returns the size of each element in bytes (not including the overhead - the space occupied by SparseMat::Node elements)
+    size_t elemSize() const;
+    //! returns elemSize()/channels()
+    size_t elemSize1() const;
+
+    //! returns type of sparse matrix elements
+    int type() const;
+    //! returns the depth of sparse matrix elements
+    int depth() const;
+    //! returns the number of channels
+    int channels() const;
+
+    //! returns the array of sizes, or NULL if the matrix is not allocated
+    const int* size() const;
+    //! returns the size of i-th matrix dimension (or 0)
+    int size(int i) const;
+    //! returns the matrix dimensionality
+    int dims() const;
+    //! returns the number of non-zero elements (=the number of hash table nodes)
+    size_t nzcount() const;
+
+    //! computes the element hash value (1D case)
+    size_t hash(int i0) const;
+    //! computes the element hash value (2D case)
+    size_t hash(int i0, int i1) const;
+    //! computes the element hash value (3D case)
+    size_t hash(int i0, int i1, int i2) const;
+    //! computes the element hash value (nD case)
+    size_t hash(const int* idx) const;
+
+    //!@{
+    /*!
+     specialized variants for 1D, 2D, 3D cases and the generic_type one for n-D case.
+     return pointer to the matrix element.
+      - if the element is there (it's non-zero), the pointer to it is returned
+      - if it's not there and createMissing=false, NULL pointer is returned
+      - if it's not there and createMissing=true, then the new element
+        is created and initialized with 0. Pointer to it is returned
+      - if the optional hashval pointer is not NULL, the element hash value is
+        not computed, but *hashval is taken instead.
+    */
+    //! returns pointer to the specified element (1D case)
+    uchar* ptr(int i0, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (2D case)
+    uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (3D case)
+    uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (nD case)
+    uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return read-write reference to the specified sparse matrix element.
+
+     `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`.
+     The methods always return a valid reference.
+     If the element did not exist, it is created and initialiazed with 0.
+    */
+    //! returns reference to the specified element (1D case)
+    template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
+    //! returns reference to the specified element (2D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, size_t* hashval=0);
+    //! returns reference to the specified element (3D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+    //! returns reference to the specified element (nD case)
+    template<typename _Tp> _Tp& ref(const int* idx, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return value of the specified sparse matrix element.
+
+     `value<_Tp>(i0,...[,hashval])` is equivalent to
+     @code
+     { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); }
+     @endcode
+
+     That is, if the element did not exist, the methods return 0.
+     */
+    //! returns value of the specified element (1D case)
+    template<typename _Tp> _Tp value(int i0, size_t* hashval=0) const;
+    //! returns value of the specified element (2D case)
+    template<typename _Tp> _Tp value(int i0, int i1, size_t* hashval=0) const;
+    //! returns value of the specified element (3D case)
+    template<typename _Tp> _Tp value(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns value of the specified element (nD case)
+    template<typename _Tp> _Tp value(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //!@{
+    /*!
+     Return pointer to the specified sparse matrix element if it exists
+
+     `find<_Tp>(i0,...[,hashval])` is equivalent to `(_const Tp*)ptr(i0,...false[,hashval])`.
+
+     If the specified element does not exist, the methods return NULL.
+    */
+    //! returns pointer to the specified element (1D case)
+    template<typename _Tp> const _Tp* find(int i0, size_t* hashval=0) const;
+    //! returns pointer to the specified element (2D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, size_t* hashval=0) const;
+    //! returns pointer to the specified element (3D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns pointer to the specified element (nD case)
+    template<typename _Tp> const _Tp* find(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //! erases the specified element (2D case)
+    void erase(int i0, int i1, size_t* hashval=0);
+    //! erases the specified element (3D case)
+    void erase(int i0, int i1, int i2, size_t* hashval=0);
+    //! erases the specified element (nD case)
+    void erase(const int* idx, size_t* hashval=0);
+
+    //!@{
+    /*!
+       return the sparse matrix iterator pointing to the first sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix beginning
+    SparseMatIterator begin();
+    //! returns the sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatIterator_<_Tp> begin();
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    SparseMatConstIterator begin() const;
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatConstIterator_<_Tp> begin() const;
+    //!@}
+    /*!
+       return the sparse matrix iterator pointing to the element following the last sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix end
+    SparseMatIterator end();
+    //! returns the read-only sparse matrix iterator at the matrix end
+    SparseMatConstIterator end() const;
+    //! returns the typed sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatIterator_<_Tp> end();
+    //! returns the typed read-only sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatConstIterator_<_Tp> end() const;
+
+    //! returns the value stored in the sparse martix node
+    template<typename _Tp> _Tp& value(Node* n);
+    //! returns the value stored in the sparse martix node
+    template<typename _Tp> const _Tp& value(const Node* n) const;
+
+    ////////////// some internal-use methods ///////////////
+    Node* node(size_t nidx);
+    const Node* node(size_t nidx) const;
+
+    uchar* newNode(const int* idx, size_t hashval);
+    void removeNode(size_t hidx, size_t nidx, size_t previdx);
+    void resizeHashTab(size_t newsize);
+
+    int flags;
+    Hdr* hdr;
+};
+
+
+
+///////////////////////////////// SparseMat_<_Tp> ////////////////////////////////////
+
+/** @brief Template sparse n-dimensional array class derived from SparseMat
+
+SparseMat_ is a thin wrapper on top of SparseMat created in the same way as Mat_ . It simplifies
+notation of some operations:
+@code
+    int sz[] = {10, 20, 30};
+    SparseMat_<double> M(3, sz);
+    ...
+    M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9);
+@endcode
+ */
+template<typename _Tp> class SparseMat_ : public SparseMat
+{
+public:
+    typedef SparseMatIterator_<_Tp> iterator;
+    typedef SparseMatConstIterator_<_Tp> const_iterator;
+
+    //! the default constructor
+    SparseMat_();
+    //! the full constructor equivelent to SparseMat(dims, _sizes, DataType<_Tp>::type)
+    SparseMat_(int dims, const int* _sizes);
+    //! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted
+    SparseMat_(const SparseMat& m);
+    //! the copy constructor. This is O(1) operation - no data is copied
+    SparseMat_(const SparseMat_& m);
+    //! converts dense matrix to the sparse form
+    SparseMat_(const Mat& m);
+    //! converts the old-style sparse matrix to the C++ class. All the elements are copied
+    //SparseMat_(const CvSparseMat* m);
+    //! the assignment operator. If DataType<_Tp>.type != m.type(), the m elements are converted
+    SparseMat_& operator = (const SparseMat& m);
+    //! the assignment operator. This is O(1) operation - no data is copied
+    SparseMat_& operator = (const SparseMat_& m);
+    //! converts dense matrix to the sparse form
+    SparseMat_& operator = (const Mat& m);
+
+    //! makes full copy of the matrix. All the elements are duplicated
+    SparseMat_ clone() const;
+    //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type)
+    void create(int dims, const int* _sizes);
+    //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied
+    //operator CvSparseMat*() const;
+
+    //! returns type of the matrix elements
+    int type() const;
+    //! returns depth of the matrix elements
+    int depth() const;
+    //! returns the number of channels in each matrix element
+    int channels() const;
+
+    //! equivalent to SparseMat::ref<_Tp>(i0, hashval)
+    _Tp& ref(int i0, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(i0, i1, hashval)
+    _Tp& ref(int i0, int i1, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(i0, i1, i2, hashval)
+    _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(idx, hashval)
+    _Tp& ref(const int* idx, size_t* hashval=0);
+
+    //! equivalent to SparseMat::value<_Tp>(i0, hashval)
+    _Tp operator()(int i0, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(i0, i1, hashval)
+    _Tp operator()(int i0, int i1, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(i0, i1, i2, hashval)
+    _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(idx, hashval)
+    _Tp operator()(const int* idx, size_t* hashval=0) const;
+
+    //! returns sparse matrix iterator pointing to the first sparse matrix element
+    SparseMatIterator_<_Tp> begin();
+    //! returns read-only sparse matrix iterator pointing to the first sparse matrix element
+    SparseMatConstIterator_<_Tp> begin() const;
+    //! returns sparse matrix iterator pointing to the element following the last sparse matrix element
+    SparseMatIterator_<_Tp> end();
+    //! returns read-only sparse matrix iterator pointing to the element following the last sparse matrix element
+    SparseMatConstIterator_<_Tp> end() const;
+};
+
+
+
+////////////////////////////////// MatConstIterator //////////////////////////////////
+
+class CV_EXPORTS MatConstIterator
+{
+public:
+    typedef uchar* value_type;
+    typedef ptrdiff_t difference_type;
+    typedef const uchar** pointer;
+    typedef uchar* reference;
+
+#ifndef OPENCV_NOSTL
+    typedef std::random_access_iterator_tag iterator_category;
+#endif
+
+    //! default constructor
+    MatConstIterator();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatConstIterator(const Mat* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, const int* _idx);
+    //! copy constructor
+    MatConstIterator(const MatConstIterator& it);
+
+    //! copy operator
+    MatConstIterator& operator = (const MatConstIterator& it);
+    //! returns the current matrix element
+    const uchar* operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    const uchar* operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatConstIterator& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatConstIterator& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatConstIterator& operator --();
+    //! decrements the iterator
+    MatConstIterator operator --(int);
+    //! increments the iterator
+    MatConstIterator& operator ++();
+    //! increments the iterator
+    MatConstIterator operator ++(int);
+    //! returns the current iterator position
+    Point pos() const;
+    //! returns the current iterator position
+    void pos(int* _idx) const;
+
+    ptrdiff_t lpos() const;
+    void seek(ptrdiff_t ofs, bool relative = false);
+    void seek(const int* _idx, bool relative = false);
+
+    const Mat* m;
+    size_t elemSize;
+    const uchar* ptr;
+    const uchar* sliceStart;
+    const uchar* sliceEnd;
+};
+
+
+
+////////////////////////////////// MatConstIterator_ /////////////////////////////////
+
+/** @brief Matrix read-only iterator
+ */
+template<typename _Tp>
+class MatConstIterator_ : public MatConstIterator
+{
+public:
+    typedef _Tp value_type;
+    typedef ptrdiff_t difference_type;
+    typedef const _Tp* pointer;
+    typedef const _Tp& reference;
+
+#ifndef OPENCV_NOSTL
+    typedef std::random_access_iterator_tag iterator_category;
+#endif
+
+    //! default constructor
+    MatConstIterator_();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, const int* _idx);
+    //! copy constructor
+    MatConstIterator_(const MatConstIterator_& it);
+
+    //! copy operator
+    MatConstIterator_& operator = (const MatConstIterator_& it);
+    //! returns the current matrix element
+    const _Tp& operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    const _Tp& operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatConstIterator_& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatConstIterator_& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatConstIterator_& operator --();
+    //! decrements the iterator
+    MatConstIterator_ operator --(int);
+    //! increments the iterator
+    MatConstIterator_& operator ++();
+    //! increments the iterator
+    MatConstIterator_ operator ++(int);
+    //! returns the current iterator position
+    Point pos() const;
+};
+
+
+
+//////////////////////////////////// MatIterator_ ////////////////////////////////////
+
+/** @brief Matrix read-write iterator
+*/
+template<typename _Tp>
+class MatIterator_ : public MatConstIterator_<_Tp>
+{
+public:
+    typedef _Tp* pointer;
+    typedef _Tp& reference;
+
+#ifndef OPENCV_NOSTL
+    typedef std::random_access_iterator_tag iterator_category;
+#endif
+
+    //! the default constructor
+    MatIterator_();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatIterator_(Mat_<_Tp>* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, const int* _idx);
+    //! copy constructor
+    MatIterator_(const MatIterator_& it);
+    //! copy operator
+    MatIterator_& operator = (const MatIterator_<_Tp>& it );
+
+    //! returns the current matrix element
+    _Tp& operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    _Tp& operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatIterator_& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatIterator_& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatIterator_& operator --();
+    //! decrements the iterator
+    MatIterator_ operator --(int);
+    //! increments the iterator
+    MatIterator_& operator ++();
+    //! increments the iterator
+    MatIterator_ operator ++(int);
+};
+
+
+
+/////////////////////////////// SparseMatConstIterator ///////////////////////////////
+
+/**  @brief Read-Only Sparse Matrix Iterator.
+
+ Here is how to use the iterator to compute the sum of floating-point sparse matrix elements:
+
+ \code
+ SparseMatConstIterator it = m.begin(), it_end = m.end();
+ double s = 0;
+ CV_Assert( m.type() == CV_32F );
+ for( ; it != it_end; ++it )
+    s += it.value<float>();
+ \endcode
+*/
+class CV_EXPORTS SparseMatConstIterator
+{
+public:
+    //! the default constructor
+    SparseMatConstIterator();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatConstIterator(const SparseMat* _m);
+    //! the copy constructor
+    SparseMatConstIterator(const SparseMatConstIterator& it);
+
+    //! the assignment operator
+    SparseMatConstIterator& operator = (const SparseMatConstIterator& it);
+
+    //! template method returning the current matrix element
+    template<typename _Tp> const _Tp& value() const;
+    //! returns the current node of the sparse matrix. it.node->idx is the current element index
+    const SparseMat::Node* node() const;
+
+    //! moves iterator to the previous element
+    SparseMatConstIterator& operator --();
+    //! moves iterator to the previous element
+    SparseMatConstIterator operator --(int);
+    //! moves iterator to the next element
+    SparseMatConstIterator& operator ++();
+    //! moves iterator to the next element
+    SparseMatConstIterator operator ++(int);
+
+    //! moves iterator to the element after the last element
+    void seekEnd();
+
+    const SparseMat* m;
+    size_t hashidx;
+    uchar* ptr;
+};
+
+
+
+////////////////////////////////// SparseMatIterator /////////////////////////////////
+
+/** @brief  Read-write Sparse Matrix Iterator
+
+ The class is similar to cv::SparseMatConstIterator,
+ but can be used for in-place modification of the matrix elements.
+*/
+class CV_EXPORTS SparseMatIterator : public SparseMatConstIterator
+{
+public:
+    //! the default constructor
+    SparseMatIterator();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatIterator(SparseMat* _m);
+    //! the full constructor setting the iterator to the specified sparse matrix element
+    SparseMatIterator(SparseMat* _m, const int* idx);
+    //! the copy constructor
+    SparseMatIterator(const SparseMatIterator& it);
+
+    //! the assignment operator
+    SparseMatIterator& operator = (const SparseMatIterator& it);
+    //! returns read-write reference to the current sparse matrix element
+    template<typename _Tp> _Tp& value() const;
+    //! returns pointer to the current sparse matrix node. it.node->idx is the index of the current element (do not modify it!)
+    SparseMat::Node* node() const;
+
+    //! moves iterator to the next element
+    SparseMatIterator& operator ++();
+    //! moves iterator to the next element
+    SparseMatIterator operator ++(int);
+};
+
+
+
+/////////////////////////////// SparseMatConstIterator_ //////////////////////////////
+
+/** @brief  Template Read-Only Sparse Matrix Iterator Class.
+
+ This is the derived from SparseMatConstIterator class that
+ introduces more convenient operator *() for accessing the current element.
+*/
+template<typename _Tp> class SparseMatConstIterator_ : public SparseMatConstIterator
+{
+public:
+
+#ifndef OPENCV_NOSTL
+    typedef std::forward_iterator_tag iterator_category;
+#endif
+
+    //! the default constructor
+    SparseMatConstIterator_();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatConstIterator_(const SparseMat_<_Tp>* _m);
+    SparseMatConstIterator_(const SparseMat* _m);
+    //! the copy constructor
+    SparseMatConstIterator_(const SparseMatConstIterator_& it);
+
+    //! the assignment operator
+    SparseMatConstIterator_& operator = (const SparseMatConstIterator_& it);
+    //! the element access operator
+    const _Tp& operator *() const;
+
+    //! moves iterator to the next element
+    SparseMatConstIterator_& operator ++();
+    //! moves iterator to the next element
+    SparseMatConstIterator_ operator ++(int);
+};
+
+
+
+///////////////////////////////// SparseMatIterator_ /////////////////////////////////
+
+/** @brief  Template Read-Write Sparse Matrix Iterator Class.
+
+ This is the derived from cv::SparseMatConstIterator_ class that
+ introduces more convenient operator *() for accessing the current element.
+*/
+template<typename _Tp> class SparseMatIterator_ : public SparseMatConstIterator_<_Tp>
+{
+public:
+
+#ifndef OPENCV_NOSTL
+    typedef std::forward_iterator_tag iterator_category;
+#endif
+
+    //! the default constructor
+    SparseMatIterator_();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatIterator_(SparseMat_<_Tp>* _m);
+    SparseMatIterator_(SparseMat* _m);
+    //! the copy constructor
+    SparseMatIterator_(const SparseMatIterator_& it);
+
+    //! the assignment operator
+    SparseMatIterator_& operator = (const SparseMatIterator_& it);
+    //! returns the reference to the current element
+    _Tp& operator *() const;
+
+    //! moves the iterator to the next element
+    SparseMatIterator_& operator ++();
+    //! moves the iterator to the next element
+    SparseMatIterator_ operator ++(int);
+};
+
+
+
+/////////////////////////////////// NAryMatIterator //////////////////////////////////
+
+/** @brief n-ary multi-dimensional array iterator.
+
+Use the class to implement unary, binary, and, generally, n-ary element-wise operations on
+multi-dimensional arrays. Some of the arguments of an n-ary function may be continuous arrays, some
+may be not. It is possible to use conventional MatIterator 's for each array but incrementing all of
+the iterators after each small operations may be a big overhead. In this case consider using
+NAryMatIterator to iterate through several matrices simultaneously as long as they have the same
+geometry (dimensionality and all the dimension sizes are the same). On each iteration `it.planes[0]`,
+`it.planes[1]`,... will be the slices of the corresponding matrices.
+
+The example below illustrates how you can compute a normalized and threshold 3D color histogram:
+@code
+    void computeNormalizedColorHist(const Mat& image, Mat& hist, int N, double minProb)
+    {
+        const int histSize[] = {N, N, N};
+
+        // make sure that the histogram has a proper size and type
+        hist.create(3, histSize, CV_32F);
+
+        // and clear it
+        hist = Scalar(0);
+
+        // the loop below assumes that the image
+        // is a 8-bit 3-channel. check it.
+        CV_Assert(image.type() == CV_8UC3);
+        MatConstIterator_<Vec3b> it = image.begin<Vec3b>(),
+                                 it_end = image.end<Vec3b>();
+        for( ; it != it_end; ++it )
+        {
+            const Vec3b& pix = *it;
+            hist.at<float>(pix[0]*N/256, pix[1]*N/256, pix[2]*N/256) += 1.f;
+        }
+
+        minProb *= image.rows*image.cols;
+
+        // initialize iterator (the style is different from STL).
+        // after initialization the iterator will contain
+        // the number of slices or planes the iterator will go through.
+        // it simultaneously increments iterators for several matrices
+        // supplied as a null terminated list of pointers
+        const Mat* arrays[] = {&hist, 0};
+        Mat planes[1];
+        NAryMatIterator itNAry(arrays, planes, 1);
+        double s = 0;
+        // iterate through the matrix. on each iteration
+        // itNAry.planes[i] (of type Mat) will be set to the current plane
+        // of the i-th n-dim matrix passed to the iterator constructor.
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+        {
+            threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO);
+            s += sum(itNAry.planes[0])[0];
+        }
+
+        s = 1./s;
+        itNAry = NAryMatIterator(arrays, planes, 1);
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+            itNAry.planes[0] *= s;
+    }
+@endcode
+ */
+class CV_EXPORTS NAryMatIterator
+{
+public:
+    //! the default constructor
+    NAryMatIterator();
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, uchar** ptrs, int narrays=-1);
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1);
+    //! the separate iterator initialization method
+    void init(const Mat** arrays, Mat* planes, uchar** ptrs, int narrays=-1);
+
+    //! proceeds to the next plane of every iterated matrix
+    NAryMatIterator& operator ++();
+    //! proceeds to the next plane of every iterated matrix (postfix increment operator)
+    NAryMatIterator operator ++(int);
+
+    //! the iterated arrays
+    const Mat** arrays;
+    //! the current planes
+    Mat* planes;
+    //! data pointers
+    uchar** ptrs;
+    //! the number of arrays
+    int narrays;
+    //! the number of hyper-planes that the iterator steps through
+    size_t nplanes;
+    //! the size of each segment (in elements)
+    size_t size;
+protected:
+    int iterdepth;
+    size_t idx;
+};
+
+
+
+///////////////////////////////// Matrix Expressions /////////////////////////////////
+
+class CV_EXPORTS MatOp
+{
+public:
+    MatOp();
+    virtual ~MatOp();
+
+    virtual bool elementWise(const MatExpr& expr) const;
+    virtual void assign(const MatExpr& expr, Mat& m, int type=-1) const = 0;
+    virtual void roi(const MatExpr& expr, const Range& rowRange,
+                     const Range& colRange, MatExpr& res) const;
+    virtual void diag(const MatExpr& expr, int d, MatExpr& res) const;
+    virtual void augAssignAdd(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignSubtract(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignMultiply(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignDivide(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignAnd(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignOr(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignXor(const MatExpr& expr, Mat& m) const;
+
+    virtual void add(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void add(const MatExpr& expr1, const Scalar& s, MatExpr& res) const;
+
+    virtual void subtract(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void subtract(const Scalar& s, const MatExpr& expr, MatExpr& res) const;
+
+    virtual void multiply(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const;
+    virtual void multiply(const MatExpr& expr1, double s, MatExpr& res) const;
+
+    virtual void divide(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const;
+    virtual void divide(double s, const MatExpr& expr, MatExpr& res) const;
+
+    virtual void abs(const MatExpr& expr, MatExpr& res) const;
+
+    virtual void transpose(const MatExpr& expr, MatExpr& res) const;
+    virtual void matmul(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void invert(const MatExpr& expr, int method, MatExpr& res) const;
+
+    virtual Size size(const MatExpr& expr) const;
+    virtual int type(const MatExpr& expr) const;
+};
+
+/** @brief Matrix expression representation
+@anchor MatrixExpressions
+This is a list of implemented matrix operations that can be combined in arbitrary complex
+expressions (here A, B stand for matrices ( Mat ), s for a scalar ( Scalar ), alpha for a
+real-valued scalar ( double )):
+-   Addition, subtraction, negation: `A+B`, `A-B`, `A+s`, `A-s`, `s+A`, `s-A`, `-A`
+-   Scaling: `A*alpha`
+-   Per-element multiplication and division: `A.mul(B)`, `A/B`, `alpha/A`
+-   Matrix multiplication: `A*B`
+-   Transposition: `A.t()` (means A<sup>T</sup>)
+-   Matrix inversion and pseudo-inversion, solving linear systems and least-squares problems:
+    `A.inv([method]) (~ A<sup>-1</sup>)`,   `A.inv([method])*B (~ X: AX=B)`
+-   Comparison: `A cmpop B`, `A cmpop alpha`, `alpha cmpop A`, where *cmpop* is one of
+  `>`, `>=`, `==`, `!=`, `<=`, `<`. The result of comparison is an 8-bit single channel mask whose
+    elements are set to 255 (if the particular element or pair of elements satisfy the condition) or
+    0.
+-   Bitwise logical operations: `A logicop B`, `A logicop s`, `s logicop A`, `~A`, where *logicop* is one of
+  `&`, `|`, `^`.
+-   Element-wise minimum and maximum: `min(A, B)`, `min(A, alpha)`, `max(A, B)`, `max(A, alpha)`
+-   Element-wise absolute value: `abs(A)`
+-   Cross-product, dot-product: `A.cross(B)`, `A.dot(B)`
+-   Any function of matrix or matrices and scalars that returns a matrix or a scalar, such as norm,
+    mean, sum, countNonZero, trace, determinant, repeat, and others.
+-   Matrix initializers ( Mat::eye(), Mat::zeros(), Mat::ones() ), matrix comma-separated
+    initializers, matrix constructors and operators that extract sub-matrices (see Mat description).
+-   Mat_<destination_type>() constructors to cast the result to the proper type.
+@note Comma-separated initializers and probably some other operations may require additional
+explicit Mat() or Mat_<T>() constructor calls to resolve a possible ambiguity.
+
+Here are examples of matrix expressions:
+@code
+    // compute pseudo-inverse of A, equivalent to A.inv(DECOMP_SVD)
+    SVD svd(A);
+    Mat pinvA = svd.vt.t()*Mat::diag(1./svd.w)*svd.u.t();
+
+    // compute the new vector of parameters in the Levenberg-Marquardt algorithm
+    x -= (A.t()*A + lambda*Mat::eye(A.cols,A.cols,A.type())).inv(DECOMP_CHOLESKY)*(A.t()*err);
+
+    // sharpen image using "unsharp mask" algorithm
+    Mat blurred; double sigma = 1, threshold = 5, amount = 1;
+    GaussianBlur(img, blurred, Size(), sigma, sigma);
+    Mat lowContrastMask = abs(img - blurred) < threshold;
+    Mat sharpened = img*(1+amount) + blurred*(-amount);
+    img.copyTo(sharpened, lowContrastMask);
+@endcode
+*/
+class CV_EXPORTS MatExpr
+{
+public:
+    MatExpr();
+    explicit MatExpr(const Mat& m);
+
+    MatExpr(const MatOp* _op, int _flags, const Mat& _a = Mat(), const Mat& _b = Mat(),
+            const Mat& _c = Mat(), double _alpha = 1, double _beta = 1, const Scalar& _s = Scalar());
+
+    operator Mat() const;
+    template<typename _Tp> operator Mat_<_Tp>() const;
+
+    Size size() const;
+    int type() const;
+
+    MatExpr row(int y) const;
+    MatExpr col(int x) const;
+    MatExpr diag(int d = 0) const;
+    MatExpr operator()( const Range& rowRange, const Range& colRange ) const;
+    MatExpr operator()( const Rect& roi ) const;
+
+    MatExpr t() const;
+    MatExpr inv(int method = DECOMP_LU) const;
+    MatExpr mul(const MatExpr& e, double scale=1) const;
+    MatExpr mul(const Mat& m, double scale=1) const;
+
+    Mat cross(const Mat& m) const;
+    double dot(const Mat& m) const;
+
+    const MatOp* op;
+    int flags;
+
+    Mat a, b, c;
+    double alpha, beta;
+    Scalar s;
+};
+
+//! @} core_basic
+
+//! @relates cv::MatExpr
+//! @{
+CV_EXPORTS MatExpr operator + (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator + (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator + (const Scalar& s, const Mat& a);
+CV_EXPORTS MatExpr operator + (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s);
+CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e);
+CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator - (const Scalar& s, const Mat& a);
+CV_EXPORTS MatExpr operator - (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s);
+CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e);
+CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator - (const Mat& m);
+CV_EXPORTS MatExpr operator - (const MatExpr& e);
+
+CV_EXPORTS MatExpr operator * (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator * (const Mat& a, double s);
+CV_EXPORTS MatExpr operator * (double s, const Mat& a);
+CV_EXPORTS MatExpr operator * (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator * (const MatExpr& e, double s);
+CV_EXPORTS MatExpr operator * (double s, const MatExpr& e);
+CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator / (const Mat& a, double s);
+CV_EXPORTS MatExpr operator / (double s, const Mat& a);
+CV_EXPORTS MatExpr operator / (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator / (const MatExpr& e, double s);
+CV_EXPORTS MatExpr operator / (double s, const MatExpr& e);
+CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator < (const Mat& a, double s);
+CV_EXPORTS MatExpr operator < (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator <= (const Mat& a, double s);
+CV_EXPORTS MatExpr operator <= (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator == (const Mat& a, double s);
+CV_EXPORTS MatExpr operator == (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator != (const Mat& a, double s);
+CV_EXPORTS MatExpr operator != (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator >= (const Mat& a, double s);
+CV_EXPORTS MatExpr operator >= (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator > (const Mat& a, double s);
+CV_EXPORTS MatExpr operator > (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator ~(const Mat& m);
+
+CV_EXPORTS MatExpr min(const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr min(const Mat& a, double s);
+CV_EXPORTS MatExpr min(double s, const Mat& a);
+
+CV_EXPORTS MatExpr max(const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr max(const Mat& a, double s);
+CV_EXPORTS MatExpr max(double s, const Mat& a);
+
+/** @brief Calculates an absolute value of each matrix element.
+
+abs is a meta-function that is expanded to one of absdiff or convertScaleAbs forms:
+- C = abs(A-B) is equivalent to `absdiff(A, B, C)`
+- C = abs(A) is equivalent to `absdiff(A, Scalar::all(0), C)`
+- C = `Mat_<Vec<uchar,n> >(abs(A*alpha + beta))` is equivalent to `convertScaleAbs(A, C, alpha,
+beta)`
+
+The output matrix has the same size and the same type as the input one except for the last case,
+where C is depth=CV_8U .
+@param m matrix.
+@sa @ref MatrixExpressions, absdiff, convertScaleAbs
+ */
+CV_EXPORTS MatExpr abs(const Mat& m);
+/** @overload
+@param e matrix expression.
+*/
+CV_EXPORTS MatExpr abs(const MatExpr& e);
+//! @} relates cv::MatExpr
+
+} // cv
+
+#include "opencv2/core/mat.inl.hpp"
+
+#endif // OPENCV_CORE_MAT_HPP
diff --git a/thirdparty/linux/include/opencv2/core/mat.inl.hpp b/thirdparty/linux/include/opencv2/core/mat.inl.hpp
new file mode 100644
index 0000000..4a32de1
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/mat.inl.hpp
@@ -0,0 +1,3733 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP
+#define OPENCV_CORE_MATRIX_OPERATIONS_HPP
+
+#ifndef __cplusplus
+#  error mat.inl.hpp header must be compiled as C++
+#endif
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+//////////////////////// Input/Output Arrays ////////////////////////
+
+inline void _InputArray::init(int _flags, const void* _obj)
+{ flags = _flags; obj = (void*)_obj; }
+
+inline void _InputArray::init(int _flags, const void* _obj, Size _sz)
+{ flags = _flags; obj = (void*)_obj; sz = _sz; }
+
+inline void* _InputArray::getObj() const { return obj; }
+inline int _InputArray::getFlags() const { return flags; }
+inline Size _InputArray::getSz() const { return sz; }
+
+inline _InputArray::_InputArray() { init(NONE, 0); }
+inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
+inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
+inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
+
+inline
+_InputArray::_InputArray(const std::vector<bool>& vec)
+{ init(FIXED_TYPE + STD_BOOL_VECTOR + DataType<bool>::type + ACCESS_READ, &vec); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_READ, &vec); }
+
+template<typename _Tp, int m, int n> inline
+_InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_READ, &m); }
+
+inline _InputArray::_InputArray(const double& val)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }
+
+inline _InputArray::_InputArray(const MatExpr& expr)
+{ init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); }
+
+inline _InputArray::_InputArray(const cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); }
+
+inline _InputArray::_InputArray(const std::vector<cuda::GpuMat>& d_mat)
+{	init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat);}
+
+inline _InputArray::_InputArray(const ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_READ, &buf); }
+
+inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
+
+inline _InputArray::~_InputArray() {}
+
+inline Mat _InputArray::getMat(int i) const
+{
+    if( kind() == MAT && i < 0 )
+        return *(const Mat*)obj;
+    return getMat_(i);
+}
+
+inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; }
+inline bool _InputArray::isUMat() const  { return kind() == _InputArray::UMAT; }
+inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; }
+inline bool _InputArray::isUMatVector() const  { return kind() == _InputArray::STD_VECTOR_UMAT; }
+inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
+inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR; }
+inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
+inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
+inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+inline
+_OutputArray::_OutputArray(std::vector<bool>&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an output array\n"); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
+
+template<typename _Tp, int m, int n> inline
+_OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
+
+template<typename _Tp, int m, int n> inline
+_OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+
+inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(std::vector<cuda::GpuMat>& d_mat)
+{	init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat);}
+
+inline _OutputArray::_OutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
+
+inline _OutputArray::_OutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const UMat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<UMat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+
+inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
+
+///////////////////////////////////////////////////////////////////////////////////////////
+
+inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
+inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
+inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
+inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(std::vector<bool>&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an input/output array\n"); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
+
+inline _InputOutputArray::_InputOutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const UMat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<UMat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<cuda::GpuMat>& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+
+template<> inline _InputOutputArray::_InputOutputArray(std::vector<cuda::GpuMat>& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+
+inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
+
+//////////////////////////////////////////// Mat //////////////////////////////////////////
+
+inline
+Mat::Mat()
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(Size _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create( _sz.height, _sz.width, _type );
+}
+
+inline
+Mat::Mat(Size _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_sz.height, _sz.width, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(int _dims, const int* _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+}
+
+inline
+Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_sz, _type);
+}
+
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
+{
+    create(_sz, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(const Mat& m)
+    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
+      datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
+      u(m.u), size(&rows)
+{
+    if( u )
+        CV_XADD(&u->refcount, 1);
+    if( m.dims <= 2 )
+    {
+        step[0] = m.step[0]; step[1] = m.step[1];
+    }
+    else
+    {
+        dims = 0;
+        copySize(m);
+    }
+}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
+    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
+{
+    CV_Assert(total() == 0 || data != NULL);
+
+    size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
+    size_t minstep = cols * esz;
+    if( _step == AUTO_STEP )
+    {
+        _step = minstep;
+        flags |= CONTINUOUS_FLAG;
+    }
+    else
+    {
+        if( rows == 1 ) _step = minstep;
+        CV_DbgAssert( _step >= minstep );
+
+        if (_step % esz1 != 0)
+        {
+            CV_Error(Error::BadStep, "Step must be a multiple of esz1");
+        }
+
+        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
+    }
+    step[0] = _step;
+    step[1] = esz;
+    datalimit = datastart + _step * rows;
+    dataend = datalimit - _step + minstep;
+}
+
+inline
+Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
+    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
+{
+    CV_Assert(total() == 0 || data != NULL);
+
+    size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
+    size_t minstep = cols*esz;
+    if( _step == AUTO_STEP )
+    {
+        _step = minstep;
+        flags |= CONTINUOUS_FLAG;
+    }
+    else
+    {
+        if( rows == 1 ) _step = minstep;
+        CV_DbgAssert( _step >= minstep );
+
+        if (_step % esz1 != 0)
+        {
+            CV_Error(Error::BadStep, "Step must be a multiple of esz1");
+        }
+
+        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
+    }
+    step[0] = _step;
+    step[1] = esz;
+    datalimit = datastart + _step*rows;
+    dataend = datalimit - _step + minstep;
+}
+
+template<typename _Tp> inline
+Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+      cols(1), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    if(vec.empty())
+        return;
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&vec[0];
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+}
+
+template<typename _Tp, int n> inline
+Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)vec.val;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat(n, 1, DataType<_Tp>::type, (void*)vec.val).copyTo(*this);
+}
+
+
+template<typename _Tp, int m, int n> inline
+Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    if( !copyData )
+    {
+        step[0] = cols * sizeof(_Tp);
+        step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)M.val;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat(m, n, DataType<_Tp>::type, (uchar*)M.val).copyTo(*this);
+}
+
+template<typename _Tp> inline
+Mat::Mat(const Point_<_Tp>& pt, bool copyData)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&pt.x;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+    {
+        create(2, 1, DataType<_Tp>::type);
+        ((_Tp*)data)[0] = pt.x;
+        ((_Tp*)data)[1] = pt.y;
+    }
+}
+
+template<typename _Tp> inline
+Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&pt.x;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+    {
+        create(3, 1, DataType<_Tp>::type);
+        ((_Tp*)data)[0] = pt.x;
+        ((_Tp*)data)[1] = pt.y;
+        ((_Tp*)data)[2] = pt.z;
+    }
+}
+
+template<typename _Tp> inline
+Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
+    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+{
+    *this = commaInitializer.operator Mat_<_Tp>();
+}
+
+inline
+Mat::~Mat()
+{
+    release();
+    if( step.p != step.buf )
+        fastFree(step.p);
+}
+
+inline
+Mat& Mat::operator = (const Mat& m)
+{
+    if( this != &m )
+    {
+        if( m.u )
+            CV_XADD(&m.u->refcount, 1);
+        release();
+        flags = m.flags;
+        if( dims <= 2 && m.dims <= 2 )
+        {
+            dims = m.dims;
+            rows = m.rows;
+            cols = m.cols;
+            step[0] = m.step[0];
+            step[1] = m.step[1];
+        }
+        else
+            copySize(m);
+        data = m.data;
+        datastart = m.datastart;
+        dataend = m.dataend;
+        datalimit = m.datalimit;
+        allocator = m.allocator;
+        u = m.u;
+    }
+    return *this;
+}
+
+inline
+Mat Mat::row(int y) const
+{
+    return Mat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+Mat Mat::col(int x) const
+{
+    return Mat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+Mat Mat::rowRange(int startrow, int endrow) const
+{
+    return Mat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+Mat Mat::rowRange(const Range& r) const
+{
+    return Mat(*this, r, Range::all());
+}
+
+inline
+Mat Mat::colRange(int startcol, int endcol) const
+{
+    return Mat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+Mat Mat::colRange(const Range& r) const
+{
+    return Mat(*this, Range::all(), r);
+}
+
+inline
+Mat Mat::clone() const
+{
+    Mat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void Mat::assignTo( Mat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void Mat::create(int _rows, int _cols, int _type)
+{
+    _type &= TYPE_MASK;
+    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data )
+        return;
+    int sz[] = {_rows, _cols};
+    create(2, sz, _type);
+}
+
+inline
+void Mat::create(Size _sz, int _type)
+{
+    create(_sz.height, _sz.width, _type);
+}
+
+inline
+void Mat::addref()
+{
+    if( u )
+        CV_XADD(&u->refcount, 1);
+}
+
+inline
+void Mat::release()
+{
+    if( u && CV_XADD(&u->refcount, -1) == 1 )
+        deallocate();
+    u = NULL;
+    datastart = dataend = datalimit = data = 0;
+    for(int i = 0; i < dims; i++)
+        size.p[i] = 0;
+#ifdef _DEBUG
+    flags = MAGIC_VAL;
+    dims = rows = cols = 0;
+    if(step.p != step.buf)
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+#endif
+}
+
+inline
+Mat Mat::operator()( Range _rowRange, Range _colRange ) const
+{
+    return Mat(*this, _rowRange, _colRange);
+}
+
+inline
+Mat Mat::operator()( const Rect& roi ) const
+{
+    return Mat(*this, roi);
+}
+
+inline
+Mat Mat::operator()(const Range* ranges) const
+{
+    return Mat(*this, ranges);
+}
+
+inline
+Mat Mat::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat(*this, ranges);
+}
+
+inline
+bool Mat::isContinuous() const
+{
+    return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool Mat::isSubmatrix() const
+{
+    return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t Mat::elemSize() const
+{
+    return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t Mat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int Mat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int Mat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int Mat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t Mat::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+inline
+bool Mat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+inline
+size_t Mat::total() const
+{
+    if( dims <= 2 )
+        return (size_t)rows * cols;
+    size_t p = 1;
+    for( int i = 0; i < dims; i++ )
+        p *= size[i];
+    return p;
+}
+
+inline
+uchar* Mat::ptr(int y)
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return data + step.p[0] * y;
+}
+
+inline
+const uchar* Mat::ptr(int y) const
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return data + step.p[0] * y;
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int y)
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return (_Tp*)(data + step.p[0] * y);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int y) const
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && data && (unsigned)y < (unsigned)size.p[0]) );
+    return (const _Tp*)(data + step.p[0] * y);
+}
+
+inline
+uchar* Mat::ptr(int i0, int i1)
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return data + i0 * step.p[0] + i1 * step.p[1];
+}
+
+inline
+const uchar* Mat::ptr(int i0, int i1) const
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return data + i0 * step.p[0] + i1 * step.p[1];
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int i0, int i1)
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int i0, int i1) const
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1]);
+}
+
+inline
+uchar* Mat::ptr(int i0, int i1, int i2)
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2];
+}
+
+inline
+const uchar* Mat::ptr(int i0, int i1, int i2) const
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2];
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int i0, int i1, int i2)
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int i0, int i1, int i2) const
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]);
+}
+
+inline
+uchar* Mat::ptr(const int* idx)
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return p;
+}
+
+inline
+const uchar* Mat::ptr(const int* idx) const
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return p;
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0, int i1)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    return ((_Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0, int i1) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    return ((const _Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(Point pt)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(Point pt) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((_Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(_Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((_Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((const _Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(const _Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((const _Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0, int i1, int i2)
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(_Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0, int i1, int i2) const
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(const _Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(const int* idx)
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(_Tp*)ptr(idx);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(const int* idx) const
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(const _Tp*)ptr(idx);
+}
+
+template<typename _Tp, int n> inline
+_Tp& Mat::at(const Vec<int, n>& idx)
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(_Tp*)ptr(idx.val);
+}
+
+template<typename _Tp, int n> inline
+const _Tp& Mat::at(const Vec<int, n>& idx) const
+{
+    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    return *(const _Tp*)ptr(idx.val);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::begin() const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::end() const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
+    it += total();
+    return it;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat::begin()
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatIterator_<_Tp>((Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat::end()
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatIterator_<_Tp> it((Mat_<_Tp>*)this);
+    it += total();
+    return it;
+}
+
+template<typename _Tp, typename Functor> inline
+void Mat::forEach(const Functor& operation) {
+    this->forEach_impl<_Tp>(operation);
+}
+
+template<typename _Tp, typename Functor> inline
+void Mat::forEach(const Functor& operation) const {
+    // call as not const
+    (const_cast<Mat*>(this))->forEach<const _Tp>(operation);
+}
+
+template<typename _Tp> inline
+Mat::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> v;
+    copyTo(v);
+    return v;
+}
+
+template<typename _Tp, int n> inline
+Mat::operator Vec<_Tp, n>() const
+{
+    CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) &&
+               rows + cols - 1 == n && channels() == 1 );
+
+    if( isContinuous() && type() == DataType<_Tp>::type )
+        return Vec<_Tp, n>((_Tp*)data);
+    Vec<_Tp, n> v;
+    Mat tmp(rows, cols, DataType<_Tp>::type, v.val);
+    convertTo(tmp, tmp.type());
+    return v;
+}
+
+template<typename _Tp, int m, int n> inline
+Mat::operator Matx<_Tp, m, n>() const
+{
+    CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 );
+
+    if( isContinuous() && type() == DataType<_Tp>::type )
+        return Matx<_Tp, m, n>((_Tp*)data);
+    Matx<_Tp, m, n> mtx;
+    Mat tmp(rows, cols, DataType<_Tp>::type, mtx.val);
+    convertTo(tmp, tmp.type());
+    return mtx;
+}
+
+template<typename _Tp> inline
+void Mat::push_back(const _Tp& elem)
+{
+    if( !data )
+    {
+        *this = Mat(1, 1, DataType<_Tp>::type, (void*)&elem).clone();
+        return;
+    }
+    CV_Assert(DataType<_Tp>::type == type() && cols == 1
+              /* && dims == 2 (cols == 1 implies dims == 2) */);
+    const uchar* tmp = dataend + step[0];
+    if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
+    {
+        *(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem;
+        dataend = tmp;
+    }
+    else
+        push_back_(&elem);
+}
+
+template<typename _Tp> inline
+void Mat::push_back(const Mat_<_Tp>& m)
+{
+    push_back((const Mat&)m);
+}
+
+template<> inline
+void Mat::push_back(const MatExpr& expr)
+{
+    push_back(static_cast<Mat>(expr));
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+inline
+Mat::Mat(Mat&& m)
+    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
+      datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
+      u(m.u), size(&rows)
+{
+    if (m.dims <= 2)  // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL;
+    m.allocator = NULL;
+    m.u = NULL;
+}
+
+inline
+Mat& Mat::operator = (Mat&& m)
+{
+    if (this == &m)
+      return *this;
+
+    release();
+    flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data;
+    datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator;
+    u = m.u;
+    if (step.p != step.buf) // release self step/size
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+    if (m.dims <= 2) // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL;
+    m.allocator = NULL;
+    m.u = NULL;
+    return *this;
+}
+
+#endif
+
+
+///////////////////////////// MatSize ////////////////////////////
+
+inline
+MatSize::MatSize(int* _p)
+    : p(_p) {}
+
+inline
+Size MatSize::operator()() const
+{
+    CV_DbgAssert(p[-1] <= 2);
+    return Size(p[1], p[0]);
+}
+
+inline
+const int& MatSize::operator[](int i) const
+{
+    return p[i];
+}
+
+inline
+int& MatSize::operator[](int i)
+{
+    return p[i];
+}
+
+inline
+MatSize::operator const int*() const
+{
+    return p;
+}
+
+inline
+bool MatSize::operator == (const MatSize& sz) const
+{
+    int d = p[-1];
+    int dsz = sz.p[-1];
+    if( d != dsz )
+        return false;
+    if( d == 2 )
+        return p[0] == sz.p[0] && p[1] == sz.p[1];
+
+    for( int i = 0; i < d; i++ )
+        if( p[i] != sz.p[i] )
+            return false;
+    return true;
+}
+
+inline
+bool MatSize::operator != (const MatSize& sz) const
+{
+    return !(*this == sz);
+}
+
+
+
+///////////////////////////// MatStep ////////////////////////////
+
+inline
+MatStep::MatStep()
+{
+    p = buf; p[0] = p[1] = 0;
+}
+
+inline
+MatStep::MatStep(size_t s)
+{
+    p = buf; p[0] = s; p[1] = 0;
+}
+
+inline
+const size_t& MatStep::operator[](int i) const
+{
+    return p[i];
+}
+
+inline
+size_t& MatStep::operator[](int i)
+{
+    return p[i];
+}
+
+inline MatStep::operator size_t() const
+{
+    CV_DbgAssert( p == buf );
+    return buf[0];
+}
+
+inline MatStep& MatStep::operator = (size_t s)
+{
+    CV_DbgAssert( p == buf );
+    buf[0] = s;
+    return *this;
+}
+
+
+
+////////////////////////////// Mat_<_Tp> ////////////////////////////
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_()
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols)
+    : Mat(_rows, _cols, DataType<_Tp>::type)
+{
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value)
+    : Mat(_rows, _cols, DataType<_Tp>::type)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz)
+    : Mat(_sz.height, _sz.width, DataType<_Tp>::type)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz, const _Tp& value)
+    : Mat(_sz.height, _sz.width, DataType<_Tp>::type)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz)
+    : Mat(_dims, _sz, DataType<_Tp>::type)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s)
+    : Mat(_dims, _sz, DataType<_Tp>::type, Scalar(_s))
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps)
+    : Mat(_dims, _sz, DataType<_Tp>::type, _data, _steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector<Range>& ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat& m)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    *this = m;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m)
+    : Mat(m)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps)
+    : Mat(_rows, _cols, DataType<_Tp>::type, _data, steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Range& _rowRange, const Range& _colRange)
+    : Mat(m, _rowRange, _colRange)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi)
+    : Mat(m, roi)
+{}
+
+template<typename _Tp> template<int n> inline
+Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData)
+    : Mat(n / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&vec)
+{
+    CV_Assert(n%DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> template<int m, int n> inline
+Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& M, bool copyData)
+    : Mat(m, n / DataType<_Tp>::channels, DataType<_Tp>::type, (void*)&M)
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(2 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
+{
+    CV_Assert(2 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(3 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
+{
+    CV_Assert(3 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const MatCommaInitializer_<_Tp>& commaInitializer)
+    : Mat(commaInitializer)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData)
+    : Mat(vec, copyData)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
+{
+    if( DataType<_Tp>::type == m.type() )
+    {
+        Mat::operator = (m);
+        return *this;
+    }
+    if( DataType<_Tp>::depth == m.depth() )
+    {
+        return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0));
+    }
+    CV_DbgAssert(DataType<_Tp>::channels == m.channels());
+    m.convertTo(*this, type());
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat_& m)
+{
+    Mat::operator=(m);
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s)
+{
+    typedef typename DataType<_Tp>::vec_type VT;
+    Mat::operator=(Scalar((const VT&)s));
+    return *this;
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(int _rows, int _cols)
+{
+    Mat::create(_rows, _cols, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(Size _sz)
+{
+    Mat::create(_sz, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(int _dims, const int* _sz)
+{
+    Mat::create(_dims, _sz, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const
+{
+    return Mat_<_Tp>(Mat::cross(m));
+}
+
+template<typename _Tp> template<typename T2> inline
+Mat_<_Tp>::operator Mat_<T2>() const
+{
+    return Mat_<T2>(*this);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::row(int y) const
+{
+    return Mat_(*this, Range(y, y+1), Range::all());
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::col(int x) const
+{
+    return Mat_(*this, Range::all(), Range(x, x+1));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::diag(int d) const
+{
+    return Mat_(Mat::diag(d));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::clone() const
+{
+    return Mat_(Mat::clone());
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::elemSize() const
+{
+    CV_DbgAssert( Mat::elemSize() == sizeof(_Tp) );
+    return sizeof(_Tp);
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::elemSize1() const
+{
+    CV_DbgAssert( Mat::elemSize1() == sizeof(_Tp) / DataType<_Tp>::channels );
+    return sizeof(_Tp) / DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::type() const
+{
+    CV_DbgAssert( Mat::type() == DataType<_Tp>::type );
+    return DataType<_Tp>::type;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::depth() const
+{
+    CV_DbgAssert( Mat::depth() == DataType<_Tp>::depth );
+    return DataType<_Tp>::depth;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::channels() const
+{
+    CV_DbgAssert( Mat::channels() == DataType<_Tp>::channels );
+    return DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::stepT(int i) const
+{
+    return step.p[i] / elemSize();
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::adjustROI( int dtop, int dbottom, int dleft, int dright )
+{
+    return (Mat_<_Tp>&)(Mat::adjustROI(dtop, dbottom, dleft, dright));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Range& _rowRange, const Range& _colRange ) const
+{
+    return Mat_<_Tp>(*this, _rowRange, _colRange);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Rect& roi ) const
+{
+    return Mat_<_Tp>(*this, roi);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const
+{
+    return Mat_<_Tp>(*this, ranges);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat_<_Tp>(*this, ranges);
+}
+
+template<typename _Tp> inline
+_Tp* Mat_<_Tp>::operator [](int y)
+{
+    CV_DbgAssert( 0 <= y && y < rows );
+    return (_Tp*)(data + y*step.p[0]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat_<_Tp>::operator [](int y) const
+{
+    CV_DbgAssert( 0 <= y && y < rows );
+    return (const _Tp*)(data + y*step.p[0]);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0, int i1)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == DataType<_Tp>::type);
+    return ((_Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == DataType<_Tp>::type);
+    return ((const _Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(Point pt)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == DataType<_Tp>::type);
+    return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(Point pt) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == DataType<_Tp>::type);
+    return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(const int* idx)
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(const int* idx) const
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> template<int n> inline
+_Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx)
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> template<int n> inline
+const _Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx) const
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0)
+{
+    return this->at<_Tp>(i0);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0) const
+{
+    return this->at<_Tp>(i0);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2)
+{
+    return this->at<_Tp>(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) const
+{
+    return this->at<_Tp>(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> v;
+    copyTo(v);
+    return v;
+}
+
+template<typename _Tp> template<int n> inline
+Mat_<_Tp>::operator Vec<typename DataType<_Tp>::channel_type, n>() const
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+
+#if defined _MSC_VER
+    const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround)
+    return pMat->operator Vec<typename DataType<_Tp>::channel_type, n>();
+#else
+    return this->Mat::operator Vec<typename DataType<_Tp>::channel_type, n>();
+#endif
+}
+
+template<typename _Tp> template<int m, int n> inline
+Mat_<_Tp>::operator Matx<typename DataType<_Tp>::channel_type, m, n>() const
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+
+#if defined _MSC_VER
+    const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround)
+    Matx<typename DataType<_Tp>::channel_type, m, n> res = pMat->operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+    return res;
+#else
+    Matx<typename DataType<_Tp>::channel_type, m, n> res = this->Mat::operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+    return res;
+#endif
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat_<_Tp>::begin() const
+{
+    return Mat::begin<_Tp>();
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat_<_Tp>::end() const
+{
+    return Mat::end<_Tp>();
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat_<_Tp>::begin()
+{
+    return Mat::begin<_Tp>();
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat_<_Tp>::end()
+{
+    return Mat::end<_Tp>();
+}
+
+template<typename _Tp> template<typename Functor> inline
+void Mat_<_Tp>::forEach(const Functor& operation) {
+    Mat::forEach<_Tp, Functor>(operation);
+}
+
+template<typename _Tp> template<typename Functor> inline
+void Mat_<_Tp>::forEach(const Functor& operation) const {
+    Mat::forEach<_Tp, Functor>(operation);
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Mat_&& m)
+    : Mat(m)
+{
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m)
+{
+    Mat::operator = (m);
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Mat&& m)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    *this = m;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m)
+{
+    if( DataType<_Tp>::type == m.type() )
+    {
+        Mat::operator = ((Mat&&)m);
+        return *this;
+    }
+    if( DataType<_Tp>::depth == m.depth() )
+    {
+        Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0));
+        return *this;
+    }
+    CV_DbgAssert(DataType<_Tp>::channels == m.channels());
+    m.convertTo(*this, type());
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(MatExpr&& e)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    *this = Mat(e);
+}
+
+#endif
+
+///////////////////////////// SparseMat /////////////////////////////
+
+inline
+SparseMat::SparseMat()
+    : flags(MAGIC_VAL), hdr(0)
+{}
+
+inline
+SparseMat::SparseMat(int _dims, const int* _sizes, int _type)
+    : flags(MAGIC_VAL), hdr(0)
+{
+    create(_dims, _sizes, _type);
+}
+
+inline
+SparseMat::SparseMat(const SparseMat& m)
+    : flags(m.flags), hdr(m.hdr)
+{
+    addref();
+}
+
+inline
+SparseMat::~SparseMat()
+{
+    release();
+}
+
+inline
+SparseMat& SparseMat::operator = (const SparseMat& m)
+{
+    if( this != &m )
+    {
+        if( m.hdr )
+            CV_XADD(&m.hdr->refcount, 1);
+        release();
+        flags = m.flags;
+        hdr = m.hdr;
+    }
+    return *this;
+}
+
+inline
+SparseMat& SparseMat::operator = (const Mat& m)
+{
+    return (*this = SparseMat(m));
+}
+
+inline
+SparseMat SparseMat::clone() const
+{
+    SparseMat temp;
+    this->copyTo(temp);
+    return temp;
+}
+
+inline
+void SparseMat::assignTo( SparseMat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void SparseMat::addref()
+{
+    if( hdr )
+        CV_XADD(&hdr->refcount, 1);
+}
+
+inline
+void SparseMat::release()
+{
+    if( hdr && CV_XADD(&hdr->refcount, -1) == 1 )
+        delete hdr;
+    hdr = 0;
+}
+
+inline
+size_t SparseMat::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t SparseMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int SparseMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int SparseMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int SparseMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+const int* SparseMat::size() const
+{
+    return hdr ? hdr->size : 0;
+}
+
+inline
+int SparseMat::size(int i) const
+{
+    if( hdr )
+    {
+        CV_DbgAssert((unsigned)i < (unsigned)hdr->dims);
+        return hdr->size[i];
+    }
+    return 0;
+}
+
+inline
+int SparseMat::dims() const
+{
+    return hdr ? hdr->dims : 0;
+}
+
+inline
+size_t SparseMat::nzcount() const
+{
+    return hdr ? hdr->nodeCount : 0;
+}
+
+inline
+size_t SparseMat::hash(int i0) const
+{
+    return (size_t)i0;
+}
+
+inline
+size_t SparseMat::hash(int i0, int i1) const
+{
+    return (size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1;
+}
+
+inline
+size_t SparseMat::hash(int i0, int i1, int i2) const
+{
+    return ((size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1) * HASH_SCALE + (unsigned)i2;
+}
+
+inline
+size_t SparseMat::hash(const int* idx) const
+{
+    size_t h = (unsigned)idx[0];
+    if( !hdr )
+        return 0;
+    int d = hdr->dims;
+    for(int i = 1; i < d; i++ )
+        h = h * HASH_SCALE + (unsigned)idx[i];
+    return h;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, int i2, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, i2, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(const int* idx, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(idx, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, int i2, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(const int* idx, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, int i2, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(const int* idx, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::value(Node* n)
+{
+    return *(_Tp*)((uchar*)n + hdr->valueOffset);
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMat::value(const Node* n) const
+{
+    return *(const _Tp*)((const uchar*)n + hdr->valueOffset);
+}
+
+inline
+SparseMat::Node* SparseMat::node(size_t nidx)
+{
+    return (Node*)(void*)&hdr->pool[nidx];
+}
+
+inline
+const SparseMat::Node* SparseMat::node(size_t nidx) const
+{
+    return (const Node*)(const void*)&hdr->pool[nidx];
+}
+
+inline
+SparseMatIterator SparseMat::begin()
+{
+    return SparseMatIterator(this);
+}
+
+inline
+SparseMatConstIterator SparseMat::begin() const
+{
+    return SparseMatConstIterator(this);
+}
+
+inline
+SparseMatIterator SparseMat::end()
+{
+    SparseMatIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+inline
+SparseMatConstIterator SparseMat::end() const
+{
+    SparseMatConstIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::begin()
+{
+    return SparseMatIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::begin() const
+{
+    return SparseMatConstIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::end()
+{
+    SparseMatIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::end() const
+{
+    SparseMatConstIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+
+
+///////////////////////////// SparseMat_ ////////////////////////////
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_()
+{
+    flags = MAGIC_VAL | DataType<_Tp>::type;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes)
+    : SparseMat(_dims, _sizes, DataType<_Tp>::type)
+{}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const SparseMat& m)
+{
+    if( m.type() == DataType<_Tp>::type )
+        *this = (const SparseMat_<_Tp>&)m;
+    else
+        m.convertTo(*this, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const SparseMat_<_Tp>& m)
+{
+    this->flags = m.flags;
+    this->hdr = m.hdr;
+    if( this->hdr )
+        CV_XADD(&this->hdr->refcount, 1);
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const Mat& m)
+{
+    SparseMat sm(m);
+    *this = sm;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m)
+{
+    if( this != &m )
+    {
+        if( m.hdr ) CV_XADD(&m.hdr->refcount, 1);
+        release();
+        flags = m.flags;
+        hdr = m.hdr;
+    }
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m)
+{
+    if( m.type() == DataType<_Tp>::type )
+        return (*this = (const SparseMat_<_Tp>&)m);
+    m.convertTo(*this, DataType<_Tp>::type);
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const Mat& m)
+{
+    return (*this = SparseMat(m));
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp> SparseMat_<_Tp>::clone() const
+{
+    SparseMat_<_Tp> m;
+    this->copyTo(m);
+    return m;
+}
+
+template<typename _Tp> inline
+void SparseMat_<_Tp>::create(int _dims, const int* _sizes)
+{
+    SparseMat::create(_dims, _sizes, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::type() const
+{
+    return DataType<_Tp>::type;
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::depth() const
+{
+    return DataType<_Tp>::depth;
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::channels() const
+{
+    return DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, int i1, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, i1, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, int i1, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, i1, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, int i1, int i2, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, i1, i2, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, int i1, int i2, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, i1, i2, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(const int* idx, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(idx, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(const int* idx, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(idx, hashval);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat_<_Tp>::begin()
+{
+    return SparseMatIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::begin() const
+{
+    return SparseMatConstIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat_<_Tp>::end()
+{
+    SparseMatIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::end() const
+{
+    SparseMatConstIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+
+
+////////////////////////// MatConstIterator /////////////////////////
+
+inline
+MatConstIterator::MatConstIterator()
+    : m(0), elemSize(0), ptr(0), sliceStart(0), sliceEnd(0)
+{}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    if( m && m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    seek((const int*)0);
+}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    CV_Assert(m && m->dims <= 2);
+    if( m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    int idx[] = {_row, _col};
+    seek(idx);
+}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m, Point _pt)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    CV_Assert(m && m->dims <= 2);
+    if( m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    int idx[] = {_pt.y, _pt.x};
+    seek(idx);
+}
+
+inline
+MatConstIterator::MatConstIterator(const MatConstIterator& it)
+    : m(it.m), elemSize(it.elemSize), ptr(it.ptr), sliceStart(it.sliceStart), sliceEnd(it.sliceEnd)
+{}
+
+inline
+MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it )
+{
+    m = it.m; elemSize = it.elemSize; ptr = it.ptr;
+    sliceStart = it.sliceStart; sliceEnd = it.sliceEnd;
+    return *this;
+}
+
+inline
+const uchar* MatConstIterator::operator *() const
+{
+    return ptr;
+}
+
+inline MatConstIterator& MatConstIterator::operator += (ptrdiff_t ofs)
+{
+    if( !m || ofs == 0 )
+        return *this;
+    ptrdiff_t ofsb = ofs*elemSize;
+    ptr += ofsb;
+    if( ptr < sliceStart || sliceEnd <= ptr )
+    {
+        ptr -= ofsb;
+        seek(ofs, true);
+    }
+    return *this;
+}
+
+inline
+MatConstIterator& MatConstIterator::operator -= (ptrdiff_t ofs)
+{
+    return (*this += -ofs);
+}
+
+inline
+MatConstIterator& MatConstIterator::operator --()
+{
+    if( m && (ptr -= elemSize) < sliceStart )
+    {
+        ptr += elemSize;
+        seek(-1, true);
+    }
+    return *this;
+}
+
+inline
+MatConstIterator MatConstIterator::operator --(int)
+{
+    MatConstIterator b = *this;
+    *this += -1;
+    return b;
+}
+
+inline
+MatConstIterator& MatConstIterator::operator ++()
+{
+    if( m && (ptr += elemSize) >= sliceEnd )
+    {
+        ptr -= elemSize;
+        seek(1, true);
+    }
+    return *this;
+}
+
+inline MatConstIterator MatConstIterator::operator ++(int)
+{
+    MatConstIterator b = *this;
+    *this += 1;
+    return b;
+}
+
+
+static inline
+bool operator == (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+static inline
+bool operator != (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return !(a == b);
+}
+
+static inline
+bool operator < (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr < b.ptr;
+}
+
+static inline
+bool operator > (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr > b.ptr;
+}
+
+static inline
+bool operator <= (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr <= b.ptr;
+}
+
+static inline
+bool operator >= (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr >= b.ptr;
+}
+
+static inline
+ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a)
+{
+    if( a.m != b.m )
+        return ((size_t)(-1) >> 1);
+    if( a.sliceEnd == b.sliceEnd )
+        return (b.ptr - a.ptr)/static_cast<ptrdiff_t>(b.elemSize);
+
+    return b.lpos() - a.lpos();
+}
+
+static inline
+MatConstIterator operator + (const MatConstIterator& a, ptrdiff_t ofs)
+{
+    MatConstIterator b = a;
+    return b += ofs;
+}
+
+static inline
+MatConstIterator operator + (ptrdiff_t ofs, const MatConstIterator& a)
+{
+    MatConstIterator b = a;
+    return b += ofs;
+}
+
+static inline
+MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs)
+{
+    MatConstIterator b = a;
+    return b += -ofs;
+}
+
+
+inline
+const uchar* MatConstIterator::operator [](ptrdiff_t i) const
+{
+    return *(*this + i);
+}
+
+
+
+///////////////////////// MatConstIterator_ /////////////////////////
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_()
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m)
+    : MatConstIterator(_m)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col)
+    : MatConstIterator(_m, _row, _col)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, Point _pt)
+    : MatConstIterator(_m, _pt)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const MatConstIterator_& it)
+    : MatConstIterator(it)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterator_& it )
+{
+    MatConstIterator::operator = (it);
+    return *this;
+}
+
+template<typename _Tp> inline
+const _Tp& MatConstIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)(this->ptr);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator += (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator -= (ptrdiff_t ofs)
+{
+    return (*this += -ofs);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator --()
+{
+    MatConstIterator::operator --();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator --(int)
+{
+    MatConstIterator_ b = *this;
+    MatConstIterator::operator --();
+    return b;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator ++()
+{
+    MatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator ++(int)
+{
+    MatConstIterator_ b = *this;
+    MatConstIterator::operator ++();
+    return b;
+}
+
+
+template<typename _Tp> inline
+Point MatConstIterator_<_Tp>::pos() const
+{
+    if( !m )
+        return Point();
+    CV_DbgAssert( m->dims <= 2 );
+    if( m->isContinuous() )
+    {
+        ptrdiff_t ofs = (const _Tp*)ptr - (const _Tp*)m->data;
+        int y = (int)(ofs / m->cols);
+        int x = (int)(ofs - (ptrdiff_t)y * m->cols);
+        return Point(x, y);
+    }
+    else
+    {
+        ptrdiff_t ofs = (uchar*)ptr - m->data;
+        int y = (int)(ofs / m->step);
+        int x = (int)((ofs - y * m->step)/sizeof(_Tp));
+        return Point(x, y);
+    }
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+template<typename _Tp> static inline
+bool operator != (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b)
+{
+    return a.m != b.m || a.ptr != b.ptr;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator + (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator + (ptrdiff_t ofs, const MatConstIterator_<_Tp>& a)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a - ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> inline
+const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const
+{
+    return *(_Tp*)MatConstIterator::operator [](i);
+}
+
+
+
+//////////////////////////// MatIterator_ ///////////////////////////
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_()
+    : MatConstIterator_<_Tp>()
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m)
+    : MatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col)
+    : MatConstIterator_<_Tp>(_m, _row, _col)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt)
+    : MatConstIterator_<_Tp>(_m, _pt)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx)
+    : MatConstIterator_<_Tp>(_m, _idx)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(const MatIterator_& it)
+    : MatConstIterator_<_Tp>(it)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator = (const MatIterator_<_Tp>& it )
+{
+    MatConstIterator::operator = (it);
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp& MatIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)(this->ptr);
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator += (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator -= (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (-ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator --()
+{
+    MatConstIterator::operator --();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> MatIterator_<_Tp>::operator --(int)
+{
+    MatIterator_ b = *this;
+    MatConstIterator::operator --();
+    return b;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator ++()
+{
+    MatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> MatIterator_<_Tp>::operator ++(int)
+{
+    MatIterator_ b = *this;
+    MatConstIterator::operator ++();
+    return b;
+}
+
+template<typename _Tp> inline
+_Tp& MatIterator_<_Tp>::operator [](ptrdiff_t i) const
+{
+    return *(*this + i);
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+template<typename _Tp> static inline
+bool operator != (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b)
+{
+    return a.m != b.m || a.ptr != b.ptr;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator + (const MatIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator + (ptrdiff_t ofs, const MatIterator_<_Tp>& a)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator - (const MatIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a - ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+
+
+/////////////////////// SparseMatConstIterator //////////////////////
+
+inline
+SparseMatConstIterator::SparseMatConstIterator()
+    : m(0), hashidx(0), ptr(0)
+{}
+
+inline
+SparseMatConstIterator::SparseMatConstIterator(const SparseMatConstIterator& it)
+    : m(it.m), hashidx(it.hashidx), ptr(it.ptr)
+{}
+
+inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseMatConstIterator& it)
+{
+    if( this != &it )
+    {
+        m = it.m;
+        hashidx = it.hashidx;
+        ptr = it.ptr;
+    }
+    return *this;
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator::value() const
+{
+    return *(const _Tp*)ptr;
+}
+
+inline
+const SparseMat::Node* SparseMatConstIterator::node() const
+{
+    return (ptr && m && m->hdr) ? (const SparseMat::Node*)(const void*)(ptr - m->hdr->valueOffset) : 0;
+}
+
+inline
+SparseMatConstIterator SparseMatConstIterator::operator ++(int)
+{
+    SparseMatConstIterator it = *this;
+    ++*this;
+    return it;
+}
+
+inline
+void SparseMatConstIterator::seekEnd()
+{
+    if( m && m->hdr )
+    {
+        hashidx = m->hdr->hashtab.size();
+        ptr = 0;
+    }
+}
+
+
+static inline
+bool operator == (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return it1.m == it2.m && it1.ptr == it2.ptr;
+}
+
+static inline
+bool operator != (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return !(it1 == it2);
+}
+
+
+
+///////////////////////// SparseMatIterator /////////////////////////
+
+inline
+SparseMatIterator::SparseMatIterator()
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(const SparseMatIterator& it)
+    : SparseMatConstIterator(it)
+{}
+
+inline
+SparseMatIterator& SparseMatIterator::operator = (const SparseMatIterator& it)
+{
+    (SparseMatConstIterator&)*this = it;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMatIterator::value() const
+{
+    return *(_Tp*)ptr;
+}
+
+inline
+SparseMat::Node* SparseMatIterator::node() const
+{
+    return (SparseMat::Node*)SparseMatConstIterator::node();
+}
+
+inline
+SparseMatIterator& SparseMatIterator::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+inline
+SparseMatIterator SparseMatIterator::operator ++(int)
+{
+    SparseMatIterator it = *this;
+    ++*this;
+    return it;
+}
+
+
+
+////////////////////// SparseMatConstIterator_ //////////////////////
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{
+    CV_Assert( _m->type() == DataType<_Tp>::type );
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it)
+    : SparseMatConstIterator(it)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator = (const SparseMatConstIterator_<_Tp>& it)
+{
+    return reinterpret_cast<SparseMatConstIterator_<_Tp>&>
+         (*reinterpret_cast<SparseMatConstIterator*>(this) =
+           reinterpret_cast<const SparseMatConstIterator&>(it));
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator_<_Tp>::operator *() const
+{
+    return *(const _Tp*)this->ptr;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMatConstIterator_<_Tp>::operator ++(int)
+{
+    SparseMatConstIterator_<_Tp> it = *this;
+    SparseMatConstIterator::operator ++();
+    return it;
+}
+
+
+
+///////////////////////// SparseMatIterator_ ////////////////////////
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m)
+    : SparseMatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m)
+    : SparseMatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it)
+    : SparseMatConstIterator_<_Tp>(it)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator = (const SparseMatIterator_<_Tp>& it)
+{
+    return reinterpret_cast<SparseMatIterator_<_Tp>&>
+         (*reinterpret_cast<SparseMatConstIterator*>(this) =
+           reinterpret_cast<const SparseMatConstIterator&>(it));
+}
+
+template<typename _Tp> inline
+_Tp& SparseMatIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)this->ptr;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMatIterator_<_Tp>::operator ++(int)
+{
+    SparseMatIterator_<_Tp> it = *this;
+    SparseMatConstIterator::operator ++();
+    return it;
+}
+
+
+
+//////////////////////// MatCommaInitializer_ ///////////////////////
+
+template<typename _Tp> inline
+MatCommaInitializer_<_Tp>::MatCommaInitializer_(Mat_<_Tp>* _m)
+    : it(_m)
+{}
+
+template<typename _Tp> template<typename T2> inline
+MatCommaInitializer_<_Tp>& MatCommaInitializer_<_Tp>::operator , (T2 v)
+{
+    CV_DbgAssert( this->it < ((const Mat_<_Tp>*)this->it.m)->end() );
+    *this->it = _Tp(v);
+    ++this->it;
+    return *this;
+}
+
+template<typename _Tp> inline
+MatCommaInitializer_<_Tp>::operator Mat_<_Tp>() const
+{
+    CV_DbgAssert( this->it == ((const Mat_<_Tp>*)this->it.m)->end() );
+    return Mat_<_Tp>(*this->it.m);
+}
+
+
+template<typename _Tp, typename T2> static inline
+MatCommaInitializer_<_Tp> operator << (const Mat_<_Tp>& m, T2 val)
+{
+    MatCommaInitializer_<_Tp> commaInitializer((Mat_<_Tp>*)&m);
+    return (commaInitializer, val);
+}
+
+
+
+///////////////////////// Matrix Expressions ////////////////////////
+
+inline
+Mat& Mat::operator = (const MatExpr& e)
+{
+    e.op->assign(e, *this);
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const MatExpr& e)
+{
+    e.op->assign(e, *this, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e)
+{
+    e.op->assign(e, *this, DataType<_Tp>::type);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::zeros(int rows, int cols)
+{
+    return Mat::zeros(rows, cols, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::zeros(Size sz)
+{
+    return Mat::zeros(sz, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::ones(int rows, int cols)
+{
+    return Mat::ones(rows, cols, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::ones(Size sz)
+{
+    return Mat::ones(sz, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::eye(int rows, int cols)
+{
+    return Mat::eye(rows, cols, DataType<_Tp>::type);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::eye(Size sz)
+{
+    return Mat::eye(sz, DataType<_Tp>::type);
+}
+
+inline
+MatExpr::MatExpr()
+    : op(0), flags(0), a(Mat()), b(Mat()), c(Mat()), alpha(0), beta(0), s()
+{}
+
+inline
+MatExpr::MatExpr(const MatOp* _op, int _flags, const Mat& _a, const Mat& _b,
+                 const Mat& _c, double _alpha, double _beta, const Scalar& _s)
+    : op(_op), flags(_flags), a(_a), b(_b), c(_c), alpha(_alpha), beta(_beta), s(_s)
+{}
+
+inline
+MatExpr::operator Mat() const
+{
+    Mat m;
+    op->assign(*this, m);
+    return m;
+}
+
+template<typename _Tp> inline
+MatExpr::operator Mat_<_Tp>() const
+{
+    Mat_<_Tp> m;
+    op->assign(*this, m, DataType<_Tp>::type);
+    return m;
+}
+
+
+template<typename _Tp> static inline
+MatExpr min(const Mat_<_Tp>& a, const Mat_<_Tp>& b)
+{
+    return cv::min((const Mat&)a, (const Mat&)b);
+}
+
+template<typename _Tp> static inline
+MatExpr min(const Mat_<_Tp>& a, double s)
+{
+    return cv::min((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr min(double s, const Mat_<_Tp>& a)
+{
+    return cv::min((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr max(const Mat_<_Tp>& a, const Mat_<_Tp>& b)
+{
+    return cv::max((const Mat&)a, (const Mat&)b);
+}
+
+template<typename _Tp> static inline
+MatExpr max(const Mat_<_Tp>& a, double s)
+{
+    return cv::max((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr max(double s, const Mat_<_Tp>& a)
+{
+    return cv::max((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr abs(const Mat_<_Tp>& m)
+{
+    return cv::abs((const Mat&)m);
+}
+
+
+static inline
+Mat& operator += (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator += (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator += (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator += (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator -= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator -= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator -= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator -= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator *= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator *= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator *= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator *= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator /= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator /= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator /= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, (Mat&)a);
+    return a;
+}
+
+
+//////////////////////////////// UMat ////////////////////////////////
+
+inline
+UMat::UMat(UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(Size _sz, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create( _sz.height, _sz.width, _type );
+}
+
+inline
+UMat::UMat(Size _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_sz.height, _sz.width, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(const UMat& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+  usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows)
+{
+    addref();
+    if( m.dims <= 2 )
+    {
+        step[0] = m.step[0]; step[1] = m.step[1];
+    }
+    else
+    {
+        dims = 0;
+        copySize(m);
+    }
+}
+
+
+template<typename _Tp> inline
+UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
+: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
+{
+    if(vec.empty())
+        return;
+    if( !copyData )
+    {
+        // !!!TODO!!!
+        CV_Error(Error::StsNotImplemented, "");
+    }
+    else
+        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+}
+
+
+inline
+UMat& UMat::operator = (const UMat& m)
+{
+    if( this != &m )
+    {
+        const_cast<UMat&>(m).addref();
+        release();
+        flags = m.flags;
+        if( dims <= 2 && m.dims <= 2 )
+        {
+            dims = m.dims;
+            rows = m.rows;
+            cols = m.cols;
+            step[0] = m.step[0];
+            step[1] = m.step[1];
+        }
+        else
+            copySize(m);
+        allocator = m.allocator;
+        if (usageFlags == USAGE_DEFAULT)
+            usageFlags = m.usageFlags;
+        u = m.u;
+        offset = m.offset;
+    }
+    return *this;
+}
+
+inline
+UMat UMat::row(int y) const
+{
+    return UMat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+UMat UMat::col(int x) const
+{
+    return UMat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+UMat UMat::rowRange(int startrow, int endrow) const
+{
+    return UMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+UMat UMat::rowRange(const Range& r) const
+{
+    return UMat(*this, r, Range::all());
+}
+
+inline
+UMat UMat::colRange(int startcol, int endcol) const
+{
+    return UMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+UMat UMat::colRange(const Range& r) const
+{
+    return UMat(*this, Range::all(), r);
+}
+
+inline
+UMat UMat::clone() const
+{
+    UMat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void UMat::assignTo( UMat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
+{
+    _type &= TYPE_MASK;
+    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
+        return;
+    int sz[] = {_rows, _cols};
+    create(2, sz, _type, _usageFlags);
+}
+
+inline
+void UMat::create(Size _sz, int _type, UMatUsageFlags _usageFlags)
+{
+    create(_sz.height, _sz.width, _type, _usageFlags);
+}
+
+inline
+void UMat::addref()
+{
+    if( u )
+        CV_XADD(&(u->urefcount), 1);
+}
+
+inline void UMat::release()
+{
+    if( u && CV_XADD(&(u->urefcount), -1) == 1 )
+        deallocate();
+    for(int i = 0; i < dims; i++)
+        size.p[i] = 0;
+    u = 0;
+}
+
+inline
+UMat UMat::operator()( Range _rowRange, Range _colRange ) const
+{
+    return UMat(*this, _rowRange, _colRange);
+}
+
+inline
+UMat UMat::operator()( const Rect& roi ) const
+{
+    return UMat(*this, roi);
+}
+
+inline
+UMat UMat::operator()(const Range* ranges) const
+{
+    return UMat(*this, ranges);
+}
+
+inline
+UMat UMat::operator()(const std::vector<Range>& ranges) const
+{
+    return UMat(*this, ranges);
+}
+
+inline
+bool UMat::isContinuous() const
+{
+    return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool UMat::isSubmatrix() const
+{
+    return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t UMat::elemSize() const
+{
+    return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t UMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int UMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int UMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int UMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t UMat::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+inline
+bool UMat::empty() const
+{
+    return u == 0 || total() == 0;
+}
+
+inline
+size_t UMat::total() const
+{
+    if( dims <= 2 )
+        return (size_t)rows * cols;
+    size_t p = 1;
+    for( int i = 0; i < dims; i++ )
+        p *= size[i];
+    return p;
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+inline
+UMat::UMat(UMat&& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+  usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows)
+{
+    if (m.dims <= 2)  // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.allocator = NULL;
+    m.u = NULL;
+    m.offset = 0;
+}
+
+inline
+UMat& UMat::operator = (UMat&& m)
+{
+    if (this == &m)
+      return *this;
+    release();
+    flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols;
+    allocator = m.allocator; usageFlags = m.usageFlags;
+    u = m.u;
+    offset = m.offset;
+    if (step.p != step.buf) // release self step/size
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+    if (m.dims <= 2) // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.allocator = NULL;
+    m.u = NULL;
+    m.offset = 0;
+    return *this;
+}
+
+#endif
+
+
+inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceMemMapped() const { return (flags & DEVICE_MEM_MAPPED) != 0; }
+inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; }
+inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; }
+inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; }
+
+inline void UMatData::markDeviceMemMapped(bool flag)
+{
+  if(flag)
+    flags |= DEVICE_MEM_MAPPED;
+  else
+    flags &= ~DEVICE_MEM_MAPPED;
+}
+
+inline void UMatData::markHostCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= HOST_COPY_OBSOLETE;
+    else
+        flags &= ~HOST_COPY_OBSOLETE;
+}
+inline void UMatData::markDeviceCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= DEVICE_COPY_OBSOLETE;
+    else
+        flags &= ~DEVICE_COPY_OBSOLETE;
+}
+
+inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); }
+inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
+
+//! @endcond
+
+} //cv
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/matx.hpp b/thirdparty/linux/include/opencv2/core/matx.hpp
new file mode 100644
index 0000000..0d07c3f
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/matx.hpp
@@ -0,0 +1,1407 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MATX_HPP
+#define OPENCV_CORE_MATX_HPP
+
+#ifndef __cplusplus
+#  error matx.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+#include "opencv2/core/traits.hpp"
+#include "opencv2/core/saturate.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+////////////////////////////// Small Matrix ///////////////////////////
+
+//! @cond IGNORED
+struct CV_EXPORTS Matx_AddOp {};
+struct CV_EXPORTS Matx_SubOp {};
+struct CV_EXPORTS Matx_ScaleOp {};
+struct CV_EXPORTS Matx_MulOp {};
+struct CV_EXPORTS Matx_DivOp {};
+struct CV_EXPORTS Matx_MatMulOp {};
+struct CV_EXPORTS Matx_TOp {};
+//! @endcond
+
+/** @brief Template class for small matrices whose type and size are known at compilation time
+
+If you need a more flexible type, use Mat . The elements of the matrix M are accessible using the
+M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions ) are
+available. To do an operation on Matx that is not implemented, you can easily convert the matrix to
+Mat and backwards:
+@code
+    Matx33f m(1, 2, 3,
+              4, 5, 6,
+              7, 8, 9);
+    cout << sum(Mat(m*m.t())) << endl;
+ @endcode
+ */
+template<typename _Tp, int m, int n> class Matx
+{
+public:
+    enum { depth    = DataType<_Tp>::depth,
+           rows     = m,
+           cols     = n,
+           channels = rows*cols,
+           type     = CV_MAKETYPE(depth, channels),
+           shortdim = (m < n ? m : n)
+         };
+
+    typedef _Tp                           value_type;
+    typedef Matx<_Tp, m, n>               mat_type;
+    typedef Matx<_Tp, shortdim, 1> diag_type;
+
+    //! default constructor
+    Matx();
+
+    Matx(_Tp v0); //!< 1x1 matrix
+    Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 1x5 or 5x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 1x7 or 7x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 1x9, 3x3 or 9x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11,
+         _Tp v12, _Tp v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11,
+         _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix
+    explicit Matx(const _Tp* vals); //!< initialize from a plain array
+
+    static Matx all(_Tp alpha);
+    static Matx zeros();
+    static Matx ones();
+    static Matx eye();
+    static Matx diag(const diag_type& d);
+    static Matx randu(_Tp a, _Tp b);
+    static Matx randn(_Tp a, _Tp b);
+
+    //! dot product computed with the default precision
+    _Tp dot(const Matx<_Tp, m, n>& v) const;
+
+    //! dot product computed in double-precision arithmetics
+    double ddot(const Matx<_Tp, m, n>& v) const;
+
+    //! conversion to another data type
+    template<typename T2> operator Matx<T2, m, n>() const;
+
+    //! change the matrix shape
+    template<int m1, int n1> Matx<_Tp, m1, n1> reshape() const;
+
+    //! extract part of the matrix
+    template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int i, int j) const;
+
+    //! extract the matrix row
+    Matx<_Tp, 1, n> row(int i) const;
+
+    //! extract the matrix column
+    Matx<_Tp, m, 1> col(int i) const;
+
+    //! extract the matrix diagonal
+    diag_type diag() const;
+
+    //! transpose the matrix
+    Matx<_Tp, n, m> t() const;
+
+    //! invert the matrix
+    Matx<_Tp, n, m> inv(int method=DECOMP_LU, bool *p_is_ok = NULL) const;
+
+    //! solve linear system
+    template<int l> Matx<_Tp, n, l> solve(const Matx<_Tp, m, l>& rhs, int flags=DECOMP_LU) const;
+    Vec<_Tp, n> solve(const Vec<_Tp, m>& rhs, int method) const;
+
+    //! multiply two matrices element-wise
+    Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const;
+
+    //! divide two matrices element-wise
+    Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
+
+    //! element access
+    const _Tp& operator ()(int i, int j) const;
+    _Tp& operator ()(int i, int j);
+
+    //! 1D element access
+    const _Tp& operator ()(int i) const;
+    _Tp& operator ()(int i);
+
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp);
+    template<typename _T2> Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp);
+    template<int l> Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp);
+    Matx(const Matx<_Tp, n, m>& a, Matx_TOp);
+
+    _Tp val[m*n]; //< matrix elements
+};
+
+typedef Matx<float, 1, 2> Matx12f;
+typedef Matx<double, 1, 2> Matx12d;
+typedef Matx<float, 1, 3> Matx13f;
+typedef Matx<double, 1, 3> Matx13d;
+typedef Matx<float, 1, 4> Matx14f;
+typedef Matx<double, 1, 4> Matx14d;
+typedef Matx<float, 1, 6> Matx16f;
+typedef Matx<double, 1, 6> Matx16d;
+
+typedef Matx<float, 2, 1> Matx21f;
+typedef Matx<double, 2, 1> Matx21d;
+typedef Matx<float, 3, 1> Matx31f;
+typedef Matx<double, 3, 1> Matx31d;
+typedef Matx<float, 4, 1> Matx41f;
+typedef Matx<double, 4, 1> Matx41d;
+typedef Matx<float, 6, 1> Matx61f;
+typedef Matx<double, 6, 1> Matx61d;
+
+typedef Matx<float, 2, 2> Matx22f;
+typedef Matx<double, 2, 2> Matx22d;
+typedef Matx<float, 2, 3> Matx23f;
+typedef Matx<double, 2, 3> Matx23d;
+typedef Matx<float, 3, 2> Matx32f;
+typedef Matx<double, 3, 2> Matx32d;
+
+typedef Matx<float, 3, 3> Matx33f;
+typedef Matx<double, 3, 3> Matx33d;
+
+typedef Matx<float, 3, 4> Matx34f;
+typedef Matx<double, 3, 4> Matx34d;
+typedef Matx<float, 4, 3> Matx43f;
+typedef Matx<double, 4, 3> Matx43d;
+
+typedef Matx<float, 4, 4> Matx44f;
+typedef Matx<double, 4, 4> Matx44d;
+typedef Matx<float, 6, 6> Matx66f;
+typedef Matx<double, 6, 6> Matx66d;
+
+/*!
+  traits
+*/
+template<typename _Tp, int m, int n> class DataType< Matx<_Tp, m, n> >
+{
+public:
+    typedef Matx<_Tp, m, n>                               value_type;
+    typedef Matx<typename DataType<_Tp>::work_type, m, n> work_type;
+    typedef _Tp                                           channel_type;
+    typedef value_type                                    vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = m * n,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+/** @brief  Comma-separated Matrix Initializer
+*/
+template<typename _Tp, int m, int n> class MatxCommaInitializer
+{
+public:
+    MatxCommaInitializer(Matx<_Tp, m, n>* _mtx);
+    template<typename T2> MatxCommaInitializer<_Tp, m, n>& operator , (T2 val);
+    Matx<_Tp, m, n> operator *() const;
+
+    Matx<_Tp, m, n>* dst;
+    int idx;
+};
+
+/*
+ Utility methods
+*/
+template<typename _Tp, int m> static double determinant(const Matx<_Tp, m, m>& a);
+template<typename _Tp, int m, int n> static double trace(const Matx<_Tp, m, n>& a);
+template<typename _Tp, int m, int n> static double norm(const Matx<_Tp, m, n>& M);
+template<typename _Tp, int m, int n> static double norm(const Matx<_Tp, m, n>& M, int normType);
+
+
+
+/////////////////////// Vec (used as element of multi-channel images /////////////////////
+
+/** @brief Template class for short numerical vectors, a partial case of Matx
+
+This template class represents short numerical vectors (of 1, 2, 3, 4 ... elements) on which you
+can perform basic arithmetical operations, access individual elements using [] operator etc. The
+vectors are allocated on stack, as opposite to std::valarray, std::vector, cv::Mat etc., which
+elements are dynamically allocated in the heap.
+
+The template takes 2 parameters:
+@tparam _Tp element type
+@tparam cn the number of elements
+
+In addition to the universal notation like Vec<float, 3>, you can use shorter aliases
+for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec<float, 3>.
+
+It is possible to convert Vec\<T,2\> to/from Point_, Vec\<T,3\> to/from Point3_ , and Vec\<T,4\>
+to CvScalar or Scalar_. Use operator[] to access the elements of Vec.
+
+All the expected vector operations are also implemented:
+-   v1 = v2 + v3
+-   v1 = v2 - v3
+-   v1 = v2 \* scale
+-   v1 = scale \* v2
+-   v1 = -v2
+-   v1 += v2 and other augmenting operations
+-   v1 == v2, v1 != v2
+-   norm(v1) (euclidean norm)
+The Vec class is commonly used to describe pixel types of multi-channel arrays. See Mat for details.
+*/
+template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
+{
+public:
+    typedef _Tp value_type;
+    enum { depth    = Matx<_Tp, cn, 1>::depth,
+           channels = cn,
+           type     = CV_MAKETYPE(depth, channels)
+         };
+
+    //! default constructor
+    Vec();
+
+    Vec(_Tp v0); //!< 1-element vector constructor
+    Vec(_Tp v0, _Tp v1); //!< 2-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2); //!< 3-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 4-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 5-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 6-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 7-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 8-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 9-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 10-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor
+    explicit Vec(const _Tp* values);
+
+    Vec(const Vec<_Tp, cn>& v);
+
+    static Vec all(_Tp alpha);
+
+    //! per-element multiplication
+    Vec mul(const Vec<_Tp, cn>& v) const;
+
+    //! conjugation (makes sense for complex numbers and quaternions)
+    Vec conj() const;
+
+    /*!
+      cross product of the two 3D vectors.
+
+      For other dimensionalities the exception is raised
+    */
+    Vec cross(const Vec& v) const;
+    //! conversion to another data type
+    template<typename T2> operator Vec<T2, cn>() const;
+
+    /*! element access */
+    const _Tp& operator [](int i) const;
+    _Tp& operator[](int i);
+    const _Tp& operator ()(int i) const;
+    _Tp& operator ()(int i);
+
+    Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
+    Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
+    template<typename _T2> Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp);
+};
+
+/** @name Shorter aliases for the most popular specializations of Vec<T,n>
+  @{
+*/
+typedef Vec<uchar, 2> Vec2b;
+typedef Vec<uchar, 3> Vec3b;
+typedef Vec<uchar, 4> Vec4b;
+
+typedef Vec<short, 2> Vec2s;
+typedef Vec<short, 3> Vec3s;
+typedef Vec<short, 4> Vec4s;
+
+typedef Vec<ushort, 2> Vec2w;
+typedef Vec<ushort, 3> Vec3w;
+typedef Vec<ushort, 4> Vec4w;
+
+typedef Vec<int, 2> Vec2i;
+typedef Vec<int, 3> Vec3i;
+typedef Vec<int, 4> Vec4i;
+typedef Vec<int, 6> Vec6i;
+typedef Vec<int, 8> Vec8i;
+
+typedef Vec<float, 2> Vec2f;
+typedef Vec<float, 3> Vec3f;
+typedef Vec<float, 4> Vec4f;
+typedef Vec<float, 6> Vec6f;
+
+typedef Vec<double, 2> Vec2d;
+typedef Vec<double, 3> Vec3d;
+typedef Vec<double, 4> Vec4d;
+typedef Vec<double, 6> Vec6d;
+/** @} */
+
+/*!
+  traits
+*/
+template<typename _Tp, int cn> class DataType< Vec<_Tp, cn> >
+{
+public:
+    typedef Vec<_Tp, cn>                               value_type;
+    typedef Vec<typename DataType<_Tp>::work_type, cn> work_type;
+    typedef _Tp                                        channel_type;
+    typedef value_type                                 vec_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = cn,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+/** @brief  Comma-separated Vec Initializer
+*/
+template<typename _Tp, int m> class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1>
+{
+public:
+    VecCommaInitializer(Vec<_Tp, m>* _vec);
+    template<typename T2> VecCommaInitializer<_Tp, m>& operator , (T2 val);
+    Vec<_Tp, m> operator *() const;
+};
+
+template<typename _Tp, int cn> static Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v);
+
+//! @} core_basic
+
+//! @cond IGNORED
+
+///////////////////////////////////// helper classes /////////////////////////////////////
+namespace internal
+{
+
+template<typename _Tp, int m> struct Matx_DetOp
+{
+    double operator ()(const Matx<_Tp, m, m>& a) const
+    {
+        Matx<_Tp, m, m> temp = a;
+        double p = LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0);
+        if( p == 0 )
+            return p;
+        for( int i = 0; i < m; i++ )
+            p *= temp(i, i);
+        return p;
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 1>
+{
+    double operator ()(const Matx<_Tp, 1, 1>& a) const
+    {
+        return a(0,0);
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 2>
+{
+    double operator ()(const Matx<_Tp, 2, 2>& a) const
+    {
+        return a(0,0)*a(1,1) - a(0,1)*a(1,0);
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 3>
+{
+    double operator ()(const Matx<_Tp, 3, 3>& a) const
+    {
+        return a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2)) -
+            a(0,1)*(a(1,0)*a(2,2) - a(2,0)*a(1,2)) +
+            a(0,2)*(a(1,0)*a(2,1) - a(2,0)*a(1,1));
+    }
+};
+
+template<typename _Tp> Vec<_Tp, 2> inline conjugate(const Vec<_Tp, 2>& v)
+{
+    return Vec<_Tp, 2>(v[0], -v[1]);
+}
+
+template<typename _Tp> Vec<_Tp, 4> inline conjugate(const Vec<_Tp, 4>& v)
+{
+    return Vec<_Tp, 4>(v[0], -v[1], -v[2], -v[3]);
+}
+
+} // internal
+
+
+
+////////////////////////////////// Matx Implementation ///////////////////////////////////
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx()
+{
+    for(int i = 0; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0)
+{
+    val[0] = v0;
+    for(int i = 1; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1)
+{
+    CV_StaticAssert(channels >= 2, "Matx should have at least 2 elements.");
+    val[0] = v0; val[1] = v1;
+    for(int i = 2; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2)
+{
+    CV_StaticAssert(channels >= 3, "Matx should have at least 3 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2;
+    for(int i = 3; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+{
+    CV_StaticAssert(channels >= 4, "Matx should have at least 4 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    for(int i = 4; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
+{
+    CV_StaticAssert(channels >= 5, "Matx should have at least 5 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4;
+    for(int i = 5; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
+{
+    CV_StaticAssert(channels >= 6, "Matx should have at least 6 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5;
+    for(int i = 6; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
+{
+    CV_StaticAssert(channels >= 7, "Matx should have at least 7 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6;
+    for(int i = 7; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7)
+{
+    CV_StaticAssert(channels >= 8, "Matx should have at least 8 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    for(int i = 8; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8)
+{
+    CV_StaticAssert(channels >= 9, "Matx should have at least 9 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8;
+    for(int i = 9; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9)
+{
+    CV_StaticAssert(channels >= 10, "Matx should have at least 10 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9;
+    for(int i = 10; i < channels; i++) val[i] = _Tp(0);
+}
+
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11)
+{
+    CV_StaticAssert(channels >= 12, "Matx should have at least 12 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    for(int i = 12; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13)
+{
+    CV_StaticAssert(channels == 14, "Matx should have at least 14 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    val[12] = v12; val[13] = v13;
+}
+
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13, _Tp v14, _Tp v15)
+{
+    CV_StaticAssert(channels >= 16, "Matx should have at least 16 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15;
+    for(int i = 16; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(const _Tp* values)
+{
+    for( int i = 0; i < channels; i++ ) val[i] = values[i];
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha)
+{
+    Matx<_Tp, m, n> M;
+    for( int i = 0; i < m*n; i++ ) M.val[i] = alpha;
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::zeros()
+{
+    return all(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::ones()
+{
+    return all(1);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::eye()
+{
+    Matx<_Tp,m,n> M;
+    for(int i = 0; i < shortdim; i++)
+        M(i,i) = 1;
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp Matx<_Tp, m, n>::dot(const Matx<_Tp, m, n>& M) const
+{
+    _Tp s = 0;
+    for( int i = 0; i < channels; i++ ) s += val[i]*M.val[i];
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+double Matx<_Tp, m, n>::ddot(const Matx<_Tp, m, n>& M) const
+{
+    double s = 0;
+    for( int i = 0; i < channels; i++ ) s += (double)val[i]*M.val[i];
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::diag(const typename Matx<_Tp,m,n>::diag_type& d)
+{
+    Matx<_Tp,m,n> M;
+    for(int i = 0; i < shortdim; i++)
+        M(i,i) = d(i, 0);
+    return M;
+}
+
+template<typename _Tp, int m, int n> template<typename T2>
+inline Matx<_Tp, m, n>::operator Matx<T2, m, n>() const
+{
+    Matx<T2, m, n> M;
+    for( int i = 0; i < m*n; i++ ) M.val[i] = saturate_cast<T2>(val[i]);
+    return M;
+}
+
+template<typename _Tp, int m, int n> template<int m1, int n1> inline
+Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const
+{
+    CV_StaticAssert(m1*n1 == m*n, "Input and destnarion matrices must have the same number of elements");
+    return (const Matx<_Tp, m1, n1>&)*this;
+}
+
+template<typename _Tp, int m, int n>
+template<int m1, int n1> inline
+Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int i, int j) const
+{
+    CV_DbgAssert(0 <= i && i+m1 <= m && 0 <= j && j+n1 <= n);
+    Matx<_Tp, m1, n1> s;
+    for( int di = 0; di < m1; di++ )
+        for( int dj = 0; dj < n1; dj++ )
+            s(di, dj) = (*this)(i+di, j+dj);
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, 1, n> Matx<_Tp, m, n>::row(int i) const
+{
+    CV_DbgAssert((unsigned)i < (unsigned)m);
+    return Matx<_Tp, 1, n>(&val[i*n]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, 1> Matx<_Tp, m, n>::col(int j) const
+{
+    CV_DbgAssert((unsigned)j < (unsigned)n);
+    Matx<_Tp, m, 1> v;
+    for( int i = 0; i < m; i++ )
+        v.val[i] = val[i*n + j];
+    return v;
+}
+
+template<typename _Tp, int m, int n> inline
+typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const
+{
+    diag_type d;
+    for( int i = 0; i < shortdim; i++ )
+        d.val[i] = val[i*n + i];
+    return d;
+}
+
+template<typename _Tp, int m, int n> inline
+const _Tp& Matx<_Tp, m, n>::operator()(int i, int j) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
+    return this->val[i*n + j];
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp& Matx<_Tp, m, n>::operator ()(int i, int j)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
+    return val[i*n + j];
+}
+
+template<typename _Tp, int m, int n> inline
+const _Tp& Matx<_Tp, m, n>::operator ()(int i) const
+{
+    CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row");
+    CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) );
+    return val[i];
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp& Matx<_Tp, m, n>::operator ()(int i)
+{
+    CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row");
+    CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) );
+    return val[i];
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] + b.val[i]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] - b.val[i]);
+}
+
+template<typename _Tp, int m, int n> template<typename _T2> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]);
+}
+
+template<typename _Tp, int m, int n> template<int l> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp)
+{
+    for( int i = 0; i < m; i++ )
+        for( int j = 0; j < n; j++ )
+        {
+            _Tp s = 0;
+            for( int k = 0; k < l; k++ )
+                s += a(i, k) * b(k, j);
+            val[i*n + j] = s;
+        }
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, n, m>& a, Matx_TOp)
+{
+    for( int i = 0; i < m; i++ )
+        for( int j = 0; j < n; j++ )
+            val[i*n + j] = a(j, i);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const
+{
+    return Matx<_Tp, m, n>(*this, a, Matx_MulOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::div(const Matx<_Tp, m, n>& a) const
+{
+    return Matx<_Tp, m, n>(*this, a, Matx_DivOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, n, m> Matx<_Tp, m, n>::t() const
+{
+    return Matx<_Tp, n, m>(*this, Matx_TOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Vec<_Tp, n> Matx<_Tp, m, n>::solve(const Vec<_Tp, m>& rhs, int method) const
+{
+    Matx<_Tp, n, 1> x = solve((const Matx<_Tp, m, 1>&)(rhs), method);
+    return (Vec<_Tp, n>&)(x);
+}
+
+template<typename _Tp, int m> static inline
+double determinant(const Matx<_Tp, m, m>& a)
+{
+    return cv::internal::Matx_DetOp<_Tp, m>()(a);
+}
+
+template<typename _Tp, int m, int n> static inline
+double trace(const Matx<_Tp, m, n>& a)
+{
+    _Tp s = 0;
+    for( int i = 0; i < std::min(m, n); i++ )
+        s += a(i,i);
+    return s;
+}
+
+template<typename _Tp, int m, int n> static inline
+double norm(const Matx<_Tp, m, n>& M)
+{
+    return std::sqrt(normL2Sqr<_Tp, double>(M.val, m*n));
+}
+
+template<typename _Tp, int m, int n> static inline
+double norm(const Matx<_Tp, m, n>& M, int normType)
+{
+    switch(normType) {
+    case NORM_INF:
+        return (double)normInf<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    case NORM_L1:
+        return (double)normL1<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    case NORM_L2SQR:
+        return (double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    default:
+    case NORM_L2:
+        return std::sqrt((double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n));
+    }
+}
+
+
+
+//////////////////////////////// matx comma initializer //////////////////////////////////
+
+template<typename _Tp, typename _T2, int m, int n> static inline
+MatxCommaInitializer<_Tp, m, n> operator << (const Matx<_Tp, m, n>& mtx, _T2 val)
+{
+    MatxCommaInitializer<_Tp, m, n> commaInitializer((Matx<_Tp, m, n>*)&mtx);
+    return (commaInitializer, val);
+}
+
+template<typename _Tp, int m, int n> inline
+MatxCommaInitializer<_Tp, m, n>::MatxCommaInitializer(Matx<_Tp, m, n>* _mtx)
+    : dst(_mtx), idx(0)
+{}
+
+template<typename _Tp, int m, int n> template<typename _T2> inline
+MatxCommaInitializer<_Tp, m, n>& MatxCommaInitializer<_Tp, m, n>::operator , (_T2 value)
+{
+    CV_DbgAssert( idx < m*n );
+    dst->val[idx++] = saturate_cast<_Tp>(value);
+    return *this;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> MatxCommaInitializer<_Tp, m, n>::operator *() const
+{
+    CV_DbgAssert( idx == n*m );
+    return *dst;
+}
+
+
+
+/////////////////////////////////// Vec Implementation ///////////////////////////////////
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec() {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0)
+    : Matx<_Tp, cn, 1>(v0) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1)
+    : Matx<_Tp, cn, 1>(v0, v1) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2)
+    : Matx<_Tp, cn, 1>(v0, v1, v2) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const _Tp* values)
+    : Matx<_Tp, cn, 1>(values) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m)
+    : Matx<_Tp, cn, 1>(m.val) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp op)
+    : Matx<_Tp, cn, 1>(a, b, op) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp op)
+    : Matx<_Tp, cn, 1>(a, b, op) {}
+
+template<typename _Tp, int cn> template<typename _T2> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp op)
+    : Matx<_Tp, cn, 1>(a, alpha, op) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha)
+{
+    Vec v;
+    for( int i = 0; i < cn; i++ ) v.val[i] = alpha;
+    return v;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const
+{
+    Vec<_Tp, cn> w;
+    for( int i = 0; i < cn; i++ ) w.val[i] = saturate_cast<_Tp>(this->val[i]*v.val[i]);
+    return w;
+}
+
+template<> inline
+Vec<float, 2> Vec<float, 2>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<double, 2> Vec<double, 2>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<float, 4> Vec<float, 4>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<double, 4> Vec<double, 4>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::cross(const Vec<_Tp, cn>&) const
+{
+    CV_StaticAssert(cn == 3, "for arbitrary-size vector there is no cross-product defined");
+    return Vec<_Tp, cn>();
+}
+
+template<> inline
+Vec<float, 3> Vec<float, 3>::cross(const Vec<float, 3>& v) const
+{
+    return Vec<float,3>(this->val[1]*v.val[2] - this->val[2]*v.val[1],
+                     this->val[2]*v.val[0] - this->val[0]*v.val[2],
+                     this->val[0]*v.val[1] - this->val[1]*v.val[0]);
+}
+
+template<> inline
+Vec<double, 3> Vec<double, 3>::cross(const Vec<double, 3>& v) const
+{
+    return Vec<double,3>(this->val[1]*v.val[2] - this->val[2]*v.val[1],
+                     this->val[2]*v.val[0] - this->val[0]*v.val[2],
+                     this->val[0]*v.val[1] - this->val[1]*v.val[0]);
+}
+
+template<typename _Tp, int cn> template<typename T2> inline
+Vec<_Tp, cn>::operator Vec<T2, cn>() const
+{
+    Vec<T2, cn> v;
+    for( int i = 0; i < cn; i++ ) v.val[i] = saturate_cast<T2>(this->val[i]);
+    return v;
+}
+
+template<typename _Tp, int cn> inline
+const _Tp& Vec<_Tp, cn>::operator [](int i) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+_Tp& Vec<_Tp, cn>::operator [](int i)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+const _Tp& Vec<_Tp, cn>::operator ()(int i) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+_Tp& Vec<_Tp, cn>::operator ()(int i)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v)
+{
+    double nv = norm(v);
+    return v * (nv ? 1./nv : 0.);
+}
+
+
+
+//////////////////////////////// matx comma initializer //////////////////////////////////
+
+
+template<typename _Tp, typename _T2, int cn> static inline
+VecCommaInitializer<_Tp, cn> operator << (const Vec<_Tp, cn>& vec, _T2 val)
+{
+    VecCommaInitializer<_Tp, cn> commaInitializer((Vec<_Tp, cn>*)&vec);
+    return (commaInitializer, val);
+}
+
+template<typename _Tp, int cn> inline
+VecCommaInitializer<_Tp, cn>::VecCommaInitializer(Vec<_Tp, cn>* _vec)
+    : MatxCommaInitializer<_Tp, cn, 1>(_vec)
+{}
+
+template<typename _Tp, int cn> template<typename _T2> inline
+VecCommaInitializer<_Tp, cn>& VecCommaInitializer<_Tp, cn>::operator , (_T2 value)
+{
+    CV_DbgAssert( this->idx < cn );
+    this->dst->val[this->idx++] = saturate_cast<_Tp>(value);
+    return *this;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> VecCommaInitializer<_Tp, cn>::operator *() const
+{
+    CV_DbgAssert( this->idx == cn );
+    return *this->dst;
+}
+
+//! @endcond
+
+///////////////////////////// Matx out-of-class operators ////////////////////////////////
+
+//! @relates cv::Matx
+//! @{
+
+template<typename _Tp1, typename _Tp2, int m, int n> static inline
+Matx<_Tp1, m, n>& operator += (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]);
+    return a;
+}
+
+template<typename _Tp1, typename _Tp2, int m, int n> static inline
+Matx<_Tp1, m, n>& operator -= (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator + (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_AddOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_SubOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, int alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, float alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, double alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, int alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, float alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, double alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (int alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (float alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, -1, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n, int l> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_MatMulOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b)
+{
+    Matx<_Tp, m, 1> c(a, b, Matx_MatMulOp());
+    return (const Vec<_Tp, m>&)(c);
+}
+
+template<typename _Tp, int m, int n> static inline
+bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        if( a.val[i] != b.val[i] ) return false;
+    return true;
+}
+
+template<typename _Tp, int m, int n> static inline
+bool operator != (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return !(a == b);
+}
+
+//! @}
+
+////////////////////////////// Vec out-of-class operators ////////////////////////////////
+
+//! @relates cv::Vec
+//! @{
+
+template<typename _Tp1, typename _Tp2, int cn> static inline
+Vec<_Tp1, cn>& operator += (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b)
+{
+    for( int i = 0; i < cn; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]);
+    return a;
+}
+
+template<typename _Tp1, typename _Tp2, int cn> static inline
+Vec<_Tp1, cn>& operator -= (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b)
+{
+    for( int i = 0; i < cn; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator + (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b)
+{
+    return Vec<_Tp, cn>(a, b, Matx_AddOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b)
+{
+    return Vec<_Tp, cn>(a, b, Matx_SubOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, int alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, float alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, double alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, int alpha)
+{
+    double ialpha = 1./alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, float alpha)
+{
+    float ialpha = 1.f/alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, double alpha)
+{
+    double ialpha = 1./alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, int alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (int alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, float alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (float alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, double alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (double alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, int alpha)
+{
+    return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, float alpha)
+{
+    return Vec<_Tp, cn>(a, 1.f/alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, double alpha)
+{
+    return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a)
+{
+    Vec<_Tp,cn> t;
+    for( int i = 0; i < cn; i++ ) t.val[i] = saturate_cast<_Tp>(-a.val[i]);
+    return t;
+}
+
+template<typename _Tp> inline Vec<_Tp, 4> operator * (const Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2)
+{
+    return Vec<_Tp, 4>(saturate_cast<_Tp>(v1[0]*v2[0] - v1[1]*v2[1] - v1[2]*v2[2] - v1[3]*v2[3]),
+                       saturate_cast<_Tp>(v1[0]*v2[1] + v1[1]*v2[0] + v1[2]*v2[3] - v1[3]*v2[2]),
+                       saturate_cast<_Tp>(v1[0]*v2[2] - v1[1]*v2[3] + v1[2]*v2[0] + v1[3]*v2[1]),
+                       saturate_cast<_Tp>(v1[0]*v2[3] + v1[1]*v2[2] - v1[2]*v2[1] + v1[3]*v2[0]));
+}
+
+template<typename _Tp> inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2)
+{
+    v1 = v1 * v2;
+    return v1;
+}
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_MATX_HPP
diff --git a/thirdparty/linux/include/opencv2/core/neon_utils.hpp b/thirdparty/linux/include/opencv2/core/neon_utils.hpp
new file mode 100644
index 0000000..573ba99
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/neon_utils.hpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_NEON_UTILS_HPP
+#define OPENCV_HAL_NEON_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_neon
+//! @{
+
+#if CV_NEON
+
+inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
+{
+    static int32x2_t v_sign = vdup_n_s32(1 << 31),
+        v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
+
+    int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
+    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
+}
+
+inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
+{
+    static int32x4_t v_sign = vdupq_n_s32(1 << 31),
+        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
+
+    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
+    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
+}
+
+inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
+{
+    static float32x2_t v_05 = vdup_n_f32(0.5f);
+    return vcvt_u32_f32(vadd_f32(v, v_05));
+}
+
+inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
+{
+    static float32x4_t v_05 = vdupq_n_f32(0.5f);
+    return vcvtq_u32_f32(vaddq_f32(v, v_05));
+}
+
+inline float32x4_t cv_vrecpq_f32(float32x4_t val)
+{
+    float32x4_t reciprocal = vrecpeq_f32(val);
+    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
+    return reciprocal;
+}
+
+inline float32x2_t cv_vrecp_f32(float32x2_t val)
+{
+    float32x2_t reciprocal = vrecpe_f32(val);
+    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
+    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
+    return reciprocal;
+}
+
+inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
+{
+    float32x4_t e = vrsqrteq_f32(val);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
+    return e;
+}
+
+inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
+{
+    float32x2_t e = vrsqrte_f32(val);
+    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
+    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
+    return e;
+}
+
+inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
+{
+    return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
+}
+
+inline float32x2_t cv_vsqrt_f32(float32x2_t val)
+{
+    return cv_vrecp_f32(cv_vrsqrt_f32(val));
+}
+
+#endif
+
+//! @}
+
+#endif // OPENCV_HAL_NEON_UTILS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/ocl.hpp b/thirdparty/linux/include/opencv2/core/ocl.hpp
new file mode 100644
index 0000000..1a9549d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/ocl.hpp
@@ -0,0 +1,757 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_HPP
+#define OPENCV_OPENCL_HPP
+
+#include "opencv2/core.hpp"
+
+namespace cv { namespace ocl {
+
+//! @addtogroup core_opencl
+//! @{
+
+CV_EXPORTS_W bool haveOpenCL();
+CV_EXPORTS_W bool useOpenCL();
+CV_EXPORTS_W bool haveAmdBlas();
+CV_EXPORTS_W bool haveAmdFft();
+CV_EXPORTS_W void setUseOpenCL(bool flag);
+CV_EXPORTS_W void finish();
+
+CV_EXPORTS bool haveSVM();
+
+class CV_EXPORTS Context;
+class CV_EXPORTS Device;
+class CV_EXPORTS Kernel;
+class CV_EXPORTS Program;
+class CV_EXPORTS ProgramSource;
+class CV_EXPORTS Queue;
+class CV_EXPORTS PlatformInfo;
+class CV_EXPORTS Image2D;
+
+class CV_EXPORTS Device
+{
+public:
+    Device();
+    explicit Device(void* d);
+    Device(const Device& d);
+    Device& operator = (const Device& d);
+    ~Device();
+
+    void set(void* d);
+
+    enum
+    {
+        TYPE_DEFAULT     = (1 << 0),
+        TYPE_CPU         = (1 << 1),
+        TYPE_GPU         = (1 << 2),
+        TYPE_ACCELERATOR = (1 << 3),
+        TYPE_DGPU        = TYPE_GPU + (1 << 16),
+        TYPE_IGPU        = TYPE_GPU + (1 << 17),
+        TYPE_ALL         = 0xFFFFFFFF
+    };
+
+    String name() const;
+    String extensions() const;
+    String version() const;
+    String vendorName() const;
+    String OpenCL_C_Version() const;
+    String OpenCLVersion() const;
+    int deviceVersionMajor() const;
+    int deviceVersionMinor() const;
+    String driverVersion() const;
+    void* ptr() const;
+
+    int type() const;
+
+    int addressBits() const;
+    bool available() const;
+    bool compilerAvailable() const;
+    bool linkerAvailable() const;
+
+    enum
+    {
+        FP_DENORM=(1 << 0),
+        FP_INF_NAN=(1 << 1),
+        FP_ROUND_TO_NEAREST=(1 << 2),
+        FP_ROUND_TO_ZERO=(1 << 3),
+        FP_ROUND_TO_INF=(1 << 4),
+        FP_FMA=(1 << 5),
+        FP_SOFT_FLOAT=(1 << 6),
+        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
+    };
+    int doubleFPConfig() const;
+    int singleFPConfig() const;
+    int halfFPConfig() const;
+
+    bool endianLittle() const;
+    bool errorCorrectionSupport() const;
+
+    enum
+    {
+        EXEC_KERNEL=(1 << 0),
+        EXEC_NATIVE_KERNEL=(1 << 1)
+    };
+    int executionCapabilities() const;
+
+    size_t globalMemCacheSize() const;
+
+    enum
+    {
+        NO_CACHE=0,
+        READ_ONLY_CACHE=1,
+        READ_WRITE_CACHE=2
+    };
+    int globalMemCacheType() const;
+    int globalMemCacheLineSize() const;
+    size_t globalMemSize() const;
+
+    size_t localMemSize() const;
+    enum
+    {
+        NO_LOCAL_MEM=0,
+        LOCAL_IS_LOCAL=1,
+        LOCAL_IS_GLOBAL=2
+    };
+    int localMemType() const;
+    bool hostUnifiedMemory() const;
+
+    bool imageSupport() const;
+
+    bool imageFromBufferSupport() const;
+    uint imagePitchAlignment() const;
+    uint imageBaseAddressAlignment() const;
+
+    size_t image2DMaxWidth() const;
+    size_t image2DMaxHeight() const;
+
+    size_t image3DMaxWidth() const;
+    size_t image3DMaxHeight() const;
+    size_t image3DMaxDepth() const;
+
+    size_t imageMaxBufferSize() const;
+    size_t imageMaxArraySize() const;
+
+    enum
+    {
+        UNKNOWN_VENDOR=0,
+        VENDOR_AMD=1,
+        VENDOR_INTEL=2,
+        VENDOR_NVIDIA=3
+    };
+    int vendorID() const;
+    // FIXIT
+    // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
+    // This method should use platform name instead of vendor name.
+    // After fix restore code in arithm.cpp: ocl_compare()
+    inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+
+    int maxClockFrequency() const;
+    int maxComputeUnits() const;
+    int maxConstantArgs() const;
+    size_t maxConstantBufferSize() const;
+
+    size_t maxMemAllocSize() const;
+    size_t maxParameterSize() const;
+
+    int maxReadImageArgs() const;
+    int maxWriteImageArgs() const;
+    int maxSamplers() const;
+
+    size_t maxWorkGroupSize() const;
+    int maxWorkItemDims() const;
+    void maxWorkItemSizes(size_t*) const;
+
+    int memBaseAddrAlign() const;
+
+    int nativeVectorWidthChar() const;
+    int nativeVectorWidthShort() const;
+    int nativeVectorWidthInt() const;
+    int nativeVectorWidthLong() const;
+    int nativeVectorWidthFloat() const;
+    int nativeVectorWidthDouble() const;
+    int nativeVectorWidthHalf() const;
+
+    int preferredVectorWidthChar() const;
+    int preferredVectorWidthShort() const;
+    int preferredVectorWidthInt() const;
+    int preferredVectorWidthLong() const;
+    int preferredVectorWidthFloat() const;
+    int preferredVectorWidthDouble() const;
+    int preferredVectorWidthHalf() const;
+
+    size_t printfBufferSize() const;
+    size_t profilingTimerResolution() const;
+
+    static const Device& getDefault();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS Context
+{
+public:
+    Context();
+    explicit Context(int dtype);
+    ~Context();
+    Context(const Context& c);
+    Context& operator = (const Context& c);
+
+    bool create();
+    bool create(int dtype);
+    size_t ndevices() const;
+    const Device& device(size_t idx) const;
+    Program getProg(const ProgramSource& prog,
+                    const String& buildopt, String& errmsg);
+
+    static Context& getDefault(bool initialize = true);
+    void* ptr() const;
+
+    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+    bool useSVM() const;
+    void setUseSVM(bool enabled);
+
+    struct Impl;
+    Impl* p;
+};
+
+class CV_EXPORTS Platform
+{
+public:
+    Platform();
+    ~Platform();
+    Platform(const Platform& p);
+    Platform& operator = (const Platform& p);
+
+    void* ptr() const;
+    static Platform& getDefault();
+
+    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+/*
+//! @brief Attaches OpenCL context to OpenCV
+//
+//! @note Note:
+//    OpenCV will check if available OpenCL platform has platformName name,
+//    then assign context to OpenCV and call clRetainContext function.
+//    The deviceID device will be used as target device and new command queue
+//    will be created.
+//
+// Params:
+//! @param platformName - name of OpenCL platform to attach,
+//!                       this string is used to check if platform is available
+//!                       to OpenCV at runtime
+//! @param platfromID   - ID of platform attached context was created for
+//! @param context      - OpenCL context to be attached to OpenCV
+//! @param deviceID     - ID of device, must be created from attached context
+*/
+CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
+
+/*
+//! @brief Convert OpenCL buffer to UMat
+//
+//! @note Note:
+//   OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV.
+//   Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned
+//   to UMat and clRetainMemObject is called.
+//
+// Params:
+//! @param  cl_mem_buffer - source clBuffer handle
+//! @param  step          - num of bytes in single row
+//! @param  rows          - number of rows
+//! @param  cols          - number of cols
+//! @param  type          - OpenCV type of image
+//! @param  dst           - destination UMat
+*/
+CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
+
+/*
+//! @brief Convert OpenCL image2d_t to UMat
+//
+//! @note Note:
+//   OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV
+//   UMat formats.
+//   Memory content is copied from image to UMat with
+//   clEnqueueCopyImageToBuffer function.
+//
+// Params:
+//! @param  cl_mem_image - source image2d_t handle
+//! @param  dst          - destination UMat
+*/
+CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
+
+// TODO Move to internal header
+void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+class CV_EXPORTS Queue
+{
+public:
+    Queue();
+    explicit Queue(const Context& c, const Device& d=Device());
+    ~Queue();
+    Queue(const Queue& q);
+    Queue& operator = (const Queue& q);
+
+    bool create(const Context& c=Context(), const Device& d=Device());
+    void finish();
+    void* ptr() const;
+    static Queue& getDefault();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS KernelArg
+{
+public:
+    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
+    KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
+    KernelArg();
+
+    static KernelArg Local() { return KernelArg(LOCAL, 0); }
+    static KernelArg PtrWriteOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadWrite(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
+    static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg Constant(const Mat& m);
+    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+    { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
+
+    int flags;
+    UMat* m;
+    const void* obj;
+    size_t sz;
+    int wscale, iwscale;
+};
+
+
+class CV_EXPORTS Kernel
+{
+public:
+    Kernel();
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts = String(), String* errmsg=0);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+
+    bool empty() const;
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String* errmsg=0);
+
+    int set(int i, const void* value, size_t sz);
+    int set(int i, const Image2D& image2D);
+    int set(int i, const UMat& m);
+    int set(int i, const KernelArg& arg);
+    template<typename _Tp> int set(int i, const _Tp& value)
+    { return set(i, &value, sizeof(value)); }
+
+    template<typename _Tp0>
+    Kernel& args(const _Tp0& a0)
+    {
+        set(0, a0); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1)
+    {
+        int i = set(0, a0); set(i, a1); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
+    {
+        int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
+        i = set(i, a3); set(i, a4); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2,
+             typename _Tp3, typename _Tp4, typename _Tp5>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
+        i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
+        i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
+        i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        set(i, a12); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); set(i, a13); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13, typename _Tp14>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13, typename _Tp14, typename _Tp15>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
+    }
+    /*
+    Run the OpenCL kernel.
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension.
+    It is not the final globalsize passed to OpenCL.
+    Each dimension will be adjusted to the nearest integer divisible by the corresponding value in localsize.
+    If localsize is NULL, it will still be adjusted depending on dims.
+    The adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for OpenCL computation to finish before return.
+    @param q command queue
+    */
+    bool run(int dims, size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+    bool runTask(bool sync, const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    size_t preferedWorkGroupSizeMultiple() const;
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Program
+{
+public:
+    Program();
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    explicit Program(const String& buf);
+    Program(const Program& prog);
+
+    Program& operator = (const Program& prog);
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg);
+    bool read(const String& buf, const String& buildflags);
+    bool write(String& buf) const;
+
+    const ProgramSource& source() const;
+    void* ptr() const;
+
+    String getPrefix() const;
+    static String getPrefix(const String& buildflags);
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS ProgramSource
+{
+public:
+    typedef uint64 hash_t;
+
+    ProgramSource();
+    explicit ProgramSource(const String& prog);
+    explicit ProgramSource(const char* prog);
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+
+    const String& source() const;
+    hash_t hash() const;
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+class CV_EXPORTS PlatformInfo
+{
+public:
+    PlatformInfo();
+    explicit PlatformInfo(void* id);
+    ~PlatformInfo();
+
+    PlatformInfo(const PlatformInfo& i);
+    PlatformInfo& operator =(const PlatformInfo& i);
+
+    String name() const;
+    String vendor() const;
+    String version() const;
+    int deviceNumber() const;
+    void getDevice(Device& device, int d) const;
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
+CV_EXPORTS const char* typeToStr(int t);
+CV_EXPORTS const char* memopTypeToStr(int t);
+CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
+CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
+
+
+enum OclVectorStrategy
+{
+    // all matrices have its own vector width
+    OCL_VECTOR_OWN = 0,
+    // all matrices have maximal vector width among all matrices
+    // (useful for cases when matrices have different data types)
+    OCL_VECTOR_MAX = 1,
+
+    // default strategy
+    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
+};
+
+CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                         InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                         InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                         OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+
+CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
+                                       InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                       InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                       InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                       OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+
+// with OCL_VECTOR_MAX strategy
+CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                            InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                            InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
+
+CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
+
+class CV_EXPORTS Image2D
+{
+public:
+    Image2D();
+
+    // src:     The UMat from which to get image properties and data
+    // norm:    Flag to enable the use of normalized channel data types
+    // alias:   Flag indicating that the image should alias the src UMat.
+    //          If true, changes to the image or src will be reflected in
+    //          both objects.
+    explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
+    Image2D(const Image2D & i);
+    ~Image2D();
+
+    Image2D & operator = (const Image2D & i);
+
+    // Indicates if creating an aliased image should succeed.  Depends on the
+    // underlying platform and the dimensions of the UMat.
+    static bool canCreateAlias(const UMat &u);
+
+    // Indicates if the image format is supported.
+    static bool isFormatSupported(int depth, int cn, bool norm);
+
+    void* ptr() const;
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+CV_EXPORTS MatAllocator* getOpenCLAllocator();
+
+
+#ifdef __OPENCV_BUILD
+namespace internal {
+
+CV_EXPORTS bool isOpenCLForced();
+#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
+
+CV_EXPORTS bool isPerformanceCheckBypassed();
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif
+
+//! @}
+
+}}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/ocl_genbase.hpp b/thirdparty/linux/include/opencv2/core/ocl_genbase.hpp
new file mode 100644
index 0000000..5408958
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/ocl_genbase.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_GENBASE_HPP
+#define OPENCV_OPENCL_GENBASE_HPP
+
+namespace cv
+{
+namespace ocl
+{
+
+//! @cond IGNORED
+
+struct ProgramEntry
+{
+    const char* name;
+    const char* programStr;
+    const char* programHash;
+};
+
+//! @endcond
+
+}
+}
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/opengl.hpp b/thirdparty/linux/include/opencv2/core/opengl.hpp
new file mode 100644
index 0000000..8b63d6c
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/opengl.hpp
@@ -0,0 +1,729 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPENGL_HPP
+#define OPENCV_CORE_OPENGL_HPP
+
+#ifndef __cplusplus
+#  error opengl.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "ocl.hpp"
+
+namespace cv { namespace ogl {
+
+/** @addtogroup core_opengl
+This section describes OpenGL interoperability.
+
+To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is
+supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
+supported). For GTK backend gtkglext-1.0 library is required.
+
+To use OpenGL functionality you should first create OpenGL context (window or frame buffer). You can
+do this with namedWindow function or with other OpenGL toolkit (GLUT, for example).
+*/
+//! @{
+
+/////////////////// OpenGL Objects ///////////////////
+
+/** @brief Smart pointer for OpenGL buffer object with reference counting.
+
+Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
+context. These can be used to store vertex data, pixel data retrieved from images or the
+framebuffer, and a variety of other things.
+
+ogl::Buffer has interface similar with Mat interface and represents 2D array memory.
+
+ogl::Buffer supports memory transfers between host and device and also can be mapped to CUDA memory.
+ */
+class CV_EXPORTS Buffer
+{
+public:
+    /** @brief The target defines how you intend to use the buffer object.
+    */
+    enum Target
+    {
+        ARRAY_BUFFER         = 0x8892, //!< The buffer will be used as a source for vertex data
+        ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
+        PIXEL_PACK_BUFFER    = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
+        PIXEL_UNPACK_BUFFER  = 0x88EC  //!< The buffer will be used for writing to OpenGL textures
+    };
+
+    enum Access
+    {
+        READ_ONLY  = 0x88B8,
+        WRITE_ONLY = 0x88B9,
+        READ_WRITE = 0x88BA
+    };
+
+    /** @brief The constructors.
+
+    Creates empty ogl::Buffer object, creates ogl::Buffer object from existed buffer ( abufId
+    parameter), allocates memory for ogl::Buffer object or copies from host/device memory.
+     */
+    Buffer();
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Allocates memory for ogl::Buffer object.
+
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Decrements the reference counter and destroys the buffer object if needed.
+
+    The function will call setAutoRelease(true) .
+     */
+    void release();
+
+    /** @brief Sets auto release mode.
+
+    The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was
+    bound to a window it could be released at any time (user can close a window). If object's destructor
+    is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't destroy
+    OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context).
+    This function can force ogl::Buffer destructor to destroy OpenGL object.
+    @param flag Auto release mode (if true, release will be called in object's destructor).
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Copies from host/device memory to OpenGL buffer.
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload */
+    void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
+
+    @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or
+    ogl::Buffer ).
+     */
+    void copyTo(OutputArray arr) const;
+
+    /** @overload */
+    void copyTo(OutputArray arr, cuda::Stream& stream) const;
+
+    /** @brief Creates a full copy of the buffer object and the underlying data.
+
+    @param target Buffer usage for destination buffer.
+    @param autoRelease Auto release mode for destination buffer.
+     */
+    Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
+
+    /** @brief Binds OpenGL buffer to the specified buffer binding point.
+
+    @param target Binding point. See cv::ogl::Buffer::Target .
+     */
+    void bind(Target target) const;
+
+    /** @brief Unbind any buffers from the specified binding point.
+
+    @param target Binding point. See cv::ogl::Buffer::Target .
+     */
+    static void unbind(Target target);
+
+    /** @brief Maps OpenGL buffer to host memory.
+
+    mapHost maps to the client's address space the entire data store of the buffer object. The data can
+    then be directly read and/or written relative to the returned pointer, depending on the specified
+    access policy.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
+
+    This operation can lead to memory transfers between host and device.
+
+    Only one buffer object can be mapped at a time.
+    @param access Access policy, indicating whether it will be possible to read from, write to, or both
+    read from and write to the buffer object's mapped data store. The symbolic constant must be
+    ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
+     */
+    Mat mapHost(Access access);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapHost();
+
+    //! map to device memory (blocking)
+    cuda::GpuMat mapDevice();
+    void unmapDevice();
+
+    /** @brief Maps OpenGL buffer to CUDA device memory.
+
+    This operatation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
+     */
+    cuda::GpuMat mapDevice(cuda::Stream& stream);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapDevice(cuda::Stream& stream);
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    int type() const;
+    int depth() const;
+    int channels() const;
+    int elemSize() const;
+    int elemSize1() const;
+
+    //! get OpenGL opject id
+    unsigned int bufId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    int type_;
+};
+
+/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
+ */
+class CV_EXPORTS Texture2D
+{
+public:
+    /** @brief An Image Format describes the way that the images in Textures store their data.
+    */
+    enum Format
+    {
+        NONE            = 0,
+        DEPTH_COMPONENT = 0x1902, //!< Depth
+        RGB             = 0x1907, //!< Red, Green, Blue
+        RGBA            = 0x1908  //!< Red, Green, Blue, Alpha
+    };
+
+    /** @brief The constructors.
+
+    Creates empty ogl::Texture2D object, allocates memory for ogl::Texture2D object or copies from
+    host/device memory.
+     */
+    Texture2D();
+
+    /** @overload */
+    Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload */
+    Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    explicit Texture2D(InputArray arr, bool autoRelease = false);
+
+    /** @brief Allocates memory for ogl::Texture2D object.
+
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void create(int arows, int acols, Format aformat, bool autoRelease = false);
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @brief Decrements the reference counter and destroys the texture object if needed.
+
+    The function will call setAutoRelease(true) .
+     */
+    void release();
+
+    /** @brief Sets auto release mode.
+
+    @param flag Auto release mode (if true, release will be called in object's destructor).
+
+    The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was
+    bound to a window it could be released at any time (user can close a window). If object's destructor
+    is called after destruction of the context it will cause an error. Thus ogl::Texture2D doesn't
+    destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL
+    context). This function can force ogl::Texture2D destructor to destroy OpenGL object.
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Copies from host/device memory to OpenGL texture.
+
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void copyFrom(InputArray arr, bool autoRelease = false);
+
+    /** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
+
+    @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
+    ogl::Texture2D ).
+    @param ddepth Destination depth.
+    @param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
+     */
+    void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
+
+    /** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
+    */
+    void bind() const;
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    Format format() const;
+
+    //! get OpenGL opject id
+    unsigned int texId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    Format format_;
+};
+
+/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
+
+ogl::Arrays stores vertex data in ogl::Buffer objects.
+ */
+class CV_EXPORTS Arrays
+{
+public:
+    /** @brief Default constructor
+     */
+    Arrays();
+
+    /** @brief Sets an array of vertex coordinates.
+    @param vertex array with vertex coordinates, can be both host and device memory.
+    */
+    void setVertexArray(InputArray vertex);
+
+    /** @brief Resets vertex coordinates.
+    */
+    void resetVertexArray();
+
+    /** @brief Sets an array of vertex colors.
+    @param color array with vertex colors, can be both host and device memory.
+     */
+    void setColorArray(InputArray color);
+
+    /** @brief Resets vertex colors.
+    */
+    void resetColorArray();
+
+    /** @brief Sets an array of vertex normals.
+    @param normal array with vertex normals, can be both host and device memory.
+     */
+    void setNormalArray(InputArray normal);
+
+    /** @brief Resets vertex normals.
+    */
+    void resetNormalArray();
+
+    /** @brief Sets an array of vertex texture coordinates.
+    @param texCoord array with vertex texture coordinates, can be both host and device memory.
+     */
+    void setTexCoordArray(InputArray texCoord);
+
+    /** @brief Resets vertex texture coordinates.
+    */
+    void resetTexCoordArray();
+
+    /** @brief Releases all inner buffers.
+    */
+    void release();
+
+    /** @brief Sets auto release mode all inner buffers.
+    @param flag Auto release mode.
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Binds all vertex arrays.
+    */
+    void bind() const;
+
+    /** @brief Returns the vertex count.
+    */
+    int size() const;
+    bool empty() const;
+
+private:
+    int size_;
+    Buffer vertex_;
+    Buffer color_;
+    Buffer normal_;
+    Buffer texCoord_;
+};
+
+/////////////////// Render Functions ///////////////////
+
+//! render mode
+enum RenderModes {
+    POINTS         = 0x0000,
+    LINES          = 0x0001,
+    LINE_LOOP      = 0x0002,
+    LINE_STRIP     = 0x0003,
+    TRIANGLES      = 0x0004,
+    TRIANGLE_STRIP = 0x0005,
+    TRIANGLE_FAN   = 0x0006,
+    QUADS          = 0x0007,
+    QUAD_STRIP     = 0x0008,
+    POLYGON        = 0x0009
+};
+
+/** @brief Render OpenGL texture or primitives.
+@param tex Texture to draw.
+@param wndRect Region of window, where to draw a texture (normalized coordinates).
+@param texRect Region of texture to draw (normalized coordinates).
+ */
+CV_EXPORTS void render(const Texture2D& tex,
+    Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
+    Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
+
+/** @overload
+@param arr Array of privitives vertices.
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
+
+/** @overload
+@param arr Array of privitives vertices.
+@param indices Array of vertices indices (host or device memory).
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
+
+/////////////////// CL-GL Interoperability Functions ///////////////////
+
+namespace ocl {
+using namespace cv::ocl;
+
+// TODO static functions in the Context class
+/** @brief Creates OpenCL context from GL.
+@return Returns reference to OpenCL Context
+ */
+CV_EXPORTS Context& initializeContextFromGL();
+
+} // namespace cv::ogl::ocl
+
+/** @brief Converts InputArray to Texture2D object.
+@param src     - source InputArray.
+@param texture - destination Texture2D object.
+ */
+CV_EXPORTS void convertToGLTexture2D(InputArray src, Texture2D& texture);
+
+/** @brief Converts Texture2D object to OutputArray.
+@param texture - source Texture2D object.
+@param dst     - destination OutputArray.
+ */
+CV_EXPORTS void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst);
+
+/** @brief Maps Buffer object to process on CL side (convert to UMat).
+
+Function creates CL buffer from GL one, and then constructs UMat that can be used
+to process buffer data with OpenCV functions. Note that in current implementation
+UMat constructed this way doesn't own corresponding GL buffer object, so it is
+the user responsibility to close down CL/GL buffers relationships by explicitly
+calling unmapGLBuffer() function.
+@param buffer      - source Buffer object.
+@param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE).
+@return Returns UMat object
+ */
+CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, int accessFlags = ACCESS_READ|ACCESS_WRITE);
+
+/** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer).
+
+Function must be called explicitly by the user for each UMat previously constructed
+by the call to mapGLBuffer() function.
+@param u           - source UMat, created by mapGLBuffer().
+ */
+CV_EXPORTS void unmapGLBuffer(UMat& u);
+
+}} // namespace cv::ogl
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cuda
+//! @{
+
+/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
+
+This function should be explicitly called after OpenGL context creation and before any CUDA calls.
+@param device System index of a CUDA device starting with 0.
+@ingroup core_opengl
+ */
+CV_EXPORTS void setGlDevice(int device = 0);
+
+//! @}
+
+}}
+
+//! @cond IGNORED
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+inline
+cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(arows, acols, atype, target, autoRelease);
+}
+
+inline
+cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(asize, atype, target, autoRelease);
+}
+
+inline
+void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
+{
+    create(asize.height, asize.width, atype, target, autoRelease);
+}
+
+inline
+int cv::ogl::Buffer::rows() const
+{
+    return rows_;
+}
+
+inline
+int cv::ogl::Buffer::cols() const
+{
+    return cols_;
+}
+
+inline
+cv::Size cv::ogl::Buffer::size() const
+{
+    return Size(cols_, rows_);
+}
+
+inline
+bool cv::ogl::Buffer::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+
+inline
+int cv::ogl::Buffer::type() const
+{
+    return type_;
+}
+
+inline
+int cv::ogl::Buffer::depth() const
+{
+    return CV_MAT_DEPTH(type_);
+}
+
+inline
+int cv::ogl::Buffer::channels() const
+{
+    return CV_MAT_CN(type_);
+}
+
+inline
+int cv::ogl::Buffer::elemSize() const
+{
+    return CV_ELEM_SIZE(type_);
+}
+
+inline
+int cv::ogl::Buffer::elemSize1() const
+{
+    return CV_ELEM_SIZE1(type_);
+}
+
+///////
+
+inline
+cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(arows, acols, aformat, autoRelease);
+}
+
+inline
+cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(asize, aformat, autoRelease);
+}
+
+inline
+void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
+{
+    create(asize.height, asize.width, aformat, autoRelease);
+}
+
+inline
+int cv::ogl::Texture2D::rows() const
+{
+    return rows_;
+}
+
+inline
+int cv::ogl::Texture2D::cols() const
+{
+    return cols_;
+}
+
+inline
+cv::Size cv::ogl::Texture2D::size() const
+{
+    return Size(cols_, rows_);
+}
+
+inline
+bool cv::ogl::Texture2D::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+
+inline
+cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
+{
+    return format_;
+}
+
+///////
+
+inline
+cv::ogl::Arrays::Arrays() : size_(0)
+{
+}
+
+inline
+int cv::ogl::Arrays::size() const
+{
+    return size_;
+}
+
+inline
+bool cv::ogl::Arrays::empty() const
+{
+    return size_ == 0;
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_OPENGL_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/operations.hpp b/thirdparty/linux/include/opencv2/core/operations.hpp
new file mode 100644
index 0000000..4a4ad9e
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/operations.hpp
@@ -0,0 +1,530 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPERATIONS_HPP
+#define OPENCV_CORE_OPERATIONS_HPP
+
+#ifndef __cplusplus
+#  error operations.hpp header must be compiled as C++
+#endif
+
+#include <cstdio>
+
+//! @cond IGNORED
+
+namespace cv
+{
+
+////////////////////////////// Matx methods depending on core API /////////////////////////////
+
+namespace internal
+{
+
+template<typename _Tp, int m> struct Matx_FastInvOp
+{
+    bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
+    {
+        Matx<_Tp, m, m> temp = a;
+
+        // assume that b is all 0's on input => make it a unity matrix
+        for( int i = 0; i < m; i++ )
+            b(i, i) = (_Tp)1;
+
+        if( method == DECOMP_CHOLESKY )
+            return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
+
+        return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const
+    {
+        _Tp d = determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        b(1,1) = a(0,0)*d;
+        b(0,0) = a(1,1)*d;
+        b(0,1) = -a(0,1)*d;
+        b(1,0) = -a(1,0)*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d;
+        b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * d;
+        b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * d;
+
+        b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * d;
+        b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * d;
+        b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * d;
+
+        b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * d;
+        b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * d;
+        b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * d;
+        return true;
+    }
+};
+
+
+template<typename _Tp, int m, int n> struct Matx_FastSolveOp
+{
+    bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
+                    Matx<_Tp, m, n>& x, int method) const
+    {
+        Matx<_Tp, m, m> temp = a;
+        x = b;
+        if( method == DECOMP_CHOLESKY )
+            return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
+
+        return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
+                    Matx<_Tp, 2, 1>& x, int) const
+    {
+        _Tp d = determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d;
+        x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 1>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
+                    Matx<_Tp, 3, 1>& x, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
+                a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) +
+                a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2)));
+
+        x(1) = d*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) -
+                b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) +
+                a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0)));
+
+        x(2) = d*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) -
+                a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) +
+                b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)));
+        return true;
+    }
+};
+
+} // internal
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randu(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randn(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
+{
+    Matx<_Tp, n, m> b;
+    bool ok;
+    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
+        ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method);
+    else
+    {
+        Mat A(*this, false), B(b, false);
+        ok = (invert(A, B, method) != 0);
+    }
+    if( NULL != p_is_ok ) { *p_is_ok = ok; }
+    return ok ? b : Matx<_Tp, n, m>::zeros();
+}
+
+template<typename _Tp, int m, int n> template<int l> inline
+Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
+{
+    Matx<_Tp, n, l> x;
+    bool ok;
+    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
+        ok = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, x, method);
+    else
+    {
+        Mat A(*this, false), B(rhs, false), X(x, false);
+        ok = cv::solve(A, B, X, method);
+    }
+
+    return ok ? x : Matx<_Tp, n, l>::zeros();
+}
+
+
+
+////////////////////////// Augmenting algebraic & logical operations //////////////////////////
+
+#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    static inline A& operator op (A& a, const B& b) { cvop; return a; }
+
+#define CV_MAT_AUG_OPERATOR(op, cvop, A, B)   \
+    CV_MAT_AUG_OPERATOR1(op, cvop, A, B)      \
+    CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B)                   \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (*=, a.convertTo(a, -1, b), Mat, double)
+CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
+
+CV_MAT_AUG_OPERATOR  (/=, cv::divide(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
+CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
+
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+#undef CV_MAT_AUG_OPERATOR_T
+#undef CV_MAT_AUG_OPERATOR
+#undef CV_MAT_AUG_OPERATOR1
+
+
+
+///////////////////////////////////////////// SVD /////////////////////////////////////////////
+
+inline SVD::SVD() {}
+inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); }
+inline void SVD::solveZ( InputArray m, OutputArray _dst )
+{
+    Mat mtx = m.getMat();
+    SVD svd(mtx, (mtx.rows >= mtx.cols ? 0 : SVD::FULL_UV));
+    _dst.create(svd.vt.cols, 1, svd.vt.type());
+    Mat dst = _dst.getMat();
+    svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+    SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
+    SVD::compute(_a, _w, _u, _vt);
+    CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _w(w, false);
+    SVD::compute(_a, _w);
+    CV_Assert(_w.data == (uchar*)&w.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm, int nb> inline void
+SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
+                const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
+                Matx<_Tp, n, nb>& dst )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
+    SVD::backSubst(_w, _u, _vt, _rhs, _dst);
+    CV_Assert(_dst.data == (uchar*)&dst.val[0]);
+}
+
+
+
+/////////////////////////////////// Multiply-with-Carry RNG ///////////////////////////////////
+
+inline RNG::RNG()              { state = 0xffffffff; }
+inline RNG::RNG(uint64 _state) { state = _state ? _state : 0xffffffff; }
+
+inline RNG::operator uchar()    { return (uchar)next(); }
+inline RNG::operator schar()    { return (schar)next(); }
+inline RNG::operator ushort()   { return (ushort)next(); }
+inline RNG::operator short()    { return (short)next(); }
+inline RNG::operator int()      { return (int)next(); }
+inline RNG::operator unsigned() { return next(); }
+inline RNG::operator float()    { return next()*2.3283064365386962890625e-10f; }
+inline RNG::operator double()   { unsigned t = next(); return (((uint64)t << 32) | next()) * 5.4210108624275221700372640043497e-20; }
+
+inline unsigned RNG::operator ()(unsigned N) { return (unsigned)uniform(0,N); }
+inline unsigned RNG::operator ()()           { return next(); }
+
+inline int    RNG::uniform(int a, int b)       { return a == b ? a : (int)(next() % (b - a) + a); }
+inline float  RNG::uniform(float a, float b)   { return ((float)*this)*(b - a) + a; }
+inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
+
+inline unsigned RNG::next()
+{
+    state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32);
+    return (unsigned)state;
+}
+
+//! returns the next unifomly-distributed random number of the specified type
+template<typename _Tp> static inline _Tp randu()
+{
+  return (_Tp)theRNG();
+}
+
+///////////////////////////////// Formatted string generation /////////////////////////////////
+
+CV_EXPORTS String format( const char* fmt, ... );
+
+///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
+
+static inline
+Ptr<Formatted> format(InputArray mtx, int fmt)
+{
+    return Formatter::get(fmt)->format(mtx.getMat());
+}
+
+static inline
+int print(Ptr<Formatted> fmtd, FILE* stream = stdout)
+{
+    int written = 0;
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        written += fputs(str, stream);
+
+    return written;
+}
+
+static inline
+int print(const Mat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx), stream);
+}
+
+static inline
+int print(const UMat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point3_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp, int m, int n> static inline
+int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(cv::Mat(matx)), stream);
+}
+
+//! @endcond
+
+/****************************************************************************************\
+*                                  Auxiliary algorithms                                  *
+\****************************************************************************************/
+
+/** @brief Splits an element set into equivalency classes.
+
+The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements
+into one or more equivalency classes, as described in
+<http://en.wikipedia.org/wiki/Disjoint-set_data_structure> . The function returns the number of
+equivalency classes.
+@param _vec Set of elements stored as a vector.
+@param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is
+a 0-based cluster index of `vec[i]`.
+@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an
+instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The
+predicate returns true when the elements are certainly in the same class, and returns false if they
+may or may not be in the same class.
+@ingroup core_cluster
+*/
+template<typename _Tp, class _EqPredicate> int
+partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
+          _EqPredicate predicate=_EqPredicate())
+{
+    int i, j, N = (int)_vec.size();
+    const _Tp* vec = &_vec[0];
+
+    const int PARENT=0;
+    const int RANK=1;
+
+    std::vector<int> _nodes(N*2);
+    int (*nodes)[2] = (int(*)[2])&_nodes[0];
+
+    // The first O(N) pass: create N single-vertex trees
+    for(i = 0; i < N; i++)
+    {
+        nodes[i][PARENT]=-1;
+        nodes[i][RANK] = 0;
+    }
+
+    // The main O(N^2) pass: merge connected components
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+
+        // find root
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+
+        for( j = 0; j < N; j++ )
+        {
+            if( i == j || !predicate(vec[i], vec[j]))
+                continue;
+            int root2 = j;
+
+            while( nodes[root2][PARENT] >= 0 )
+                root2 = nodes[root2][PARENT];
+
+            if( root2 != root )
+            {
+                // unite both trees
+                int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
+                if( rank > rank2 )
+                    nodes[root2][PARENT] = root;
+                else
+                {
+                    nodes[root][PARENT] = root2;
+                    nodes[root2][RANK] += rank == rank2;
+                    root = root2;
+                }
+                CV_Assert( nodes[root][PARENT] < 0 );
+
+                int k = j, parent;
+
+                // compress the path from node2 to root
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+
+                // compress the path from node to root
+                k = i;
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+            }
+        }
+    }
+
+    // Final O(N) pass: enumerate classes
+    labels.resize(N);
+    int nclasses = 0;
+
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+        // re-use the rank as the class label
+        if( nodes[root][RANK] >= 0 )
+            nodes[root][RANK] = ~nclasses++;
+        labels[i] = ~nodes[root][RANK];
+    }
+
+    return nclasses;
+}
+
+} // cv
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/optim.hpp b/thirdparty/linux/include/opencv2/core/optim.hpp
new file mode 100644
index 0000000..7249e0f
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/optim.hpp
@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPTIM_HPP
+#define OPENCV_OPTIM_HPP
+
+#include "opencv2/core.hpp"
+
+namespace cv
+{
+
+/** @addtogroup core_optim
+The algorithms in this section minimize or maximize function value within specified constraints or
+without any constraints.
+@{
+*/
+
+/** @brief Basic interface for all solvers
+ */
+class CV_EXPORTS MinProblemSolver : public Algorithm
+{
+public:
+    /** @brief Represents function being optimized
+     */
+    class CV_EXPORTS Function
+    {
+    public:
+        virtual ~Function() {}
+        virtual int getDims() const = 0;
+        virtual double getGradientEps() const;
+        virtual double calc(const double* x) const = 0;
+        virtual void getGradient(const double* x,double* grad);
+    };
+
+    /** @brief Getter for the optimized function.
+
+    The optimized function is represented by Function interface, which requires derivatives to
+    implement the sole method calc(double*) to evaluate the function.
+
+    @return Smart-pointer to an object that implements Function interface - it represents the
+    function that is being optimized. It can be empty, if no function was given so far.
+     */
+    virtual Ptr<Function> getFunction() const = 0;
+
+    /** @brief Setter for the optimized function.
+
+    *It should be called at least once before the call to* minimize(), as default value is not usable.
+
+    @param f The new function to optimize.
+     */
+    virtual void setFunction(const Ptr<Function>& f) = 0;
+
+    /** @brief Getter for the previously set terminal criteria for this algorithm.
+
+    @return Deep copy of the terminal criteria used at the moment.
+     */
+    virtual TermCriteria getTermCriteria() const = 0;
+
+    /** @brief Set terminal criteria for solver.
+
+    This method *is not necessary* to be called before the first call to minimize(), as the default
+    value is sensible.
+
+    Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when
+    the function values at the vertices of simplex are within termcrit.epsilon range or simplex
+    becomes so small that it can enclosed in a box with termcrit.epsilon sides, whatever comes
+    first.
+    @param termcrit Terminal criteria to be used, represented as cv::TermCriteria structure.
+     */
+    virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+    /** @brief actually runs the algorithm and performs the minimization.
+
+    The sole input parameter determines the centroid of the starting simplex (roughly, it tells
+    where to start), all the others (terminal criteria, initial step, function to be minimized) are
+    supposed to be set via the setters before the call to this method or the default values (not
+    always sensible) will be used.
+
+    @param x The initial point, that will become a centroid of an initial simplex. After the algorithm
+    will terminate, it will be setted to the point where the algorithm stops, the point of possible
+    minimum.
+    @return The value of a function at the point found.
+     */
+    virtual double minimize(InputOutputArray x) = 0;
+};
+
+/** @brief This class is used to perform the non-linear non-constrained minimization of a function,
+
+defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as
+**downhill simplex method**. The basic idea about the method can be obtained from
+<http://en.wikipedia.org/wiki/Nelder-Mead_method>.
+
+It should be noted, that this method, although deterministic, is rather a heuristic and therefore
+may converge to a local minima, not necessary a global one. It is iterative optimization technique,
+which at each step uses an information about the values of a function evaluated only at `n+1`
+points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the method). At
+each step new point is chosen to evaluate function at, obtained value is compared with previous
+ones and based on this information simplex changes it's shape , slowly moving to the local minimum.
+Thus this method is using *only* function values to make decision, on contrary to, say, Nonlinear
+Conjugate Gradient method (which is also implemented in optim).
+
+Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when the
+function values at the vertices of simplex are within termcrit.epsilon range or simplex becomes so
+small that it can enclosed in a box with termcrit.epsilon sides, whatever comes first, for some
+defined by user positive integer termcrit.maxCount and positive non-integer termcrit.epsilon.
+
+@note DownhillSolver is a derivative of the abstract interface
+cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to
+encapsulate the functionality, common to all non-linear optimization algorithms in the optim
+module.
+
+@note term criteria should meet following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+@endcode
+ */
+class CV_EXPORTS DownhillSolver : public MinProblemSolver
+{
+public:
+    /** @brief Returns the initial step that will be used in downhill simplex algorithm.
+
+    @param step Initial step that will be used in algorithm. Note, that although corresponding setter
+    accepts column-vectors as well as row-vectors, this method will return a row-vector.
+    @see DownhillSolver::setInitStep
+     */
+    virtual void getInitStep(OutputArray step) const=0;
+
+    /** @brief Sets the initial step that will be used in downhill simplex algorithm.
+
+    Step, together with initial point (givin in DownhillSolver::minimize) are two `n`-dimensional
+    vectors that are used to determine the shape of initial simplex. Roughly said, initial point
+    determines the position of a simplex (it will become simplex's centroid), while step determines the
+    spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are
+    the initial step and initial point respectively, the vertices of a simplex will be:
+    \f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$ where \f$s_i\f$ denotes
+    projections of the initial step of *n*-th coordinate (the result of projection is treated to be
+    vector given by \f$s_i:=e_i\cdot\left<e_i\cdot s\right>\f$, where \f$e_i\f$ form canonical basis)
+
+    @param step Initial step that will be used in algorithm. Roughly said, it determines the spread
+    (size in each dimension) of an initial simplex.
+     */
+    virtual void setInitStep(InputArray step)=0;
+
+    /** @brief This function returns the reference to the ready-to-use DownhillSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As default value for terminal criteria are
+    the only sensible ones, MinProblemSolver::setFunction() and DownhillSolver::setInitStep()
+    should be called upon the obtained object, if the respective parameters were not given to
+    create(). Otherwise, the two ways (give parameters to createDownhillSolver() or miss them out
+    and call the MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are absolutely
+    equivalent (and will drop the same errors in the same way, should invalid input be detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param initStep Initial step, that will be used to construct the initial simplex, similarly to the one
+    you submit via MinProblemSolver::setInitStep.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+     */
+    static Ptr<DownhillSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
+                                      InputArray initStep=Mat_<double>(1,1,0.0),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
+
+/** @brief This class is used to perform the non-linear non-constrained minimization of a function
+with known gradient,
+
+defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**.
+The implementation was done based on the beautifully clear explanatory article [An Introduction to
+the Conjugate Gradient Method Without the Agonizing
+Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard
+Shewchuk. The method can be seen as an adaptation of a standard Conjugate Gradient method (see, for
+example <http://en.wikipedia.org/wiki/Conjugate_gradient_method>) for numerically solving the
+systems of linear equations.
+
+It should be noted, that this method, although deterministic, is rather a heuristic method and
+therefore may converge to a local minima, not necessary a global one. What is even more disastrous,
+most of its behaviour is ruled by gradient, therefore it essentially cannot distinguish between
+local minima and maxima. Therefore, if it starts sufficiently near to the local maximum, it may
+converge to it. Another obvious restriction is that it should be possible to compute the gradient of
+a function at any point, thus it is preferable to have analytic expression for gradient and
+computational burden should be born by the user.
+
+The latter responsibility is accompilished via the getGradient method of a
+MinProblemSolver::Function interface (which represents function being optimized). This method takes
+point a point in *n*-dimensional space (first argument represents the array of coordinates of that
+point) and comput its gradient (it should be stored in the second argument as an array).
+
+@note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface.
+
+@note term criteria should meet following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+    // or
+    termcrit.type == TermCriteria::MAX_ITER) && termcrit.maxCount > 0
+@endcode
+ */
+class CV_EXPORTS ConjGradSolver : public MinProblemSolver
+{
+public:
+    /** @brief This function returns the reference to the ready-to-use ConjGradSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As default value for terminal criteria are
+    the only sensible ones, MinProblemSolver::setFunction() should be called upon the obtained
+    object, if the function was not given to create(). Otherwise, the two ways (submit it to
+    create() or miss it out and call the MinProblemSolver::setFunction()) are absolutely equivalent
+    (and will drop the same errors in the same way, should invalid input be detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+    */
+    static Ptr<ConjGradSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<ConjGradSolver::Function>(),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
+
+//! return codes for cv::solveLP() function
+enum SolveLPResult
+{
+    SOLVELP_UNBOUNDED    = -2, //!< problem is unbounded (target function can achieve arbitrary high values)
+    SOLVELP_UNFEASIBLE    = -1, //!< problem is unfeasible (there are no points that satisfy all the constraints imposed)
+    SOLVELP_SINGLE    = 0, //!< there is only one maximum for target function
+    SOLVELP_MULTI    = 1 //!< there are multiple maxima for target function - the arbitrary one is returned
+};
+
+/** @brief Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
+
+What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as:
+
+\f[\mbox{Maximize } c\cdot x\\
+ \mbox{Subject to:}\\
+ Ax\leq b\\
+ x\geq 0\f]
+
+Where \f$c\f$ is fixed `1`-by-`n` row-vector, \f$A\f$ is fixed `m`-by-`n` matrix, \f$b\f$ is fixed `m`-by-`1`
+column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector, which satisfies the constraints.
+
+Simplex algorithm is one of many algorithms that are designed to handle this sort of problems
+efficiently. Although it is not optimal in theoretical sense (there exist algorithms that can solve
+any problem written as above in polynomial time, while simplex method degenerates to exponential
+time for some special cases), it is well-studied, easy to implement and is shown to work well for
+real-life purposes.
+
+The particular implementation is taken almost verbatim from **Introduction to Algorithms, third
+edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular, the
+Bland's rule <http://en.wikipedia.org/wiki/Bland%27s_rule> is used to prevent cycling.
+
+@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should
+contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted,
+in the latter case it is understood to correspond to \f$c^T\f$.
+@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above
+and the remaining to \f$A\f$. It should containt 32- or 64-bit floating point numbers.
+@param z The solution will be returned here as a column-vector - it corresponds to \f$c\f$ in the
+formulation above. It will contain 64-bit floating point numbers.
+@return One of cv::SolveLPResult
+ */
+CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
+
+//! @}
+
+}// cv
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/ovx.hpp b/thirdparty/linux/include/opencv2/core/ovx.hpp
new file mode 100644
index 0000000..8bb7d54
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/ovx.hpp
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// OpenVX related definitions and declarations
+
+#pragma once
+#ifndef OPENCV_OVX_HPP
+#define OPENCV_OVX_HPP
+
+#include "cvdef.h"
+
+namespace cv
+{
+/// Check if use of OpenVX is possible
+CV_EXPORTS_W bool haveOpenVX();
+
+/// Check if use of OpenVX is enabled
+CV_EXPORTS_W bool useOpenVX();
+
+/// Enable/disable use of OpenVX
+CV_EXPORTS_W void setUseOpenVX(bool flag);
+} // namespace cv
+
+#endif // OPENCV_OVX_HPP
diff --git a/thirdparty/linux/include/opencv2/core/persistence.hpp b/thirdparty/linux/include/opencv2/core/persistence.hpp
new file mode 100644
index 0000000..3f18d54
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/persistence.hpp
@@ -0,0 +1,1274 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PERSISTENCE_HPP
+#define OPENCV_CORE_PERSISTENCE_HPP
+
+#ifndef __cplusplus
+#  error persistence.hpp header must be compiled as C++
+#endif
+
+//! @addtogroup core_c
+//! @{
+
+/** @brief "black box" representation of the file storage associated with a file on disk.
+
+Several functions that are described below take CvFileStorage\* as inputs and allow the user to
+save or to load hierarchical collections that consist of scalar values, standard CXCore objects
+(such as matrices, sequences, graphs), and user-defined objects.
+
+OpenCV can read and write data in XML (<http://www.w3c.org/XML>), YAML (<http://www.yaml.org>) or
+JSON (<http://www.json.org/>) formats. Below is an example of 3x3 floating-point identity matrix A,
+stored in XML and YAML files
+using CXCore functions:
+XML:
+@code{.xml}
+    <?xml version="1.0">
+    <opencv_storage>
+    <A type_id="opencv-matrix">
+      <rows>3</rows>
+      <cols>3</cols>
+      <dt>f</dt>
+      <data>1. 0. 0. 0. 1. 0. 0. 0. 1.</data>
+    </A>
+    </opencv_storage>
+@endcode
+YAML:
+@code{.yaml}
+    %YAML:1.0
+    A: !!opencv-matrix
+      rows: 3
+      cols: 3
+      dt: f
+      data: [ 1., 0., 0., 0., 1., 0., 0., 0., 1.]
+@endcode
+As it can be seen from the examples, XML uses nested tags to represent hierarchy, while YAML uses
+indentation for that purpose (similar to the Python programming language).
+
+The same functions can read and write data in both formats; the particular format is determined by
+the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for
+JSON.
+ */
+typedef struct CvFileStorage CvFileStorage;
+typedef struct CvFileNode CvFileNode;
+typedef struct CvMat CvMat;
+typedef struct CvMatND CvMatND;
+
+//! @} core_c
+
+#include "opencv2/core/types.hpp"
+#include "opencv2/core/mat.hpp"
+
+namespace cv {
+
+/** @addtogroup core_xml
+
+XML/YAML/JSON file storages.     {#xml_storage}
+=======================
+Writing to a file storage.
+--------------------------
+You can store and then restore various OpenCV data structures to/from XML (<http://www.w3c.org/XML>),
+YAML (<http://www.yaml.org>) or JSON (<http://www.json.org/>) formats. Also, it is possible store
+and load arbitrarily complex data structures, which include OpenCV data structures, as well as
+primitive data types (integer and floating-point numbers and text strings) as their elements.
+
+Use the following procedure to write something to XML, YAML or JSON:
+-# Create new FileStorage and open it for writing. It can be done with a single call to
+FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor
+and then call FileStorage::open. Format of the file (XML, YAML or JSON) is determined from the filename
+extension (".xml", ".yml"/".yaml" and ".json", respectively)
+-# Write all the data you want using the streaming operator `<<`, just like in the case of STL
+streams.
+-# Close the file using FileStorage::release. FileStorage destructor also closes the file.
+
+Here is an example:
+@code
+    #include "opencv2/opencv.hpp"
+    #include <time.h>
+
+    using namespace cv;
+
+    int main(int, char** argv)
+    {
+        FileStorage fs("test.yml", FileStorage::WRITE);
+
+        fs << "frameCount" << 5;
+        time_t rawtime; time(&rawtime);
+        fs << "calibrationDate" << asctime(localtime(&rawtime));
+        Mat cameraMatrix = (Mat_<double>(3,3) << 1000, 0, 320, 0, 1000, 240, 0, 0, 1);
+        Mat distCoeffs = (Mat_<double>(5,1) << 0.1, 0.01, -0.001, 0, 0);
+        fs << "cameraMatrix" << cameraMatrix << "distCoeffs" << distCoeffs;
+        fs << "features" << "[";
+        for( int i = 0; i < 3; i++ )
+        {
+            int x = rand() % 640;
+            int y = rand() % 480;
+            uchar lbp = rand() % 256;
+
+            fs << "{:" << "x" << x << "y" << y << "lbp" << "[:";
+            for( int j = 0; j < 8; j++ )
+                fs << ((lbp >> j) & 1);
+            fs << "]" << "}";
+        }
+        fs << "]";
+        fs.release();
+        return 0;
+    }
+@endcode
+The sample above stores to XML and integer, text string (calibration date), 2 matrices, and a custom
+structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here
+is output of the sample:
+@code{.yaml}
+%YAML:1.0
+frameCount: 5
+calibrationDate: "Fri Jun 17 14:09:29 2011\n"
+cameraMatrix: !!opencv-matrix
+   rows: 3
+   cols: 3
+   dt: d
+   data: [ 1000., 0., 320., 0., 1000., 240., 0., 0., 1. ]
+distCoeffs: !!opencv-matrix
+   rows: 5
+   cols: 1
+   dt: d
+   data: [ 1.0000000000000001e-01, 1.0000000000000000e-02,
+       -1.0000000000000000e-03, 0., 0. ]
+features:
+   - { x:167, y:49, lbp:[ 1, 0, 0, 1, 1, 0, 1, 1 ] }
+   - { x:298, y:130, lbp:[ 0, 0, 0, 1, 0, 0, 1, 1 ] }
+   - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] }
+@endcode
+
+As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see, how the
+corresponding XML file will look like.
+
+Several things can be noted by looking at the sample code and the output:
+
+-   The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are
+    2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings
+    each element has a name and is accessed by name. This is similar to structures and std::map in
+    C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by
+    indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python.
+    "Heterogeneous" means that elements of each single collection can have different types.
+
+    Top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the matrix
+    elements are stored as a sequence. Then, there is a sequence of features, where each feature is
+    represented a mapping, and lbp value in a nested sequence.
+
+-   When you write to a mapping (a structure), you write element name followed by its value. When you
+    write to a sequence, you simply write the elements one by one. OpenCV data structures (such as
+    cv::Mat) are written in absolutely the same way as simple C data structures - using `<<`
+    operator.
+
+-   To write a mapping, you first write the special string `{` to the storage, then write the
+    elements as pairs (`fs << <element_name> << <element_value>`) and then write the closing
+    `}`.
+
+-   To write a sequence, you first write the special string `[`, then write the elements, then
+    write the closing `]`.
+
+-   In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline
+    form. In the sample above matrix elements, as well as each feature, including its lbp value, is
+    stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the
+    opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the
+    data is written to XML, those extra `:` are ignored.
+
+Reading data from a file storage.
+---------------------------------
+To read the previously written XML, YAML or JSON file, do the following:
+-#  Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method.
+    In the current implementation the whole file is parsed and the whole representation of file
+    storage is built in memory as a hierarchy of file nodes (see FileNode)
+
+-#  Read the data you are interested in. Use FileStorage::operator [], FileNode::operator []
+    and/or FileNodeIterator.
+
+-#  Close the storage using FileStorage::release.
+
+Here is how to read the file created by the code sample above:
+@code
+    FileStorage fs2("test.yml", FileStorage::READ);
+
+    // first method: use (type) operator on FileNode.
+    int frameCount = (int)fs2["frameCount"];
+
+    String date;
+    // second method: use FileNode::operator >>
+    fs2["calibrationDate"] >> date;
+
+    Mat cameraMatrix2, distCoeffs2;
+    fs2["cameraMatrix"] >> cameraMatrix2;
+    fs2["distCoeffs"] >> distCoeffs2;
+
+    cout << "frameCount: " << frameCount << endl
+         << "calibration date: " << date << endl
+         << "camera matrix: " << cameraMatrix2 << endl
+         << "distortion coeffs: " << distCoeffs2 << endl;
+
+    FileNode features = fs2["features"];
+    FileNodeIterator it = features.begin(), it_end = features.end();
+    int idx = 0;
+    std::vector<uchar> lbpval;
+
+    // iterate through a sequence using FileNodeIterator
+    for( ; it != it_end; ++it, idx++ )
+    {
+        cout << "feature #" << idx << ": ";
+        cout << "x=" << (int)(*it)["x"] << ", y=" << (int)(*it)["y"] << ", lbp: (";
+        // you can also easily read numerical arrays using FileNode >> std::vector operator.
+        (*it)["lbp"] >> lbpval;
+        for( int i = 0; i < (int)lbpval.size(); i++ )
+            cout << " " << (int)lbpval[i];
+        cout << ")" << endl;
+    }
+    fs2.release();
+@endcode
+
+Format specification    {#format_spec}
+--------------------
+`([count]{u|c|w|s|i|f|d})`... where the characters correspond to fundamental C++ types:
+-   `u` 8-bit unsigned number
+-   `c` 8-bit signed number
+-   `w` 16-bit unsigned number
+-   `s` 16-bit signed number
+-   `i` 32-bit signed number
+-   `f` single precision floating-point number
+-   `d` double precision floating-point number
+-   `r` pointer, 32 lower bits of which are written as a signed integer. The type can be used to
+    store structures with links between the elements.
+
+`count` is the optional counter of values of a given type. For example, `2if` means that each array
+element is a structure of 2 integers, followed by a single-precision floating-point number. The
+equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u`
+means that the array consists of bytes, and `2d` means the array consists of pairs of doubles.
+
+@see @ref filestorage.cpp
+*/
+
+//! @{
+
+/** @example filestorage.cpp
+A complete example using the FileStorage interface
+*/
+
+////////////////////////// XML & YAML I/O //////////////////////////
+
+class CV_EXPORTS FileNode;
+class CV_EXPORTS FileNodeIterator;
+
+/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or
+reading data to/from a file.
+ */
+class CV_EXPORTS_W FileStorage
+{
+public:
+    //! file storage mode
+    enum Mode
+    {
+        READ        = 0, //!< value, open the file for reading
+        WRITE       = 1, //!< value, open the file for writing
+        APPEND      = 2, //!< value, open the file for appending
+        MEMORY      = 4, //!< flag, read data from source or write data to the internal buffer (which is
+                         //!< returned by FileStorage::release)
+        FORMAT_MASK = (7<<3), //!< mask for format flags
+        FORMAT_AUTO = 0,      //!< flag, auto format
+        FORMAT_XML  = (1<<3), //!< flag, XML format
+        FORMAT_YAML = (2<<3), //!< flag, YAML format
+        FORMAT_JSON = (3<<3), //!< flag, JSON format
+
+        BASE64      = 64,     //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64)
+        WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64
+    };
+    enum
+    {
+        UNDEFINED      = 0,
+        VALUE_EXPECTED = 1,
+        NAME_EXPECTED  = 2,
+        INSIDE_MAP     = 4
+    };
+
+    /** @brief The constructors.
+
+    The full constructor opens the file. Alternatively you can use the default constructor and then
+    call FileStorage::open.
+     */
+    CV_WRAP FileStorage();
+
+    /** @overload
+    @param source Name of the file to open or the text string to read the data from. Extension of the
+    file (.xml, .yml/.yaml, or .json) determines its format (XML, YAML or JSON respectively). Also you can
+    append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both FileStorage::WRITE
+    and FileStorage::MEMORY flags are specified, source is used just to specify the output file format (e.g.
+    mydata.xml, .yml etc.).
+    @param flags Mode of operation. See  FileStorage::Mode
+    @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
+    you should use 8-bit encoding instead of it.
+    */
+    CV_WRAP FileStorage(const String& source, int flags, const String& encoding=String());
+
+    /** @overload */
+    FileStorage(CvFileStorage* fs, bool owning=true);
+
+    //! the destructor. calls release()
+    virtual ~FileStorage();
+
+    /** @brief Opens a file.
+
+    See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release
+    before opening the file.
+    @param filename Name of the file to open or the text string to read the data from.
+       Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON
+        respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both
+        FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify
+        the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters.
+        You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to
+        FileStorage::BASE64 flag.
+    @param flags Mode of operation. One of FileStorage::Mode
+    @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
+    you should use 8-bit encoding instead of it.
+     */
+    CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String());
+
+    /** @brief Checks whether the file is opened.
+
+    @returns true if the object is associated with the current file and false otherwise. It is a
+    good practice to call this method after you tried to open a file.
+     */
+    CV_WRAP virtual bool isOpened() const;
+
+    /** @brief Closes the file and releases all the memory buffers.
+
+    Call this method after all I/O operations with the storage are finished.
+     */
+    CV_WRAP virtual void release();
+
+    /** @brief Closes the file and releases all the memory buffers.
+
+    Call this method after all I/O operations with the storage are finished. If the storage was
+    opened for writing data and FileStorage::WRITE was specified
+     */
+    CV_WRAP virtual String releaseAndGetString();
+
+    /** @brief Returns the first element of the top-level mapping.
+    @returns The first element of the top-level mapping.
+     */
+    CV_WRAP FileNode getFirstTopLevelNode() const;
+
+    /** @brief Returns the top-level mapping
+    @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file.
+    However, YAML supports multiple streams and so there can be several.
+    @returns The top-level mapping.
+     */
+    CV_WRAP FileNode root(int streamidx=0) const;
+
+    /** @brief Returns the specified element of the top-level mapping.
+    @param nodename Name of the file node.
+    @returns Node with the given name.
+     */
+    FileNode operator[](const String& nodename) const;
+
+    /** @overload */
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
+
+    /** @brief Returns the obsolete C FileStorage structure.
+    @returns Pointer to the underlying C FileStorage structure
+     */
+    CvFileStorage* operator *() { return fs.get(); }
+
+    /** @overload */
+    const CvFileStorage* operator *() const { return fs.get(); }
+
+    /** @brief Writes multiple numbers.
+
+    Writes one or more numbers of the specified format to the currently written structure. Usually it is
+    more convenient to use operator `<<` instead of this method.
+    @param fmt Specification of each array element, see @ref format_spec "format specification"
+    @param vec Pointer to the written array.
+    @param len Number of the uchar elements to write.
+     */
+    void writeRaw( const String& fmt, const uchar* vec, size_t len );
+
+    /** @brief Writes the registered C structure (CvMat, CvMatND, CvSeq).
+    @param name Name of the written object.
+    @param obj Pointer to the object.
+    @see ocvWrite for details.
+     */
+    void writeObj( const String& name, const void* obj );
+
+    /**
+     * @brief Simplified writing API to use with bindings.
+     * @param name Name of the written object
+     * @param val Value of the written object
+     */
+    CV_WRAP void write(const String& name, double val);
+    /// @overload
+    CV_WRAP void write(const String& name, const String& val);
+    /// @overload
+    CV_WRAP void write(const String& name, InputArray val);
+
+    /** @brief Writes a comment.
+
+    The function writes a comment into file storage. The comments are skipped when the storage is read.
+    @param comment The written comment, single-line or multi-line
+    @param append If true, the function tries to put the comment at the end of current line.
+    Else if the comment is multi-line, or if it does not fit at the end of the current
+    line, the comment starts a new line.
+     */
+    CV_WRAP void writeComment(const String& comment, bool append = false);
+
+    /** @brief Returns the normalized object name for the specified name of a file.
+    @param filename Name of a file
+    @returns The normalized object name.
+     */
+    static String getDefaultObjectName(const String& filename);
+
+    Ptr<CvFileStorage> fs; //!< the underlying C FileStorage structure
+    String elname; //!< the currently written element
+    std::vector<char> structs; //!< the stack of written structures
+    int state; //!< the writer state
+};
+
+template<> CV_EXPORTS void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const;
+
+/** @brief File Storage Node class.
+
+The node is used to store each and every element of the file storage opened for reading. When
+XML/YAML file is read, it is first parsed and stored in the memory as a hierarchical collection of
+nodes. Each node can be a “leaf” that is contain a single number or a string, or be a collection of
+other nodes. There can be named collections (mappings) where each element has a name and it is
+accessed by a name, and ordered collections (sequences) where elements do not have names but rather
+accessed by index. Type of the file node can be determined using FileNode::type method.
+
+Note that file nodes are only used for navigating file storages opened for reading. When a file
+storage is opened for writing, no data is stored in memory after it is written.
+ */
+class CV_EXPORTS_W_SIMPLE FileNode
+{
+public:
+    //! type of the file storage node
+    enum Type
+    {
+        NONE      = 0, //!< empty node
+        INT       = 1, //!< an integer
+        REAL      = 2, //!< floating-point number
+        FLOAT     = REAL, //!< synonym or REAL
+        STR       = 3, //!< text string in UTF-8 encoding
+        STRING    = STR, //!< synonym for STR
+        REF       = 4, //!< integer of size size_t. Typically used for storing complex dynamic structures where some elements reference the others
+        SEQ       = 5, //!< sequence
+        MAP       = 6, //!< mapping
+        TYPE_MASK = 7,
+        FLOW      = 8,  //!< compact representation of a sequence or mapping. Used only by YAML writer
+        USER      = 16, //!< a registered object (e.g. a matrix)
+        EMPTY     = 32, //!< empty structure (sequence or mapping)
+        NAMED     = 64  //!< the node has a name (i.e. it is element of a mapping)
+    };
+    /** @brief The constructors.
+
+    These constructors are used to create a default file node, construct it from obsolete structures or
+    from the another file node.
+     */
+    CV_WRAP FileNode();
+
+    /** @overload
+    @param fs Pointer to the obsolete file storage structure.
+    @param node File node to be used as initialization for the created file node.
+    */
+    FileNode(const CvFileStorage* fs, const CvFileNode* node);
+
+    /** @overload
+    @param node File node to be used as initialization for the created file node.
+    */
+    FileNode(const FileNode& node);
+
+    /** @brief Returns element of a mapping node or a sequence node.
+    @param nodename Name of an element in the mapping node.
+    @returns Returns the element with the given identifier.
+     */
+    FileNode operator[](const String& nodename) const;
+
+    /** @overload
+    @param nodename Name of an element in the mapping node.
+    */
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
+
+    /** @overload
+    @param i Index of an element in the sequence node.
+    */
+    CV_WRAP_AS(at) FileNode operator[](int i) const;
+
+    /** @brief Returns type of the node.
+    @returns Type of the node. See FileNode::Type
+     */
+    CV_WRAP int type() const;
+
+    //! returns true if the node is empty
+    CV_WRAP bool empty() const;
+    //! returns true if the node is a "none" object
+    CV_WRAP bool isNone() const;
+    //! returns true if the node is a sequence
+    CV_WRAP bool isSeq() const;
+    //! returns true if the node is a mapping
+    CV_WRAP bool isMap() const;
+    //! returns true if the node is an integer
+    CV_WRAP bool isInt() const;
+    //! returns true if the node is a floating-point number
+    CV_WRAP bool isReal() const;
+    //! returns true if the node is a text string
+    CV_WRAP bool isString() const;
+    //! returns true if the node has a name
+    CV_WRAP bool isNamed() const;
+    //! returns the node name or an empty string if the node is nameless
+    CV_WRAP String name() const;
+    //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise.
+    CV_WRAP size_t size() const;
+    //! returns the node content as an integer. If the node stores floating-point number, it is rounded.
+    operator int() const;
+    //! returns the node content as float
+    operator float() const;
+    //! returns the node content as double
+    operator double() const;
+    //! returns the node content as text string
+    operator String() const;
+#ifndef OPENCV_NOSTL
+    operator std::string() const;
+#endif
+
+    //! returns pointer to the underlying file node
+    CvFileNode* operator *();
+    //! returns pointer to the underlying file node
+    const CvFileNode* operator* () const;
+
+    //! returns iterator pointing to the first node element
+    FileNodeIterator begin() const;
+    //! returns iterator pointing to the element following the last node element
+    FileNodeIterator end() const;
+
+    /** @brief Reads node elements to the buffer with the specified format.
+
+    Usually it is more convenient to use operator `>>` instead of this method.
+    @param fmt Specification of each array element. See @ref format_spec "format specification"
+    @param vec Pointer to the destination array.
+    @param len Number of elements to read. If it is greater than number of remaining elements then all
+    of them will be read.
+     */
+    void readRaw( const String& fmt, uchar* vec, size_t len ) const;
+
+    //! reads the registered object and returns pointer to it
+    void* readObj() const;
+
+    //! Simplified reading API to use with bindings.
+    CV_WRAP double real() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP String string() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP Mat mat() const;
+
+    // do not use wrapper pointer classes for better efficiency
+    const CvFileStorage* fs;
+    const CvFileNode* node;
+};
+
+
+/** @brief used to iterate through sequences and mappings.
+
+A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a
+sequence, stored in node. See the data reading sample in the beginning of the section.
+ */
+class CV_EXPORTS FileNodeIterator
+{
+public:
+    /** @brief The constructors.
+
+    These constructors are used to create a default iterator, set it to specific element in a file node
+    or construct it from another iterator.
+     */
+    FileNodeIterator();
+
+    /** @overload
+    @param fs File storage for the iterator.
+    @param node File node for the iterator.
+    @param ofs Index of the element in the node. The created iterator will point to this element.
+    */
+    FileNodeIterator(const CvFileStorage* fs, const CvFileNode* node, size_t ofs=0);
+
+    /** @overload
+    @param it Iterator to be used as initialization for the created iterator.
+    */
+    FileNodeIterator(const FileNodeIterator& it);
+
+    //! returns the currently observed element
+    FileNode operator *() const;
+    //! accesses the currently observed element methods
+    FileNode operator ->() const;
+
+    //! moves iterator to the next node
+    FileNodeIterator& operator ++ ();
+    //! moves iterator to the next node
+    FileNodeIterator operator ++ (int);
+    //! moves iterator to the previous node
+    FileNodeIterator& operator -- ();
+    //! moves iterator to the previous node
+    FileNodeIterator operator -- (int);
+    //! moves iterator forward by the specified offset (possibly negative)
+    FileNodeIterator& operator += (int ofs);
+    //! moves iterator backward by the specified offset (possibly negative)
+    FileNodeIterator& operator -= (int ofs);
+
+    /** @brief Reads node elements to the buffer with the specified format.
+
+    Usually it is more convenient to use operator `>>` instead of this method.
+    @param fmt Specification of each array element. See @ref format_spec "format specification"
+    @param vec Pointer to the destination array.
+    @param maxCount Number of elements to read. If it is greater than number of remaining elements then
+    all of them will be read.
+     */
+    FileNodeIterator& readRaw( const String& fmt, uchar* vec,
+                               size_t maxCount=(size_t)INT_MAX );
+
+    struct SeqReader
+    {
+      int          header_size;
+      void*        seq;        /* sequence, beign read; CvSeq      */
+      void*        block;      /* current block;        CvSeqBlock */
+      schar*       ptr;        /* pointer to element be read next */
+      schar*       block_min;  /* pointer to the beginning of block */
+      schar*       block_max;  /* pointer to the end of block */
+      int          delta_index;/* = seq->first->start_index   */
+      schar*       prev_elem;  /* pointer to previous element */
+    };
+
+    const CvFileStorage* fs;
+    const CvFileNode* container;
+    SeqReader reader;
+    size_t remaining;
+};
+
+//! @} core_xml
+
+/////////////////// XML & YAML I/O implementation //////////////////
+
+//! @relates cv::FileStorage
+//! @{
+
+CV_EXPORTS void write( FileStorage& fs, const String& name, int value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, float value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, double value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<KeyPoint>& value);
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<DMatch>& value);
+
+CV_EXPORTS void writeScalar( FileStorage& fs, int value );
+CV_EXPORTS void writeScalar( FileStorage& fs, float value );
+CV_EXPORTS void writeScalar( FileStorage& fs, double value );
+CV_EXPORTS void writeScalar( FileStorage& fs, const String& value );
+
+//! @}
+
+//! @relates cv::FileNode
+//! @{
+
+CV_EXPORTS void read(const FileNode& node, int& value, int default_value);
+CV_EXPORTS void read(const FileNode& node, float& value, float default_value);
+CV_EXPORTS void read(const FileNode& node, double& value, double default_value);
+CV_EXPORTS void read(const FileNode& node, String& value, const String& default_value);
+CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() );
+CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() );
+CV_EXPORTS void read(const FileNode& node, std::vector<KeyPoint>& keypoints);
+CV_EXPORTS void read(const FileNode& node, std::vector<DMatch>& matches);
+
+template<typename _Tp> static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Point_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Point3_<_Tp>& value, const Point3_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 3 ? default_value : Point3_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Size_<_Tp>& value, const Size_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Size_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Complex<_Tp>& value, const Complex<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Complex<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Rect_<_Tp>& value, const Rect_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Rect_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                          saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+template<typename _Tp, int cn> static inline void read(const FileNode& node, Vec<_Tp, cn>& value, const Vec<_Tp, cn>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]);
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Scalar_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+static inline void read(const FileNode& node, Range& value, const Range& default_value)
+{
+    Point2i temp(value.start, value.end); const Point2i default_temp = Point2i(default_value.start, default_value.end);
+    read(node, temp, default_temp);
+    value.start = temp.x; value.end = temp.y;
+}
+
+//! @}
+
+/** @brief Writes string to a file storage.
+@relates cv::FileStorage
+ */
+CV_EXPORTS FileStorage& operator << (FileStorage& fs, const String& str);
+
+//! @cond IGNORED
+
+namespace internal
+{
+    class CV_EXPORTS WriteStructContext
+    {
+    public:
+        WriteStructContext(FileStorage& _fs, const String& name, int flags, const String& typeName = String());
+        ~WriteStructContext();
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecWriterProxy
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            size_t count = vec.size();
+            for (size_t i = 0; i < count; i++)
+                write(*fs, vec[i]);
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp> class VecWriterProxy<_Tp, 1>
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            int _fmt = DataType<_Tp>::fmt;
+            char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' };
+            fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp));
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecReaderProxy
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            count = std::min(count, it->remaining);
+            vec.resize(count);
+            for (size_t i = 0; i < count; i++, ++(*it))
+                read(**it, vec[i], _Tp());
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+    template<typename _Tp> class VecReaderProxy<_Tp, 1>
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            size_t remaining = it->remaining;
+            size_t cn = DataType<_Tp>::channels;
+            int _fmt = DataType<_Tp>::fmt;
+            char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' };
+            size_t remaining1 = remaining / cn;
+            count = count < remaining1 ? count : remaining1;
+            vec.resize(count);
+            it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp));
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+} // internal
+
+//! @endcond
+
+//! @relates cv::FileStorage
+//! @{
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const _Tp& value)
+{
+    write(fs, String(), value);
+}
+
+template<> inline
+void write( FileStorage& fs, const int& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const float& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const double& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const String& value )
+{
+    writeScalar(fs, value);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point3_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+    write(fs, pt.z);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Size_<_Tp>& sz )
+{
+    write(fs, sz.width);
+    write(fs, sz.height);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Complex<_Tp>& c )
+{
+    write(fs, c.re);
+    write(fs, c.im);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Rect_<_Tp>& r )
+{
+    write(fs, r.x);
+    write(fs, r.y);
+    write(fs, r.width);
+    write(fs, r.height);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const Vec<_Tp, cn>& v )
+{
+    for(int i = 0; i < cn; i++)
+        write(fs, v.val[i]);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Scalar_<_Tp>& s )
+{
+    write(fs, s.val[0]);
+    write(fs, s.val[1]);
+    write(fs, s.val[2]);
+    write(fs, s.val[3]);
+}
+
+static inline
+void write(FileStorage& fs, const Range& r )
+{
+    write(fs, r.start);
+    write(fs, r.end);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const std::vector<_Tp>& vec )
+{
+    cv::internal::VecWriterProxy<_Tp, DataType<_Tp>::fmt != 0> w(&fs);
+    w(vec);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point3_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Size_<_Tp>& sz )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, sz);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Complex<_Tp>& c )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, c);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Rect_<_Tp>& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, v);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, s);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const Range& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(DataType<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+    write(fs, vec);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ);
+    for(size_t i = 0; i < vec.size(); i++)
+    {
+        cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(DataType<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+        write(fs, vec[i]);
+    }
+}
+
+//! @} FileStorage
+
+//! @relates cv::FileNode
+//! @{
+
+static inline
+void read(const FileNode& node, bool& value, bool default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = temp != 0;
+}
+
+static inline
+void read(const FileNode& node, uchar& value, uchar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<uchar>(temp);
+}
+
+static inline
+void read(const FileNode& node, schar& value, schar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<schar>(temp);
+}
+
+static inline
+void read(const FileNode& node, ushort& value, ushort default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<ushort>(temp);
+}
+
+static inline
+void read(const FileNode& node, short& value, short default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<short>(temp);
+}
+
+template<typename _Tp> static inline
+void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX )
+{
+    cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it);
+    r(vec, maxCount);
+}
+
+template<typename _Tp> static inline
+void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() )
+{
+    if(!node.node)
+        vec = default_value;
+    else
+    {
+        FileNodeIterator it = node.begin();
+        read( it, vec );
+    }
+}
+
+//! @} FileNode
+
+//! @relates cv::FileStorage
+//! @{
+
+/** @brief Writes data to a file storage.
+ */
+template<typename _Tp> static inline
+FileStorage& operator << (FileStorage& fs, const _Tp& value)
+{
+    if( !fs.isOpened() )
+        return fs;
+    if( fs.state == FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP )
+        CV_Error( Error::StsError, "No element name has been given" );
+    write( fs, fs.elname, value );
+    if( fs.state & FileStorage::INSIDE_MAP )
+        fs.state = FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP;
+    return fs;
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, const char* str)
+{
+    return (fs << String(str));
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, char* value)
+{
+    return (fs << String(value));
+}
+
+//! @} FileStorage
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value)
+{
+    read( *it, value, _Tp());
+    return ++it;
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec)
+{
+    cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it);
+    r(vec, (size_t)INT_MAX);
+    return it;
+}
+
+//! @} FileNodeIterator
+
+//! @relates cv::FileNode
+//! @{
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, _Tp& value)
+{
+    read( n, value, _Tp());
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, std::vector<_Tp>& vec)
+{
+    FileNodeIterator it = n.begin();
+    it >> vec;
+}
+
+/** @brief Reads KeyPoint from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, std::vector<KeyPoint>& vec)
+{
+    read(n, vec);
+}
+/** @brief Reads DMatch from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, std::vector<DMatch>& vec)
+{
+    read(n, vec);
+}
+
+//! @} FileNode
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+static inline
+bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it1.fs == it2.fs && it1.container == it2.container &&
+        it1.reader.ptr == it2.reader.ptr && it1.remaining == it2.remaining;
+}
+
+static inline
+bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return !(it1 == it2);
+}
+
+static inline
+ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it2.remaining - it1.remaining;
+}
+
+static inline
+bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it1.remaining > it2.remaining;
+}
+
+//! @} FileNodeIterator
+
+//! @cond IGNORED
+
+inline FileNode FileStorage::getFirstTopLevelNode() const { FileNode r = root(); FileNodeIterator it = r.begin(); return it != r.end() ? *it : FileNode(); }
+inline FileNode::FileNode() : fs(0), node(0) {}
+inline FileNode::FileNode(const CvFileStorage* _fs, const CvFileNode* _node) : fs(_fs), node(_node) {}
+inline FileNode::FileNode(const FileNode& _node) : fs(_node.fs), node(_node.node) {}
+inline bool FileNode::empty() const    { return node   == 0;    }
+inline bool FileNode::isNone() const   { return type() == NONE; }
+inline bool FileNode::isSeq() const    { return type() == SEQ;  }
+inline bool FileNode::isMap() const    { return type() == MAP;  }
+inline bool FileNode::isInt() const    { return type() == INT;  }
+inline bool FileNode::isReal() const   { return type() == REAL; }
+inline bool FileNode::isString() const { return type() == STR;  }
+inline CvFileNode* FileNode::operator *() { return (CvFileNode*)node; }
+inline const CvFileNode* FileNode::operator* () const { return node; }
+inline FileNode::operator int() const    { int value;    read(*this, value, 0);     return value; }
+inline FileNode::operator float() const  { float value;  read(*this, value, 0.f);   return value; }
+inline FileNode::operator double() const { double value; read(*this, value, 0.);    return value; }
+inline FileNode::operator String() const { String value; read(*this, value, value); return value; }
+inline double FileNode::real() const  { return double(*this); }
+inline String FileNode::string() const { return String(*this); }
+inline Mat FileNode::mat() const { Mat value; read(*this, value, value);    return value; }
+inline FileNodeIterator FileNode::begin() const { return FileNodeIterator(fs, node); }
+inline FileNodeIterator FileNode::end() const   { return FileNodeIterator(fs, node, size()); }
+inline void FileNode::readRaw( const String& fmt, uchar* vec, size_t len ) const { begin().readRaw( fmt, vec, len ); }
+inline FileNode FileNodeIterator::operator *() const  { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
+inline FileNode FileNodeIterator::operator ->() const { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
+inline String::String(const FileNode& fn): cstr_(0), len_(0) { read(fn, *this, *this); }
+
+//! @endcond
+
+
+CV_EXPORTS void cvStartWriteRawData_Base64(::CvFileStorage * fs, const char* name, int len, const char* dt);
+
+CV_EXPORTS void cvWriteRawData_Base64(::CvFileStorage * fs, const void* _data, int len);
+
+CV_EXPORTS void cvEndWriteRawData_Base64(::CvFileStorage * fs);
+
+CV_EXPORTS void cvWriteMat_Base64(::CvFileStorage* fs, const char* name, const ::CvMat* mat);
+
+CV_EXPORTS void cvWriteMatND_Base64(::CvFileStorage* fs, const char* name, const ::CvMatND* mat);
+
+} // cv
+
+#endif // OPENCV_CORE_PERSISTENCE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/private.cuda.hpp b/thirdparty/linux/include/opencv2/core/private.cuda.hpp
new file mode 100644
index 0000000..01a4ab3
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/private.cuda.hpp
@@ -0,0 +1,172 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PRIVATE_CUDA_HPP
+#define OPENCV_CORE_PRIVATE_CUDA_HPP
+
+#ifndef __OPENCV_BUILD
+#  error this is a private header which should not be used from outside of the OpenCV library
+#endif
+
+#include "cvconfig.h"
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+
+#include "opencv2/core/cuda.hpp"
+
+#ifdef HAVE_CUDA
+#  include <cuda.h>
+#  include <cuda_runtime.h>
+#  include <npp.h>
+#  include "opencv2/core/cuda_stream_accessor.hpp"
+#  include "opencv2/core/cuda/common.hpp"
+
+#  define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
+
+#  define CUDART_MINIMUM_REQUIRED_VERSION 6050
+
+#  if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
+#    error "Insufficient Cuda Runtime library version, please update it."
+#  endif
+
+#  if defined(CUDA_ARCH_BIN_OR_PTX_10)
+#    error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
+#  endif
+#endif
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda {
+    CV_EXPORTS cv::String getNppErrorMessage(int code);
+    CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
+
+    CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
+
+    CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
+    static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
+    {
+        return getOutputMat(_dst, size.height, size.width, type, stream);
+    }
+
+    CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
+}}
+
+#ifndef HAVE_CUDA
+
+static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without CUDA support"); }
+
+#else // HAVE_CUDA
+
+static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
+
+namespace cv { namespace cuda
+{
+    class CV_EXPORTS BufferPool
+    {
+    public:
+        explicit BufferPool(Stream& stream);
+
+        GpuMat getBuffer(int rows, int cols, int type);
+        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+        GpuMat::Allocator* getAllocator() const { return allocator_; }
+
+    private:
+        GpuMat::Allocator* allocator_;
+    };
+
+    static inline void checkNppError(int code, const char* file, const int line, const char* func)
+    {
+        if (code < 0)
+            cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
+    }
+
+    static inline void checkCudaDriverApiError(int code, const char* file, const int line, const char* func)
+    {
+        if (code != CUDA_SUCCESS)
+            cv::error(cv::Error::GpuApiCallError, getCudaDriverApiErrorMessage(code), func, file, line);
+    }
+
+    template<int n> struct NPPTypeTraits;
+    template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };
+    template<> struct NPPTypeTraits<CV_8S>  { typedef Npp8s npp_type; };
+    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
+    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
+    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
+    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
+    template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
+
+    class NppStreamHandler
+    {
+    public:
+        inline explicit NppStreamHandler(Stream& newStream)
+        {
+            oldStream = nppGetStream();
+            nppSetStream(StreamAccessor::getStream(newStream));
+        }
+
+        inline explicit NppStreamHandler(cudaStream_t newStream)
+        {
+            oldStream = nppGetStream();
+            nppSetStream(newStream);
+        }
+
+        inline ~NppStreamHandler()
+        {
+            nppSetStream(oldStream);
+        }
+
+    private:
+        cudaStream_t oldStream;
+    };
+}}
+
+#define nppSafeCall(expr)  cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
+#define cuSafeCall(expr)  cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
+
+#endif // HAVE_CUDA
+
+//! @endcond
+
+#endif // OPENCV_CORE_PRIVATE_CUDA_HPP
diff --git a/thirdparty/linux/include/opencv2/core/private.hpp b/thirdparty/linux/include/opencv2/core/private.hpp
new file mode 100644
index 0000000..e428ecf
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/private.hpp
@@ -0,0 +1,585 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PRIVATE_HPP
+#define OPENCV_CORE_PRIVATE_HPP
+
+#ifndef __OPENCV_BUILD
+#  error this is a private header which should not be used from outside of the OpenCV library
+#endif
+
+#include "opencv2/core.hpp"
+#include "cvconfig.h"
+
+#ifdef HAVE_EIGEN
+#  if defined __GNUC__ && defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wshadow"
+#  endif
+#  include <Eigen/Core>
+#  include "opencv2/core/eigen.hpp"
+#endif
+
+#ifdef HAVE_TBB
+#  include "tbb/tbb.h"
+#  include "tbb/task.h"
+#  undef min
+#  undef max
+#endif
+
+#if defined HAVE_FP16 && (defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700))
+#  include <immintrin.h>
+#  define CV_FP16 1
+#elif defined HAVE_FP16 && defined __GNUC__
+#  define CV_FP16 1
+#endif
+
+#ifndef CV_FP16
+#  define CV_FP16 0
+#endif
+
+//! @cond IGNORED
+
+namespace cv
+{
+#ifdef HAVE_TBB
+
+    typedef tbb::blocked_range<int> BlockedRange;
+
+    template<typename Body> static inline
+    void parallel_for( const BlockedRange& range, const Body& body )
+    {
+        tbb::parallel_for(range, body);
+    }
+
+    typedef tbb::split Split;
+
+    template<typename Body> static inline
+    void parallel_reduce( const BlockedRange& range, Body& body )
+    {
+        tbb::parallel_reduce(range, body);
+    }
+
+    typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
+#else
+    class BlockedRange
+    {
+    public:
+        BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
+        BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
+        int begin() const { return _begin; }
+        int end() const { return _end; }
+        int grainsize() const { return _grainsize; }
+
+    protected:
+        int _begin, _end, _grainsize;
+    };
+
+    template<typename Body> static inline
+    void parallel_for( const BlockedRange& range, const Body& body )
+    {
+        body(range);
+    }
+    typedef std::vector<Rect> ConcurrentRectVector;
+
+    class Split {};
+
+    template<typename Body> static inline
+    void parallel_reduce( const BlockedRange& range, Body& body )
+    {
+        body(range);
+    }
+#endif
+
+    // Returns a static string if there is a parallel framework,
+    // NULL otherwise.
+    CV_EXPORTS const char* currentParallelFramework();
+} //namespace cv
+
+/****************************************************************************************\
+*                                  Common declarations                                   *
+\****************************************************************************************/
+
+/* the alignment of all the allocated buffers */
+#define  CV_MALLOC_ALIGN    16
+
+/* IEEE754 constants and macros */
+#define  CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0))
+#define  CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0))
+
+static inline void* cvAlignPtr( const void* ptr, int align = 32 )
+{
+    CV_DbgAssert ( (align & (align-1)) == 0 );
+    return (void*)( ((size_t)ptr + align - 1) & ~(size_t)(align-1) );
+}
+
+static inline int cvAlign( int size, int align )
+{
+    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
+    return (size + align - 1) & -align;
+}
+
+#ifdef IPL_DEPTH_8U
+static inline cv::Size cvGetMatSize( const CvMat* mat )
+{
+    return cv::Size(mat->cols, mat->rows);
+}
+#endif
+
+namespace cv
+{
+CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
+}
+
+// property implementation macros
+
+#define CV_IMPL_PROPERTY_RO(type, name, member) \
+    inline type get##name() const { return member; }
+
+#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
+    CV_IMPL_PROPERTY_RO(r_type, name, member) \
+    inline void set##name(w_type val) { member = val; }
+
+#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
+    r_type get##name() const { return internal_obj.get##internal_name(); } \
+    void set##name(w_type val) { internal_obj.set##internal_name(val); }
+
+#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
+#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
+
+#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj)  CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
+#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
+
+#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
+#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
+
+/****************************************************************************************\
+*                     Structures and macros for integration with IPP                     *
+\****************************************************************************************/
+
+#ifdef HAVE_IPP
+#include "ipp.h"
+
+#ifndef IPP_VERSION_UPDATE // prior to 7.1
+#define IPP_VERSION_UPDATE 0
+#endif
+
+#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
+
+// General define for ipp function disabling
+#define IPP_DISABLE_BLOCK 0
+
+#ifdef CV_MALLOC_ALIGN
+#undef CV_MALLOC_ALIGN
+#endif
+#define CV_MALLOC_ALIGN 32 // required for AVX optimization
+
+#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
+
+static inline IppiSize ippiSize(int width, int height)
+{
+    IppiSize size = { width, height };
+    return size;
+}
+
+static inline IppiSize ippiSize(const cv::Size & _size)
+{
+    IppiSize size = { _size.width, _size.height };
+    return size;
+}
+
+static inline IppiPoint ippiPoint(const cv::Point & _point)
+{
+    IppiPoint point = { _point.x, _point.y };
+    return point;
+}
+
+static inline IppiPoint ippiPoint(int x, int y)
+{
+    IppiPoint point = { x, y };
+    return point;
+}
+
+static inline IppiBorderType ippiGetBorderType(int borderTypeNI)
+{
+    return borderTypeNI == cv::BORDER_CONSTANT ? ippBorderConst :
+        borderTypeNI == cv::BORDER_WRAP ? ippBorderWrap :
+        borderTypeNI == cv::BORDER_REPLICATE ? ippBorderRepl :
+        borderTypeNI == cv::BORDER_REFLECT_101 ? ippBorderMirror :
+        borderTypeNI == cv::BORDER_REFLECT ? ippBorderMirrorR : (IppiBorderType)-1;
+}
+
+static inline IppDataType ippiGetDataType(int depth)
+{
+    return depth == CV_8U ? ipp8u :
+        depth == CV_8S ? ipp8s :
+        depth == CV_16U ? ipp16u :
+        depth == CV_16S ? ipp16s :
+        depth == CV_32S ? ipp32s :
+        depth == CV_32F ? ipp32f :
+        depth == CV_64F ? ipp64f : (IppDataType)-1;
+}
+
+// IPP temporary buffer hepler
+template<typename T>
+class IppAutoBuffer
+{
+public:
+    IppAutoBuffer() { m_pBuffer = NULL; }
+    IppAutoBuffer(int size) { Alloc(size); }
+    ~IppAutoBuffer() { Release(); }
+    T* Alloc(int size) { m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; }
+    void Release() { if(m_pBuffer) ippFree(m_pBuffer); }
+    inline operator T* () { return (T*)m_pBuffer;}
+    inline operator const T* () const { return (const T*)m_pBuffer;}
+private:
+    // Disable copy operations
+    IppAutoBuffer(IppAutoBuffer &) {}
+    IppAutoBuffer& operator =(const IppAutoBuffer &) {return *this;}
+
+    T* m_pBuffer;
+};
+
+#else
+#define IPP_VERSION_X100 0
+#endif
+
+#if defined HAVE_IPP
+#if IPP_VERSION_X100 >= 900
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    if(FEAT)                                            \
+        ippSetCpuFeatures(FEAT);                        \
+    else                                                \
+        ippInit();                                      \
+}
+#elif IPP_VERSION_X100 >= 800
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    ippInit();                                          \
+}
+#else
+#define IPP_INITIALIZER(FEAT)                           \
+{                                                       \
+    ippStaticInit();                                    \
+}
+#endif
+
+#ifdef CVAPI_EXPORTS
+#define IPP_INITIALIZER_AUTO                            \
+struct __IppInitializer__                               \
+{                                                       \
+    __IppInitializer__()                                \
+    {IPP_INITIALIZER(cv::ipp::getIppFeatures())}        \
+};                                                      \
+static struct __IppInitializer__ __ipp_initializer__;
+#else
+#define IPP_INITIALIZER_AUTO
+#endif
+#else
+#define IPP_INITIALIZER
+#define IPP_INITIALIZER_AUTO
+#endif
+
+#define CV_IPP_CHECK_COND (cv::ipp::useIPP())
+#define CV_IPP_CHECK() if(CV_IPP_CHECK_COND)
+
+#ifdef HAVE_IPP
+
+#ifdef CV_IPP_RUN_VERBOSE
+#define CV_IPP_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ipp::useIPP() && (condition) && (func))                     \
+        {                                                                   \
+            printf("%s: IPP implementation is running\n", CV_Func);         \
+            fflush(stdout);                                                 \
+            CV_IMPL_ADD(CV_IMPL_IPP);                                       \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+        else                                                                \
+        {                                                                   \
+            printf("%s: Plain implementation is running\n", CV_Func);       \
+            fflush(stdout);                                                 \
+        }                                                                   \
+    }
+#elif defined CV_IPP_RUN_ASSERT
+#define CV_IPP_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ipp::useIPP() && (condition))                               \
+        {                                                                   \
+            if(func)                                                        \
+            {                                                               \
+                CV_IMPL_ADD(CV_IMPL_IPP);                                   \
+            }                                                               \
+            else                                                            \
+            {                                                               \
+                setIppErrorStatus();                                        \
+                CV_Error(cv::Error::StsAssert, #func);                      \
+            }                                                               \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+    }
+#else
+#define CV_IPP_RUN_(condition, func, ...)                                   \
+    if (cv::ipp::useIPP() && (condition) && (func))                         \
+    {                                                                       \
+        CV_IMPL_ADD(CV_IMPL_IPP);                                           \
+        return __VA_ARGS__;                                                 \
+    }
+#endif
+#define CV_IPP_RUN_FAST(func, ...)                                          \
+    if (cv::ipp::useIPP() && (func))                                        \
+    {                                                                       \
+        CV_IMPL_ADD(CV_IMPL_IPP);                                           \
+        return __VA_ARGS__;                                                 \
+    }
+#else
+#define CV_IPP_RUN_(condition, func, ...)
+#define CV_IPP_RUN_FAST(func, ...)
+#endif
+
+#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
+
+
+#ifndef IPPI_CALL
+#  define IPPI_CALL(func) CV_Assert((func) >= 0)
+#endif
+
+/* IPP-compatible return codes */
+typedef enum CvStatus
+{
+    CV_BADMEMBLOCK_ERR          = -113,
+    CV_INPLACE_NOT_SUPPORTED_ERR= -112,
+    CV_UNMATCHED_ROI_ERR        = -111,
+    CV_NOTFOUND_ERR             = -110,
+    CV_BADCONVERGENCE_ERR       = -109,
+
+    CV_BADDEPTH_ERR             = -107,
+    CV_BADROI_ERR               = -106,
+    CV_BADHEADER_ERR            = -105,
+    CV_UNMATCHED_FORMATS_ERR    = -104,
+    CV_UNSUPPORTED_COI_ERR      = -103,
+    CV_UNSUPPORTED_CHANNELS_ERR = -102,
+    CV_UNSUPPORTED_DEPTH_ERR    = -101,
+    CV_UNSUPPORTED_FORMAT_ERR   = -100,
+
+    CV_BADARG_ERR               = -49,  //ipp comp
+    CV_NOTDEFINED_ERR           = -48,  //ipp comp
+
+    CV_BADCHANNELS_ERR          = -47,  //ipp comp
+    CV_BADRANGE_ERR             = -44,  //ipp comp
+    CV_BADSTEP_ERR              = -29,  //ipp comp
+
+    CV_BADFLAG_ERR              =  -12,
+    CV_DIV_BY_ZERO_ERR          =  -11, //ipp comp
+    CV_BADCOEF_ERR              =  -10,
+
+    CV_BADFACTOR_ERR            =  -7,
+    CV_BADPOINT_ERR             =  -6,
+    CV_BADSCALE_ERR             =  -4,
+    CV_OUTOFMEM_ERR             =  -3,
+    CV_NULLPTR_ERR              =  -2,
+    CV_BADSIZE_ERR              =  -1,
+    CV_NO_ERR                   =   0,
+    CV_OK                       =   CV_NO_ERR
+}
+CvStatus;
+
+#ifdef HAVE_TEGRA_OPTIMIZATION
+namespace tegra {
+
+CV_EXPORTS bool useTegra();
+CV_EXPORTS void setUseTegra(bool flag);
+
+}
+#endif
+
+#ifdef ENABLE_INSTRUMENTATION
+namespace cv
+{
+namespace instr
+{
+struct InstrTLSStruct
+{
+    InstrTLSStruct()
+    {
+        pCurrentNode = NULL;
+    }
+    InstrNode* pCurrentNode;
+};
+
+class InstrStruct
+{
+public:
+    InstrStruct()
+    {
+        useInstr    = false;
+        flags       = FLAGS_MAPPING;
+        maxDepth    = 0;
+
+        rootNode.m_payload = NodeData("ROOT", NULL, 0, NULL, false, TYPE_GENERAL, IMPL_PLAIN);
+        tlsStruct.get()->pCurrentNode = &rootNode;
+    }
+
+    Mutex mutexCreate;
+    Mutex mutexCount;
+
+    bool       useInstr;
+    int        flags;
+    int        maxDepth;
+    InstrNode  rootNode;
+    TLSData<InstrTLSStruct> tlsStruct;
+};
+
+class CV_EXPORTS IntrumentationRegion
+{
+public:
+    IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
+    ~IntrumentationRegion();
+
+private:
+    bool    m_disabled; // region status
+    uint64  m_regionTicks;
+};
+
+CV_EXPORTS InstrStruct& getInstrumentStruct();
+InstrTLSStruct&         getInstrumentTLSStruct();
+CV_EXPORTS InstrNode*   getCurrentNode();
+}
+}
+
+#ifdef _WIN32
+#define CV_INSTRUMENT_GET_RETURN_ADDRESS _ReturnAddress()
+#else
+#define CV_INSTRUMENT_GET_RETURN_ADDRESS __builtin_extract_return_addr(__builtin_return_address(0))
+#endif
+
+// Instrument region
+#define CV_INSTRUMENT_REGION_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)        ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, CV_INSTRUMENT_GET_RETURN_ADDRESS, ALWAYS_EXPAND, TYPE, IMPL);
+#define CV_INSTRUMENT_REGION_CUSTOM_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)\
+    void *__curr_address__ = [&]() {return CV_INSTRUMENT_GET_RETURN_ADDRESS;}();\
+    ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, __curr_address__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN);
+// Instrument functions with non-void return type
+#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
+{\
+    if(::cv::instr::useInstrumentation()){\
+        ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
+        try{\
+            auto status = ((FUN)(__VA_ARGS__));\
+            if(ERROR_COND){\
+                ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+                CV_INSTRUMENT_MARK_META(IMPL, #FUN " - BadExit");\
+            }\
+            return status;\
+        }catch(...){\
+            ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+            CV_INSTRUMENT_MARK_META(IMPL, #FUN " - BadExit");\
+            throw;\
+        }\
+    }else{\
+        return ((FUN)(__VA_ARGS__));\
+    }\
+}())
+// Instrument functions with void return type
+#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
+{\
+    if(::cv::instr::useInstrumentation()){\
+        ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
+        try{\
+            (FUN)(__VA_ARGS__);\
+        }catch(...){\
+            ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+            CV_INSTRUMENT_MARK_META(IMPL, #FUN "- BadExit");\
+            throw;\
+        }\
+    }else{\
+        (FUN)(__VA_ARGS__);\
+    }\
+}())
+// Instrumentation information marker
+#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, NULL, false, ::cv::instr::TYPE_MARKER, IMPL);}
+
+///// General instrumentation
+// General OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION()              CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
+// Custom OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION_NAME(NAME)     CV_INSTRUMENT_REGION_CUSTOM_META(NAME,  false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
+// Instrumentation for parallel_for_ or other regions which forks and gathers threads
+#define CV_INSTRUMENT_REGION_MT_FORK()      CV_INSTRUMENT_REGION_META(__FUNCTION__, true,  ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN);
+
+///// IPP instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_IPP()          CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
+// Function instrumentation macro
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...)     CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_IPP(NAME)        CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)
+
+///// OpenCL instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_OPENCL()              CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+// OpenCL kernel compilation wrapper
+#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(NAME)  CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+// OpenCL kernel run wrapper
+#define CV_INSTRUMENT_REGION_OPENCL_RUN(NAME)      CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_OPENCL(NAME)            CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
+#else
+#define CV_INSTRUMENT_REGION_META(...)
+
+#define CV_INSTRUMENT_REGION()
+#define CV_INSTRUMENT_REGION_NAME(...)
+#define CV_INSTRUMENT_REGION_MT_FORK()
+
+#define CV_INSTRUMENT_REGION_IPP()
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
+#define CV_INSTRUMENT_MARK_IPP(...)
+
+#define CV_INSTRUMENT_REGION_OPENCL()
+#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...)
+#define CV_INSTRUMENT_REGION_OPENCL_RUN(...)
+#define CV_INSTRUMENT_MARK_OPENCL(...)
+#endif
+
+//! @endcond
+
+#endif // OPENCV_CORE_PRIVATE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/ptr.inl.hpp b/thirdparty/linux/include/opencv2/core/ptr.inl.hpp
new file mode 100644
index 0000000..3c095a1
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/ptr.inl.hpp
@@ -0,0 +1,379 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PTR_INL_HPP
+#define OPENCV_CORE_PTR_INL_HPP
+
+#include <algorithm>
+
+//! @cond IGNORED
+
+namespace cv {
+
+template<typename Y>
+void DefaultDeleter<Y>::operator () (Y* p) const
+{
+    delete p;
+}
+
+namespace detail
+{
+
+struct PtrOwner
+{
+    PtrOwner() : refCount(1)
+    {}
+
+    void incRef()
+    {
+        CV_XADD(&refCount, 1);
+    }
+
+    void decRef()
+    {
+        if (CV_XADD(&refCount, -1) == 1) deleteSelf();
+    }
+
+protected:
+    /* This doesn't really need to be virtual, since PtrOwner is never deleted
+       directly, but it doesn't hurt and it helps avoid warnings. */
+    virtual ~PtrOwner()
+    {}
+
+    virtual void deleteSelf() = 0;
+
+private:
+    unsigned int refCount;
+
+    // noncopyable
+    PtrOwner(const PtrOwner&);
+    PtrOwner& operator = (const PtrOwner&);
+};
+
+template<typename Y, typename D>
+struct PtrOwnerImpl : PtrOwner
+{
+    PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d)
+    {}
+
+    void deleteSelf()
+    {
+        deleter(owned);
+        delete this;
+    }
+
+private:
+    Y* owned;
+    D deleter;
+};
+
+
+}
+
+template<typename T>
+Ptr<T>::Ptr() : owner(NULL), stored(NULL)
+{}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(Y* p)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+template<typename Y, typename D>
+Ptr<T>::Ptr(Y* p, D d)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, D>(p, d)
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+Ptr<T>::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o, T* p) : owner(o.owner), stored(p)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+Ptr<T>::~Ptr()
+{
+    release();
+}
+
+template<typename T>
+Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+void Ptr<T>::release()
+{
+    if (owner) owner->decRef();
+    owner = NULL;
+    stored = NULL;
+}
+
+template<typename T>
+template<typename Y>
+void Ptr<T>::reset(Y* p)
+{
+    Ptr(p).swap(*this);
+}
+
+template<typename T>
+template<typename Y, typename D>
+void Ptr<T>::reset(Y* p, D d)
+{
+    Ptr(p, d).swap(*this);
+}
+
+template<typename T>
+void Ptr<T>::swap(Ptr<T>& o)
+{
+    std::swap(owner, o.owner);
+    std::swap(stored, o.stored);
+}
+
+template<typename T>
+T* Ptr<T>::get() const
+{
+    return stored;
+}
+
+template<typename T>
+typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
+{
+    return *stored;
+}
+
+template<typename T>
+T* Ptr<T>::operator -> () const
+{
+    return stored;
+}
+
+template<typename T>
+Ptr<T>::operator T* () const
+{
+    return stored;
+}
+
+
+template<typename T>
+bool Ptr<T>::empty() const
+{
+    return !stored;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::staticCast() const
+{
+    return Ptr<Y>(*this, static_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::constCast() const
+{
+    return Ptr<Y>(*this, const_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::dynamicCast() const
+{
+    return Ptr<Y>(*this, dynamic_cast<Y*>(stored));
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+template<typename T>
+Ptr<T>::Ptr(Ptr&& o) : owner(o.owner), stored(o.stored)
+{
+    o.owner = NULL;
+    o.stored = NULL;
+}
+
+template<typename T>
+Ptr<T>& Ptr<T>::operator = (Ptr<T>&& o)
+{
+    if (this == &o)
+        return *this;
+
+    release();
+    owner = o.owner;
+    stored = o.stored;
+    o.owner = NULL;
+    o.stored = NULL;
+    return *this;
+}
+
+#endif
+
+
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2){
+    ptr1.swap(ptr2);
+}
+
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() == ptr2.get();
+}
+
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() != ptr2.get();
+}
+
+template<typename T>
+Ptr<T> makePtr()
+{
+    return Ptr<T>(new T());
+}
+
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1)
+{
+    return Ptr<T>(new T(a1));
+}
+
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2)
+{
+    return Ptr<T>(new T(a1, a2));
+}
+
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
+{
+    return Ptr<T>(new T(a1, a2, a3));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10, typename A11>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10, typename A11, typename A12>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12));
+}
+} // namespace cv
+
+//! @endcond
+
+#endif // OPENCV_CORE_PTR_INL_HPP
diff --git a/thirdparty/linux/include/opencv2/core/saturate.hpp b/thirdparty/linux/include/opencv2/core/saturate.hpp
new file mode 100644
index 0000000..79a9a66
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/saturate.hpp
@@ -0,0 +1,150 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_SATURATE_HPP
+#define OPENCV_CORE_SATURATE_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/fast_math.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+/////////////// saturate_cast (used in image & signal processing) ///////////////////
+
+/** @brief Template function for accurate conversion from one primitive type to another.
+
+ The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
+ and others. They perform an efficient and accurate conversion from one primitive type to another
+ (see the introduction chapter). saturate in the name means that when the input value v is out of the
+ range of the target type, the result is not formed just by taking low bits of the input, but instead
+ the value is clipped. For example:
+ @code
+ uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN)
+ short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX)
+ @endcode
+ Such clipping is done when the target type is unsigned char , signed char , unsigned short or
+ signed short . For 32-bit integers, no clipping is done.
+
+ When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit),
+ the floating-point value is first rounded to the nearest integer and then clipped if needed (when
+ the target type is 8- or 16-bit).
+
+ This operation is used in the simplest or most complex image processing functions in OpenCV.
+
+ @param v Function parameter.
+ @sa add, subtract, multiply, divide, Mat::convertTo
+ */
+template<typename _Tp> static inline _Tp saturate_cast(uchar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(schar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(ushort v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(short v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(unsigned v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int v)      { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(double v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int64 v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(uint64 v)   { return _Tp(v); }
+
+template<> inline uchar saturate_cast<uchar>(schar v)        { return (uchar)std::max((int)v, 0); }
+template<> inline uchar saturate_cast<uchar>(ushort v)       { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(int v)          { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(short v)        { return saturate_cast<uchar>((int)v); }
+template<> inline uchar saturate_cast<uchar>(unsigned v)     { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(float v)        { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(double v)       { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(int64 v)        { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(uint64 v)       { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
+
+template<> inline schar saturate_cast<schar>(uchar v)        { return (schar)std::min((int)v, SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(ushort v)       { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(int v)          { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(short v)        { return saturate_cast<schar>((int)v); }
+template<> inline schar saturate_cast<schar>(unsigned v)     { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(float v)        { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(double v)       { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(int64 v)        { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(uint64 v)       { return (schar)std::min(v, (uint64)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(schar v)      { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(short v)      { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(int v)        { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(unsigned v)   { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
+template<> inline ushort saturate_cast<ushort>(float v)      { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(double v)     { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(int64 v)      { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(uint64 v)     { return (ushort)std::min(v, (uint64)USHRT_MAX); }
+
+template<> inline short saturate_cast<short>(ushort v)       { return (short)std::min((int)v, SHRT_MAX); }
+template<> inline short saturate_cast<short>(int v)          { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(unsigned v)     { return (short)std::min(v, (unsigned)SHRT_MAX); }
+template<> inline short saturate_cast<short>(float v)        { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(double v)       { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(int64 v)        { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(float v)            { return cvRound(v); }
+template<> inline int saturate_cast<int>(double v)           { return cvRound(v); }
+
+// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+template<> inline unsigned saturate_cast<unsigned>(float v)  { return cvRound(v); }
+template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); }
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_SATURATE_HPP
diff --git a/thirdparty/linux/include/opencv2/core/sse_utils.hpp b/thirdparty/linux/include/opencv2/core/sse_utils.hpp
new file mode 100644
index 0000000..69efffe
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/sse_utils.hpp
@@ -0,0 +1,652 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_SSE_UTILS_HPP
+#define OPENCV_CORE_SSE_UTILS_HPP
+
+#ifndef __cplusplus
+#  error sse_utils.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_sse
+//! @{
+
+#if CV_SSE2
+
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3);
+}
+
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                  __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4);
+    __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5);
+    __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4);
+    v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5);
+    v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5);
+}
+
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                  __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0);
+    __m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1);
+    __m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7);
+    __m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7);
+    __m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5);
+    __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6);
+    __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6);
+    __m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7);
+    __m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5);
+    v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6);
+    v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6);
+    v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7);
+    v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7);
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+    v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
+    __m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
+    __m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask));
+    __m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
+    __m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
+    __m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+    v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
+    v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
+    v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8));
+}
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3);
+}
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                   __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4);
+    v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5);
+    v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5);
+}
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                   __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0);
+    __m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1);
+    __m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7);
+    __m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7);
+    __m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5);
+    v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6);
+    v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6);
+    v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7);
+    v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7);
+}
+
+#if CV_SSE4_1
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+}
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                 __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+    v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
+}
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                 __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
+    __m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
+    __m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
+    __m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+    v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
+    v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
+    v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16));
+}
+
+#endif // CV_SSE4_1
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3);
+}
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
+                                __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
+    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
+    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4);
+    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5);
+    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4);
+    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5);
+    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5);
+}
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
+                                __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1);
+    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0);
+    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0);
+    __m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1);
+    __m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5);
+    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6);
+    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6);
+    __m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7);
+    __m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5);
+    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6);
+    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6);
+    v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7);
+    v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7);
+}
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+}
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
+                              __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
+    __m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
+    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
+    v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
+}
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
+                              __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
+    __m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo);
+    __m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
+    __m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo);
+    __m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
+    v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
+    v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo);
+    v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi);
+}
+
+#endif // CV_SSE2
+
+//! @}
+
+#endif //OPENCV_CORE_SSE_UTILS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/traits.hpp b/thirdparty/linux/include/opencv2/core/traits.hpp
new file mode 100644
index 0000000..f83b05f
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/traits.hpp
@@ -0,0 +1,326 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TRAITS_HPP
+#define OPENCV_CORE_TRAITS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+/** @brief Template "trait" class for OpenCV primitive data types.
+
+A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
+short, int, float, double, or a tuple of values of one of these types, where all the values in the
+tuple have the same type. Any primitive type from the list can be defined by an identifier in the
+form CV_\<bit-depth\>{U|S|F}C(\<number_of_channels\>), for example: uchar \~ CV_8UC1, 3-element
+floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a
+single instance of such a primitive data type is Vec. Multiple instances of such a type can be
+stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to
+store Vec instances.
+
+The DataType class is basically used to provide a description of such primitive data types without
+adding any fields or methods to the corresponding classes (and it is actually impossible to add
+anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not
+DataType itself that is used but its specialized versions, such as:
+@code
+    template<> class DataType<uchar>
+    {
+        typedef uchar value_type;
+        typedef int work_type;
+        typedef uchar channel_type;
+        enum { channel_type = CV_8U, channels = 1, fmt='u', type = CV_8U };
+    };
+    ...
+    template<typename _Tp> DataType<std::complex<_Tp> >
+    {
+        typedef std::complex<_Tp> value_type;
+        typedef std::complex<_Tp> work_type;
+        typedef _Tp channel_type;
+        // DataDepth is another helper trait class
+        enum { depth = DataDepth<_Tp>::value, channels=2,
+            fmt=(channels-1)*256+DataDepth<_Tp>::fmt,
+            type=CV_MAKETYPE(depth, channels) };
+    };
+    ...
+@endcode
+The main purpose of this class is to convert compilation-time type information to an
+OpenCV-compatible data type identifier, for example:
+@code
+    // allocates a 30x40 floating-point matrix
+    Mat A(30, 40, DataType<float>::type);
+
+    Mat B = Mat_<std::complex<double> >(3, 3);
+    // the statement below will print 6, 2 , that is depth == CV_64F, channels == 2
+    cout << B.depth() << ", " << B.channels() << endl;
+@endcode
+So, such traits are used to tell OpenCV which data type you are working with, even if such a type is
+not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV
+defines the proper specialized template class DataType\<complex\<_Tp\> \> . This mechanism is also
+useful (and used in OpenCV this way) for generic algorithms implementations.
+*/
+template<typename _Tp> class DataType
+{
+public:
+    typedef _Tp         value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 1,
+           depth        = -1,
+           channels     = 1,
+           fmt          = 0,
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<bool>
+{
+public:
+    typedef bool        value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8U,
+           channels     = 1,
+           fmt          = (int)'u',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<uchar>
+{
+public:
+    typedef uchar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8U,
+           channels     = 1,
+           fmt          = (int)'u',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<schar>
+{
+public:
+    typedef schar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8S,
+           channels     = 1,
+           fmt          = (int)'c',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char>
+{
+public:
+    typedef schar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8S,
+           channels     = 1,
+           fmt          = (int)'c',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<ushort>
+{
+public:
+    typedef ushort      value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_16U,
+           channels     = 1,
+           fmt          = (int)'w',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<short>
+{
+public:
+    typedef short       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_16S,
+           channels     = 1,
+           fmt          = (int)'s',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<int>
+{
+public:
+    typedef int         value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_32S,
+           channels     = 1,
+           fmt          = (int)'i',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<float>
+{
+public:
+    typedef float       value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_32F,
+           channels     = 1,
+           fmt          = (int)'f',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<double>
+{
+public:
+    typedef double      value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_64F,
+           channels     = 1,
+           fmt          = (int)'d',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+
+/** @brief A helper class for cv::DataType
+
+The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
+DataDepth<T>::value constant.
+*/
+template<typename _Tp> class DataDepth
+{
+public:
+    enum
+    {
+        value = DataType<_Tp>::depth,
+        fmt   = DataType<_Tp>::fmt
+    };
+};
+
+
+
+template<int _depth> class TypeDepth
+{
+    enum { depth = CV_USRTYPE1 };
+    typedef void value_type;
+};
+
+template<> class TypeDepth<CV_8U>
+{
+    enum { depth = CV_8U };
+    typedef uchar value_type;
+};
+
+template<> class TypeDepth<CV_8S>
+{
+    enum { depth = CV_8S };
+    typedef schar value_type;
+};
+
+template<> class TypeDepth<CV_16U>
+{
+    enum { depth = CV_16U };
+    typedef ushort value_type;
+};
+
+template<> class TypeDepth<CV_16S>
+{
+    enum { depth = CV_16S };
+    typedef short value_type;
+};
+
+template<> class TypeDepth<CV_32S>
+{
+    enum { depth = CV_32S };
+    typedef int value_type;
+};
+
+template<> class TypeDepth<CV_32F>
+{
+    enum { depth = CV_32F };
+    typedef float value_type;
+};
+
+template<> class TypeDepth<CV_64F>
+{
+    enum { depth = CV_64F };
+    typedef double value_type;
+};
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_TRAITS_HPP
diff --git a/thirdparty/linux/include/opencv2/core/types.hpp b/thirdparty/linux/include/opencv2/core/types.hpp
new file mode 100644
index 0000000..d5c64ca
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/types.hpp
@@ -0,0 +1,2264 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_HPP
+#define OPENCV_CORE_TYPES_HPP
+
+#ifndef __cplusplus
+#  error types.hpp header must be compiled as C++
+#endif
+
+#include <climits>
+#include <cfloat>
+#include <vector>
+#include <limits>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/matx.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+//////////////////////////////// Complex //////////////////////////////
+
+/** @brief  A complex number class.
+
+  The template class is similar and compatible with std::complex, however it provides slightly
+  more convenient access to the real and imaginary parts using through the simple field access, as opposite
+  to std::complex::real() and std::complex::imag().
+*/
+template<typename _Tp> class Complex
+{
+public:
+
+    //! constructors
+    Complex();
+    Complex( _Tp _re, _Tp _im = 0 );
+
+    //! conversion to another data type
+    template<typename T2> operator Complex<T2>() const;
+    //! conjugation
+    Complex conj() const;
+
+    _Tp re, im; //< the real and the imaginary parts
+};
+
+typedef Complex<float> Complexf;
+typedef Complex<double> Complexd;
+
+template<typename _Tp> class DataType< Complex<_Tp> >
+{
+public:
+    typedef Complex<_Tp> value_type;
+    typedef value_type   work_type;
+    typedef _Tp          channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels) };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Point_ ////////////////////////////////
+
+/** @brief Template class for 2D points specified by its coordinates `x` and `y`.
+
+An instance of the class is interchangeable with C structures, CvPoint and CvPoint2D32f . There is
+also a cast operator to convert point coordinates to the specified type. The conversion from
+floating-point coordinates to integer coordinates is done by rounding. Commonly, the conversion
+uses this operation for each of the coordinates. Besides the class members listed in the
+declaration above, the following operations on points are implemented:
+@code
+    pt1 = pt2 + pt3;
+    pt1 = pt2 - pt3;
+    pt1 = pt2 * a;
+    pt1 = a * pt2;
+    pt1 = pt2 / a;
+    pt1 += pt2;
+    pt1 -= pt2;
+    pt1 *= a;
+    pt1 /= a;
+    double value = norm(pt); // L2 norm
+    pt1 == pt2;
+    pt1 != pt2;
+@endcode
+For your convenience, the following type aliases are defined:
+@code
+    typedef Point_<int> Point2i;
+    typedef Point2i Point;
+    typedef Point_<float> Point2f;
+    typedef Point_<double> Point2d;
+@endcode
+Example:
+@code
+    Point2f a(0.3f, 0.f), b(0.f, 0.4f);
+    Point pt = (a + b)*10.f;
+    cout << pt.x << ", " << pt.y << endl;
+@endcode
+*/
+template<typename _Tp> class Point_
+{
+public:
+    typedef _Tp value_type;
+
+    // various constructors
+    Point_();
+    Point_(_Tp _x, _Tp _y);
+    Point_(const Point_& pt);
+    Point_(const Size_<_Tp>& sz);
+    Point_(const Vec<_Tp, 2>& v);
+
+    Point_& operator = (const Point_& pt);
+    //! conversion to another data type
+    template<typename _Tp2> operator Point_<_Tp2>() const;
+
+    //! conversion to the old-style C structures
+    operator Vec<_Tp, 2>() const;
+
+    //! dot product
+    _Tp dot(const Point_& pt) const;
+    //! dot product computed in double-precision arithmetics
+    double ddot(const Point_& pt) const;
+    //! cross-product
+    double cross(const Point_& pt) const;
+    //! checks whether the point is inside the specified rectangle
+    bool inside(const Rect_<_Tp>& r) const;
+
+    _Tp x, y; //< the point coordinates
+};
+
+typedef Point_<int> Point2i;
+typedef Point_<int64> Point2l;
+typedef Point_<float> Point2f;
+typedef Point_<double> Point2d;
+typedef Point2i Point;
+
+template<typename _Tp> class DataType< Point_<_Tp> >
+{
+public:
+    typedef Point_<_Tp>                               value_type;
+    typedef Point_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                       channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Point3_ ////////////////////////////////
+
+/** @brief Template class for 3D points specified by its coordinates `x`, `y` and `z`.
+
+An instance of the class is interchangeable with the C structure CvPoint2D32f . Similarly to
+Point_ , the coordinates of 3D points can be converted to another type. The vector arithmetic and
+comparison operations are also supported.
+
+The following Point3_\<\> aliases are available:
+@code
+    typedef Point3_<int> Point3i;
+    typedef Point3_<float> Point3f;
+    typedef Point3_<double> Point3d;
+@endcode
+@see cv::Point3i, cv::Point3f and cv::Point3d
+*/
+template<typename _Tp> class Point3_
+{
+public:
+    typedef _Tp value_type;
+
+    // various constructors
+    Point3_();
+    Point3_(_Tp _x, _Tp _y, _Tp _z);
+    Point3_(const Point3_& pt);
+    explicit Point3_(const Point_<_Tp>& pt);
+    Point3_(const Vec<_Tp, 3>& v);
+
+    Point3_& operator = (const Point3_& pt);
+    //! conversion to another data type
+    template<typename _Tp2> operator Point3_<_Tp2>() const;
+    //! conversion to cv::Vec<>
+#if OPENCV_ABI_COMPATIBILITY > 300
+    template<typename _Tp2> operator Vec<_Tp2, 3>() const;
+#else
+    operator Vec<_Tp, 3>() const;
+#endif
+
+    //! dot product
+    _Tp dot(const Point3_& pt) const;
+    //! dot product computed in double-precision arithmetics
+    double ddot(const Point3_& pt) const;
+    //! cross product of the 2 3D points
+    Point3_ cross(const Point3_& pt) const;
+
+    _Tp x, y, z; //< the point coordinates
+};
+
+typedef Point3_<int> Point3i;
+typedef Point3_<float> Point3f;
+typedef Point3_<double> Point3d;
+
+template<typename _Tp> class DataType< Point3_<_Tp> >
+{
+public:
+    typedef Point3_<_Tp>                               value_type;
+    typedef Point3_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                        channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 3,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Size_ ////////////////////////////////
+
+/** @brief Template class for specifying the size of an image or rectangle.
+
+The class includes two members called width and height. The structure can be converted to and from
+the old OpenCV structures CvSize and CvSize2D32f . The same set of arithmetic and comparison
+operations as for Point_ is available.
+
+OpenCV defines the following Size_\<\> aliases:
+@code
+    typedef Size_<int> Size2i;
+    typedef Size2i Size;
+    typedef Size_<float> Size2f;
+@endcode
+*/
+template<typename _Tp> class Size_
+{
+public:
+    typedef _Tp value_type;
+
+    //! various constructors
+    Size_();
+    Size_(_Tp _width, _Tp _height);
+    Size_(const Size_& sz);
+    Size_(const Point_<_Tp>& pt);
+
+    Size_& operator = (const Size_& sz);
+    //! the area (width*height)
+    _Tp area() const;
+
+    //! conversion of another data type.
+    template<typename _Tp2> operator Size_<_Tp2>() const;
+
+    _Tp width, height; // the width and the height
+};
+
+typedef Size_<int> Size2i;
+typedef Size_<int64> Size2l;
+typedef Size_<float> Size2f;
+typedef Size_<double> Size2d;
+typedef Size2i Size;
+
+template<typename _Tp> class DataType< Size_<_Tp> >
+{
+public:
+    typedef Size_<_Tp>                               value_type;
+    typedef Size_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                      channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Rect_ ////////////////////////////////
+
+/** @brief Template class for 2D rectangles
+
+described by the following parameters:
+-   Coordinates of the top-left corner. This is a default interpretation of Rect_::x and Rect_::y
+    in OpenCV. Though, in your algorithms you may count x and y from the bottom-left corner.
+-   Rectangle width and height.
+
+OpenCV typically assumes that the top and left boundary of the rectangle are inclusive, while the
+right and bottom boundaries are not. For example, the method Rect_::contains returns true if
+
+\f[x  \leq pt.x < x+width,
+      y  \leq pt.y < y+height\f]
+
+Virtually every loop over an image ROI in OpenCV (where ROI is specified by Rect_\<int\> ) is
+implemented as:
+@code
+    for(int y = roi.y; y < roi.y + roi.height; y++)
+        for(int x = roi.x; x < roi.x + roi.width; x++)
+        {
+            // ...
+        }
+@endcode
+In addition to the class members, the following operations on rectangles are implemented:
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{point}\f$ (shifting a rectangle by a certain offset)
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{size}\f$ (expanding or shrinking a rectangle by a
+    certain amount)
+-   rect += point, rect -= point, rect += size, rect -= size (augmenting operations)
+-   rect = rect1 & rect2 (rectangle intersection)
+-   rect = rect1 | rect2 (minimum area rectangle containing rect1 and rect2 )
+-   rect &= rect1, rect |= rect1 (and the corresponding augmenting operations)
+-   rect == rect1, rect != rect1 (rectangle comparison)
+
+This is an example how the partial ordering on rectangles can be established (rect1 \f$\subseteq\f$
+rect2):
+@code
+    template<typename _Tp> inline bool
+    operator <= (const Rect_<_Tp>& r1, const Rect_<_Tp>& r2)
+    {
+        return (r1 & r2) == r1;
+    }
+@endcode
+For your convenience, the Rect_\<\> alias is available: cv::Rect
+*/
+template<typename _Tp> class Rect_
+{
+public:
+    typedef _Tp value_type;
+
+    //! various constructors
+    Rect_();
+    Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height);
+    Rect_(const Rect_& r);
+    Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz);
+    Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2);
+
+    Rect_& operator = ( const Rect_& r );
+    //! the top-left corner
+    Point_<_Tp> tl() const;
+    //! the bottom-right corner
+    Point_<_Tp> br() const;
+
+    //! size (width, height) of the rectangle
+    Size_<_Tp> size() const;
+    //! area (width*height) of the rectangle
+    _Tp area() const;
+
+    //! conversion to another data type
+    template<typename _Tp2> operator Rect_<_Tp2>() const;
+
+    //! checks whether the rectangle contains the point
+    bool contains(const Point_<_Tp>& pt) const;
+
+    _Tp x, y, width, height; //< the top-left corner, as well as width and height of the rectangle
+};
+
+typedef Rect_<int> Rect2i;
+typedef Rect_<float> Rect2f;
+typedef Rect_<double> Rect2d;
+typedef Rect2i Rect;
+
+template<typename _Tp> class DataType< Rect_<_Tp> >
+{
+public:
+    typedef Rect_<_Tp>                               value_type;
+    typedef Rect_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                      channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 4,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+///////////////////////////// RotatedRect /////////////////////////////
+
+/** @brief The class represents rotated (i.e. not up-right) rectangles on a plane.
+
+Each rectangle is specified by the center point (mass center), length of each side (represented by
+cv::Size2f structure) and the rotation angle in degrees.
+
+The sample below demonstrates how to use RotatedRect:
+@code
+    Mat image(200, 200, CV_8UC3, Scalar(0));
+    RotatedRect rRect = RotatedRect(Point2f(100,100), Size2f(100,50), 30);
+
+    Point2f vertices[4];
+    rRect.points(vertices);
+    for (int i = 0; i < 4; i++)
+        line(image, vertices[i], vertices[(i+1)%4], Scalar(0,255,0));
+
+    Rect brect = rRect.boundingRect();
+    rectangle(image, brect, Scalar(255,0,0));
+
+    imshow("rectangles", image);
+    waitKey(0);
+@endcode
+![image](pics/rotatedrect.png)
+
+@sa CamShift, fitEllipse, minAreaRect, CvBox2D
+*/
+class CV_EXPORTS RotatedRect
+{
+public:
+    //! various constructors
+    RotatedRect();
+    /**
+    @param center The rectangle mass center.
+    @param size Width and height of the rectangle.
+    @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc.,
+    the rectangle becomes an up-right rectangle.
+    */
+    RotatedRect(const Point2f& center, const Size2f& size, float angle);
+    /**
+    Any 3 end points of the RotatedRect. They must be given in order (either clockwise or
+    anticlockwise).
+     */
+    RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3);
+
+    /** returns 4 vertices of the rectangle
+    @param pts The points array for storing rectangle vertices.
+    */
+    void points(Point2f pts[]) const;
+    //! returns the minimal up-right integer rectangle containing the rotated rectangle
+    Rect boundingRect() const;
+    //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images
+    Rect_<float> boundingRect2f() const;
+
+    Point2f center; //< the rectangle mass center
+    Size2f size;    //< width and height of the rectangle
+    float angle;    //< the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+};
+
+template<> class DataType< RotatedRect >
+{
+public:
+    typedef RotatedRect  value_type;
+    typedef value_type   work_type;
+    typedef float        channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)sizeof(value_type)/sizeof(channel_type), // 5
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Range /////////////////////////////////
+
+/** @brief Template class specifying a continuous subsequence (slice) of a sequence.
+
+The class is used to specify a row or a column span in a matrix ( Mat ) and for many other purposes.
+Range(a,b) is basically the same as a:b in Matlab or a..b in Python. As in Python, start is an
+inclusive left boundary of the range and end is an exclusive right boundary of the range. Such a
+half-opened interval is usually denoted as \f$[start,end)\f$ .
+
+The static method Range::all() returns a special variable that means "the whole sequence" or "the
+whole range", just like " : " in Matlab or " ... " in Python. All the methods and functions in
+OpenCV that take Range support this special Range::all() value. But, of course, in case of your own
+custom processing, you will probably have to check and handle it explicitly:
+@code
+    void my_function(..., const Range& r, ....)
+    {
+        if(r == Range::all()) {
+            // process all the data
+        }
+        else {
+            // process [r.start, r.end)
+        }
+    }
+@endcode
+*/
+class CV_EXPORTS Range
+{
+public:
+    Range();
+    Range(int _start, int _end);
+    int size() const;
+    bool empty() const;
+    static Range all();
+
+    int start, end;
+};
+
+template<> class DataType<Range>
+{
+public:
+    typedef Range      value_type;
+    typedef value_type work_type;
+    typedef int        channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// Scalar_ ///////////////////////////////
+
+/** @brief Template class for a 4-element vector derived from Vec.
+
+Being derived from Vec\<_Tp, 4\> , Scalar_ and Scalar can be used just as typical 4-element
+vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in
+OpenCV to pass pixel values.
+*/
+template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
+{
+public:
+    //! various constructors
+    Scalar_();
+    Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0);
+    Scalar_(_Tp v0);
+
+    template<typename _Tp2, int cn>
+    Scalar_(const Vec<_Tp2, cn>& v);
+
+    //! returns a scalar with all elements set to v0
+    static Scalar_<_Tp> all(_Tp v0);
+
+    //! conversion to another data type
+    template<typename T2> operator Scalar_<T2>() const;
+
+    //! per-element product
+    Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1 ) const;
+
+    // returns (v0, -v1, -v2, -v3)
+    Scalar_<_Tp> conj() const;
+
+    // returns true iff v1 == v2 == v3 == 0
+    bool isReal() const;
+};
+
+typedef Scalar_<double> Scalar;
+
+template<typename _Tp> class DataType< Scalar_<_Tp> >
+{
+public:
+    typedef Scalar_<_Tp>                               value_type;
+    typedef Scalar_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                        channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 4,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+/////////////////////////////// KeyPoint ////////////////////////////////
+
+/** @brief Data structure for salient point detectors.
+
+The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint
+detectors, such as Harris corner detector, cv::FAST, cv::StarDetector, cv::SURF, cv::SIFT,
+cv::LDetector etc.
+
+The keypoint is characterized by the 2D position, scale (proportional to the diameter of the
+neighborhood that needs to be taken into account), orientation and some other parameters. The
+keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually
+represented as a feature vector). The keypoints representing the same object in different images
+can then be matched using cv::KDTree or another method.
+*/
+class CV_EXPORTS_W_SIMPLE KeyPoint
+{
+public:
+    //! the default constructor
+    CV_WRAP KeyPoint();
+    /**
+    @param _pt x & y coordinates of the keypoint
+    @param _size keypoint diameter
+    @param _angle keypoint orientation
+    @param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param _octave pyramid octave in which the keypoint has been detected
+    @param _class_id object id
+     */
+    KeyPoint(Point2f _pt, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
+    /**
+    @param x x-coordinate of the keypoint
+    @param y y-coordinate of the keypoint
+    @param _size keypoint diameter
+    @param _angle keypoint orientation
+    @param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param _octave pyramid octave in which the keypoint has been detected
+    @param _class_id object id
+     */
+    CV_WRAP KeyPoint(float x, float y, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
+
+    size_t hash() const;
+
+    /**
+    This method converts vector of keypoints to vector of points or the reverse, where each keypoint is
+    assigned the same size and the same orientation.
+
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypointIndexes Array of indexes of keypoints to be converted to points. (Acts like a mask to
+    convert only specified keypoints)
+    */
+    CV_WRAP static void convert(const std::vector<KeyPoint>& keypoints,
+                                CV_OUT std::vector<Point2f>& points2f,
+                                const std::vector<int>& keypointIndexes=std::vector<int>());
+    /** @overload
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param size keypoint diameter
+    @param response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param octave pyramid octave in which the keypoint has been detected
+    @param class_id object id
+    */
+    CV_WRAP static void convert(const std::vector<Point2f>& points2f,
+                                CV_OUT std::vector<KeyPoint>& keypoints,
+                                float size=1, float response=1, int octave=0, int class_id=-1);
+
+    /**
+    This method computes overlap for pair of keypoints. Overlap is the ratio between area of keypoint
+    regions' intersection and area of keypoint regions' union (considering keypoint region as circle).
+    If they don't overlap, we get zero. If they coincide at same location with same size, we get 1.
+    @param kp1 First keypoint
+    @param kp2 Second keypoint
+    */
+    CV_WRAP static float overlap(const KeyPoint& kp1, const KeyPoint& kp2);
+
+    CV_PROP_RW Point2f pt; //!< coordinates of the keypoints
+    CV_PROP_RW float size; //!< diameter of the meaningful keypoint neighborhood
+    CV_PROP_RW float angle; //!< computed orientation of the keypoint (-1 if not applicable);
+                            //!< it's in [0,360) degrees and measured relative to
+                            //!< image coordinate system, ie in clockwise.
+    CV_PROP_RW float response; //!< the response by which the most strong keypoints have been selected. Can be used for the further sorting or subsampling
+    CV_PROP_RW int octave; //!< octave (pyramid layer) from which the keypoint has been extracted
+    CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to)
+};
+
+template<> class DataType<KeyPoint>
+{
+public:
+    typedef KeyPoint      value_type;
+    typedef float         work_type;
+    typedef float         channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 7
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+//////////////////////////////// DMatch /////////////////////////////////
+
+/** @brief Class for matching keypoint descriptors
+
+query descriptor index, train descriptor index, train image index, and distance between
+descriptors.
+*/
+class CV_EXPORTS_W_SIMPLE DMatch
+{
+public:
+    CV_WRAP DMatch();
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);
+
+    CV_PROP_RW int queryIdx; // query descriptor index
+    CV_PROP_RW int trainIdx; // train descriptor index
+    CV_PROP_RW int imgIdx;   // train image index
+
+    CV_PROP_RW float distance;
+
+    // less is better
+    bool operator<(const DMatch &m) const;
+};
+
+template<> class DataType<DMatch>
+{
+public:
+    typedef DMatch      value_type;
+    typedef int         work_type;
+    typedef int         channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 4
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+
+
+///////////////////////////// TermCriteria //////////////////////////////
+
+/** @brief The class defining termination criteria for iterative algorithms.
+
+You can initialize it by default constructor and then override any parameters, or the structure may
+be fully initialized using the advanced variant of the constructor.
+*/
+class CV_EXPORTS TermCriteria
+{
+public:
+    /**
+      Criteria type, can be one of: COUNT, EPS or COUNT + EPS
+    */
+    enum Type
+    {
+        COUNT=1, //!< the maximum number of iterations or elements to compute
+        MAX_ITER=COUNT, //!< ditto
+        EPS=2 //!< the desired accuracy or change in parameters at which the iterative algorithm stops
+    };
+
+    //! default constructor
+    TermCriteria();
+    /**
+    @param type The type of termination criteria, one of TermCriteria::Type
+    @param maxCount The maximum number of iterations or elements to compute.
+    @param epsilon The desired accuracy or change in parameters at which the iterative algorithm stops.
+    */
+    TermCriteria(int type, int maxCount, double epsilon);
+
+    int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
+    int maxCount; // the maximum number of iterations/elements
+    double epsilon; // the desired accuracy
+};
+
+
+//! @} core_basic
+
+///////////////////////// raster image moments //////////////////////////
+
+//! @addtogroup imgproc_shape
+//! @{
+
+/** @brief struct returned by cv::moments
+
+The spatial moments \f$\texttt{Moments::m}_{ji}\f$ are computed as:
+
+\f[\texttt{m} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot x^j  \cdot y^i \right )\f]
+
+The central moments \f$\texttt{Moments::mu}_{ji}\f$ are computed as:
+
+\f[\texttt{mu} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot (x -  \bar{x} )^j  \cdot (y -  \bar{y} )^i \right )\f]
+
+where \f$(\bar{x}, \bar{y})\f$ is the mass center:
+
+\f[\bar{x} = \frac{\texttt{m}_{10}}{\texttt{m}_{00}} , \; \bar{y} = \frac{\texttt{m}_{01}}{\texttt{m}_{00}}\f]
+
+The normalized central moments \f$\texttt{Moments::nu}_{ij}\f$ are computed as:
+
+\f[\texttt{nu} _{ji}= \frac{\texttt{mu}_{ji}}{\texttt{m}_{00}^{(i+j)/2+1}} .\f]
+
+@note
+\f$\texttt{mu}_{00}=\texttt{m}_{00}\f$, \f$\texttt{nu}_{00}=1\f$
+\f$\texttt{nu}_{10}=\texttt{mu}_{10}=\texttt{mu}_{01}=\texttt{mu}_{10}=0\f$ , hence the values are not
+stored.
+
+The moments of a contour are defined in the same way but computed using the Green's formula (see
+<http://en.wikipedia.org/wiki/Green_theorem>). So, due to a limited raster resolution, the moments
+computed for a contour are slightly different from the moments computed for the same rasterized
+contour.
+
+@note
+Since the contour moments are computed using Green formula, you may get seemingly odd results for
+contours with self-intersections, e.g. a zero area (m00) for butterfly-shaped contours.
+ */
+class CV_EXPORTS_W_MAP Moments
+{
+public:
+    //! the default constructor
+    Moments();
+    //! the full constructor
+    Moments(double m00, double m10, double m01, double m20, double m11,
+            double m02, double m30, double m21, double m12, double m03 );
+    ////! the conversion from CvMoments
+    //Moments( const CvMoments& moments );
+    ////! the conversion to CvMoments
+    //operator CvMoments() const;
+
+    //! @name spatial moments
+    //! @{
+    CV_PROP_RW double  m00, m10, m01, m20, m11, m02, m30, m21, m12, m03;
+    //! @}
+
+    //! @name central moments
+    //! @{
+    CV_PROP_RW double  mu20, mu11, mu02, mu30, mu21, mu12, mu03;
+    //! @}
+
+    //! @name central normalized moments
+    //! @{
+    CV_PROP_RW double  nu20, nu11, nu02, nu30, nu21, nu12, nu03;
+    //! @}
+};
+
+template<> class DataType<Moments>
+{
+public:
+    typedef Moments     value_type;
+    typedef double      work_type;
+    typedef double      channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 24
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+//! @} imgproc_shape
+
+//! @cond IGNORED
+
+/////////////////////////////////////////////////////////////////////////
+///////////////////////////// Implementation ////////////////////////////
+/////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////// Complex ////////////////////////////////
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex()
+    : re(0), im(0) {}
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex( _Tp _re, _Tp _im )
+    : re(_re), im(_im) {}
+
+template<typename _Tp> template<typename T2> inline
+Complex<_Tp>::operator Complex<T2>() const
+{
+    return Complex<T2>(saturate_cast<T2>(re), saturate_cast<T2>(im));
+}
+
+template<typename _Tp> inline
+Complex<_Tp> Complex<_Tp>::conj() const
+{
+    return Complex<_Tp>(re, -im);
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re == b.re && a.im == b.im;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re != b.re || a.im != b.im;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re + b.re, a.im + b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re += b.re; a.im += b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re - b.re, a.im - b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re -= b.re; a.im -= b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a)
+{
+    return Complex<_Tp>(-a.re, -a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, _Tp b)
+{ return Complex<_Tp>( a.re - b, a.im ); }
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( b - a.re, -a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, _Tp b)
+{
+    a.re += b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, _Tp b)
+{
+    a.re -= b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator *= (Complex<_Tp>& a, _Tp b)
+{
+    a.re *= b; a.im *= b; return a;
+}
+
+template<typename _Tp> static inline
+double abs(const Complex<_Tp>& a)
+{
+    return std::sqrt( (double)a.re*a.re + (double)a.im*a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    double t = 1./((double)b.re*b.re + (double)b.im*b.im);
+    return Complex<_Tp>( (_Tp)((a.re*b.re + a.im*b.im)*t),
+                        (_Tp)((-a.re*b.im + a.im*b.re)*t) );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return (a = a / b);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, _Tp b)
+{
+    _Tp t = (_Tp)1/b;
+    return Complex<_Tp>( a.re*t, a.im*t );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>(b)/a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator /= (const Complex<_Tp>& a, _Tp b)
+{
+    _Tp t = (_Tp)1/b;
+    a.re *= t; a.im *= t; return a;
+}
+
+
+
+//////////////////////////////// 2D Point ///////////////////////////////
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_()
+    : x(0), y(0) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(_Tp _x, _Tp _y)
+    : x(_x), y(_y) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Point_& pt)
+    : x(pt.x), y(pt.y) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Size_<_Tp>& sz)
+    : x(sz.width), y(sz.height) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Vec<_Tp,2>& v)
+    : x(v[0]), y(v[1]) {}
+
+template<typename _Tp> inline
+Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt)
+{
+    x = pt.x; y = pt.y;
+    return *this;
+}
+
+template<typename _Tp> template<typename _Tp2> inline
+Point_<_Tp>::operator Point_<_Tp2>() const
+{
+    return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y));
+}
+
+template<typename _Tp> inline
+Point_<_Tp>::operator Vec<_Tp, 2>() const
+{
+    return Vec<_Tp, 2>(x, y);
+}
+
+template<typename _Tp> inline
+_Tp Point_<_Tp>::dot(const Point_& pt) const
+{
+    return saturate_cast<_Tp>(x*pt.x + y*pt.y);
+}
+
+template<typename _Tp> inline
+double Point_<_Tp>::ddot(const Point_& pt) const
+{
+    return (double)x*pt.x + (double)y*pt.y;
+}
+
+template<typename _Tp> inline
+double Point_<_Tp>::cross(const Point_& pt) const
+{
+    return (double)x*pt.y - (double)y*pt.x;
+}
+
+template<typename _Tp> inline bool
+Point_<_Tp>::inside( const Rect_<_Tp>& r ) const
+{
+    return r.contains(*this);
+}
+
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator += (Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator -= (Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+double norm(const Point_<_Tp>& pt)
+{
+    return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y);
+}
+
+template<typename _Tp> static inline
+bool operator == (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator + (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator - (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator - (const Point_<_Tp>& a)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, int b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (int a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, float b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (float a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, double b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (double a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Matx<_Tp, 2, 2>& a, const Point_<_Tp>& b)
+{
+    Matx<_Tp, 2, 1> tmp = a * Vec<_Tp,2>(b.x, b.y);
+    return Point_<_Tp>(tmp.val[0], tmp.val[1]);
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point_<_Tp>& b)
+{
+    Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, 1);
+    return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]);
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, int b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, float b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, double b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+
+
+//////////////////////////////// 3D Point ///////////////////////////////
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_()
+    : x(0), y(0), z(0) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(_Tp _x, _Tp _y, _Tp _z)
+    : x(_x), y(_y), z(_z) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Point3_& pt)
+    : x(pt.x), y(pt.y), z(pt.z) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Point_<_Tp>& pt)
+    : x(pt.x), y(pt.y), z(_Tp()) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Vec<_Tp, 3>& v)
+    : x(v[0]), y(v[1]), z(v[2]) {}
+
+template<typename _Tp> template<typename _Tp2> inline
+Point3_<_Tp>::operator Point3_<_Tp2>() const
+{
+    return Point3_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(z));
+}
+
+#if OPENCV_ABI_COMPATIBILITY > 300
+template<typename _Tp> template<typename _Tp2> inline
+Point3_<_Tp>::operator Vec<_Tp2, 3>() const
+{
+    return Vec<_Tp2, 3>(x, y, z);
+}
+#else
+template<typename _Tp> inline
+Point3_<_Tp>::operator Vec<_Tp, 3>() const
+{
+    return Vec<_Tp, 3>(x, y, z);
+}
+#endif
+
+template<typename _Tp> inline
+Point3_<_Tp>& Point3_<_Tp>::operator = (const Point3_& pt)
+{
+    x = pt.x; y = pt.y; z = pt.z;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp Point3_<_Tp>::dot(const Point3_& pt) const
+{
+    return saturate_cast<_Tp>(x*pt.x + y*pt.y + z*pt.z);
+}
+
+template<typename _Tp> inline
+double Point3_<_Tp>::ddot(const Point3_& pt) const
+{
+    return (double)x*pt.x + (double)y*pt.y + (double)z*pt.z;
+}
+
+template<typename _Tp> inline
+Point3_<_Tp> Point3_<_Tp>::cross(const Point3_<_Tp>& pt) const
+{
+    return Point3_<_Tp>(y*pt.z - z*pt.y, z*pt.x - x*pt.z, x*pt.y - y*pt.x);
+}
+
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator += (Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator -= (Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+double norm(const Point3_<_Tp>& pt)
+{
+    return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y + (double)pt.z*pt.z);
+}
+
+template<typename _Tp> static inline
+bool operator == (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y && a.z == b.z;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y || a.z != b.z;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator + (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y), saturate_cast<_Tp>(a.z + b.z));
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator - (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y), saturate_cast<_Tp>(a.z - b.z));
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator - (const Point3_<_Tp>& a)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y), saturate_cast<_Tp>(-a.z) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, int b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b), saturate_cast<_Tp>(a.z*b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (int a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, float b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (float a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, double b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (double a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point3_<_Tp>& b)
+{
+    Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, b.z);
+    return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]);
+}
+
+template<typename _Tp> static inline
+Matx<_Tp, 4, 1> operator * (const Matx<_Tp, 4, 4>& a, const Point3_<_Tp>& b)
+{
+    return a * Matx<_Tp, 4, 1>(b.x, b.y, b.z, 1);
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, int b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, float b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, double b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+
+
+////////////////////////////////// Size /////////////////////////////////
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_()
+    : width(0), height(0) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(_Tp _width, _Tp _height)
+    : width(_width), height(_height) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(const Size_& sz)
+    : width(sz.width), height(sz.height) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(const Point_<_Tp>& pt)
+    : width(pt.x), height(pt.y) {}
+
+template<typename _Tp> template<typename _Tp2> inline
+Size_<_Tp>::operator Size_<_Tp2>() const
+{
+    return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+}
+
+template<typename _Tp> inline
+Size_<_Tp>& Size_<_Tp>::operator = (const Size_<_Tp>& sz)
+{
+    width = sz.width; height = sz.height;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp Size_<_Tp>::area() const
+{
+    return width * height;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b)
+{
+    a.width *= b;
+    a.height *= b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator * (const Size_<_Tp>& a, _Tp b)
+{
+    Size_<_Tp> tmp(a);
+    tmp *= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator /= (Size_<_Tp>& a, _Tp b)
+{
+    a.width /= b;
+    a.height /= b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator / (const Size_<_Tp>& a, _Tp b)
+{
+    Size_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator += (Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    a.width += b.width;
+    a.height += b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator + (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    Size_<_Tp> tmp(a);
+    tmp += b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator -= (Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    a.width -= b.width;
+    a.height -= b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator - (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    Size_<_Tp> tmp(a);
+    tmp -= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+bool operator == (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return a.width == b.width && a.height == b.height;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return !(a == b);
+}
+
+
+
+////////////////////////////////// Rect /////////////////////////////////
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_()
+    : x(0), y(0), width(0), height(0) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height)
+    : x(_x), y(_y), width(_width), height(_height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Rect_<_Tp>& r)
+    : x(r.x), y(r.y), width(r.width), height(r.height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz)
+    : x(org.x), y(org.y), width(sz.width), height(sz.height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2)
+{
+    x = std::min(pt1.x, pt2.x);
+    y = std::min(pt1.y, pt2.y);
+    width = std::max(pt1.x, pt2.x) - x;
+    height = std::max(pt1.y, pt2.y) - y;
+}
+
+template<typename _Tp> inline
+Rect_<_Tp>& Rect_<_Tp>::operator = ( const Rect_<_Tp>& r )
+{
+    x = r.x;
+    y = r.y;
+    width = r.width;
+    height = r.height;
+    return *this;
+}
+
+template<typename _Tp> inline
+Point_<_Tp> Rect_<_Tp>::tl() const
+{
+    return Point_<_Tp>(x,y);
+}
+
+template<typename _Tp> inline
+Point_<_Tp> Rect_<_Tp>::br() const
+{
+    return Point_<_Tp>(x + width, y + height);
+}
+
+template<typename _Tp> inline
+Size_<_Tp> Rect_<_Tp>::size() const
+{
+    return Size_<_Tp>(width, height);
+}
+
+template<typename _Tp> inline
+_Tp Rect_<_Tp>::area() const
+{
+    return width * height;
+}
+
+template<typename _Tp> template<typename _Tp2> inline
+Rect_<_Tp>::operator Rect_<_Tp2>() const
+{
+    return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+}
+
+template<typename _Tp> inline
+bool Rect_<_Tp>::contains(const Point_<_Tp>& pt) const
+{
+    return x <= pt.x && pt.x < x + width && y <= pt.y && pt.y < y + height;
+}
+
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Point_<_Tp>& b )
+{
+    a.x += b.x;
+    a.y += b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Point_<_Tp>& b )
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b )
+{
+    a.width += b.width;
+    a.height += b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b )
+{
+    a.width -= b.width;
+    a.height -= b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
+{
+    _Tp x1 = std::max(a.x, b.x);
+    _Tp y1 = std::max(a.y, b.y);
+    a.width = std::min(a.x + a.width, b.x + b.width) - x1;
+    a.height = std::min(a.y + a.height, b.y + b.height) - y1;
+    a.x = x1;
+    a.y = y1;
+    if( a.width <= 0 || a.height <= 0 )
+        a = Rect();
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
+{
+    _Tp x1 = std::min(a.x, b.x);
+    _Tp y1 = std::min(a.y, b.y);
+    a.width = std::max(a.x + a.width, b.x + b.width) - x1;
+    a.height = std::max(a.y + a.height, b.y + b.height) - y1;
+    a.x = x1;
+    a.y = y1;
+    return a;
+}
+
+template<typename _Tp> static inline
+bool operator == (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y && a.width == b.width && a.height == b.height;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y || a.width != b.width || a.height != b.height;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x + b.x, a.y + b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x - b.x, a.y - b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c &= b;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c |= b;
+}
+
+/**
+ * @brief measure dissimilarity between two sample sets
+ *
+ * computes the complement of the Jaccard Index as described in <https://en.wikipedia.org/wiki/Jaccard_index>.
+ * For rectangles this reduces to computing the intersection over the union.
+ */
+template<typename _Tp> static inline
+double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) {
+    _Tp Aa = a.area();
+    _Tp Ab = b.area();
+
+    if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) {
+        // jaccard_index = 1 -> distance = 0
+        return 0.0;
+    }
+
+    double Aab = (a & b).area();
+    // distance = 1 - jaccard_index
+    return 1.0 - Aab / (Aa + Ab - Aab);
+}
+
+////////////////////////////// RotatedRect //////////////////////////////
+
+inline
+RotatedRect::RotatedRect()
+    : center(), size(), angle(0) {}
+
+inline
+RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle)
+    : center(_center), size(_size), angle(_angle) {}
+
+
+
+///////////////////////////////// Range /////////////////////////////////
+
+inline
+Range::Range()
+    : start(0), end(0) {}
+
+inline
+Range::Range(int _start, int _end)
+    : start(_start), end(_end) {}
+
+inline
+int Range::size() const
+{
+    return end - start;
+}
+
+inline
+bool Range::empty() const
+{
+    return start == end;
+}
+
+inline
+Range Range::all()
+{
+    return Range(INT_MIN, INT_MAX);
+}
+
+
+static inline
+bool operator == (const Range& r1, const Range& r2)
+{
+    return r1.start == r2.start && r1.end == r2.end;
+}
+
+static inline
+bool operator != (const Range& r1, const Range& r2)
+{
+    return !(r1 == r2);
+}
+
+static inline
+bool operator !(const Range& r)
+{
+    return r.start == r.end;
+}
+
+static inline
+Range operator & (const Range& r1, const Range& r2)
+{
+    Range r(std::max(r1.start, r2.start), std::min(r1.end, r2.end));
+    r.end = std::max(r.end, r.start);
+    return r;
+}
+
+static inline
+Range& operator &= (Range& r1, const Range& r2)
+{
+    r1 = r1 & r2;
+    return r1;
+}
+
+static inline
+Range operator + (const Range& r1, int delta)
+{
+    return Range(r1.start + delta, r1.end + delta);
+}
+
+static inline
+Range operator + (int delta, const Range& r1)
+{
+    return Range(r1.start + delta, r1.end + delta);
+}
+
+static inline
+Range operator - (const Range& r1, int delta)
+{
+    return r1 + (-delta);
+}
+
+
+
+///////////////////////////////// Scalar ////////////////////////////////
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_()
+{
+    this->val[0] = this->val[1] = this->val[2] = this->val[3] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+{
+    this->val[0] = v0;
+    this->val[1] = v1;
+    this->val[2] = v2;
+    this->val[3] = v3;
+}
+
+template<typename _Tp> template<typename _Tp2, int cn> inline
+Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v)
+{
+    int i;
+    for( i = 0; i < (cn < 4 ? cn : 4); i++ )
+        this->val[i] = cv::saturate_cast<_Tp>(v.val[i]);
+    for( ; i < 4; i++ )
+        this->val[i] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(_Tp v0)
+{
+    this->val[0] = v0;
+    this->val[1] = this->val[2] = this->val[3] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::all(_Tp v0)
+{
+    return Scalar_<_Tp>(v0, v0, v0, v0);
+}
+
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::mul(const Scalar_<_Tp>& a, double scale ) const
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(this->val[0] * a.val[0] * scale),
+                        saturate_cast<_Tp>(this->val[1] * a.val[1] * scale),
+                        saturate_cast<_Tp>(this->val[2] * a.val[2] * scale),
+                        saturate_cast<_Tp>(this->val[3] * a.val[3] * scale));
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::conj() const
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>( this->val[0]),
+                        saturate_cast<_Tp>(-this->val[1]),
+                        saturate_cast<_Tp>(-this->val[2]),
+                        saturate_cast<_Tp>(-this->val[3]));
+}
+
+template<typename _Tp> inline
+bool Scalar_<_Tp>::isReal() const
+{
+    return this->val[1] == 0 && this->val[2] == 0 && this->val[3] == 0;
+}
+
+
+template<typename _Tp> template<typename T2> inline
+Scalar_<_Tp>::operator Scalar_<T2>() const
+{
+    return Scalar_<T2>(saturate_cast<T2>(this->val[0]),
+                       saturate_cast<T2>(this->val[1]),
+                       saturate_cast<T2>(this->val[2]),
+                       saturate_cast<T2>(this->val[3]));
+}
+
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator += (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a.val[0] += b.val[0];
+    a.val[1] += b.val[1];
+    a.val[2] += b.val[2];
+    a.val[3] += b.val[3];
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator -= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a.val[0] -= b.val[0];
+    a.val[1] -= b.val[1];
+    a.val[2] -= b.val[2];
+    a.val[3] -= b.val[3];
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator *= ( Scalar_<_Tp>& a, _Tp v )
+{
+    a.val[0] *= v;
+    a.val[1] *= v;
+    a.val[2] *= v;
+    a.val[3] *= v;
+    return a;
+}
+
+template<typename _Tp> static inline
+bool operator == ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b )
+{
+    return a.val[0] == b.val[0] && a.val[1] == b.val[1] &&
+           a.val[2] == b.val[2] && a.val[3] == b.val[3];
+}
+
+template<typename _Tp> static inline
+bool operator != ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b )
+{
+    return a.val[0] != b.val[0] || a.val[1] != b.val[1] ||
+           a.val[2] != b.val[2] || a.val[3] != b.val[3];
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator + (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(a.val[0] + b.val[0],
+                        a.val[1] + b.val[1],
+                        a.val[2] + b.val[2],
+                        a.val[3] + b.val[3]);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator - (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(a.val[0] - b.val[0]),
+                        saturate_cast<_Tp>(a.val[1] - b.val[1]),
+                        saturate_cast<_Tp>(a.val[2] - b.val[2]),
+                        saturate_cast<_Tp>(a.val[3] - b.val[3]));
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, _Tp alpha)
+{
+    return Scalar_<_Tp>(a.val[0] * alpha,
+                        a.val[1] * alpha,
+                        a.val[2] * alpha,
+                        a.val[3] * alpha);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (_Tp alpha, const Scalar_<_Tp>& a)
+{
+    return a*alpha;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator - (const Scalar_<_Tp>& a)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(-a.val[0]),
+                        saturate_cast<_Tp>(-a.val[1]),
+                        saturate_cast<_Tp>(-a.val[2]),
+                        saturate_cast<_Tp>(-a.val[3]));
+}
+
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3]),
+                        saturate_cast<_Tp>(a[0]*b[1] + a[1]*b[0] + a[2]*b[3] - a[3]*b[2]),
+                        saturate_cast<_Tp>(a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1]),
+                        saturate_cast<_Tp>(a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0]));
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator *= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a = a * b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, _Tp alpha)
+{
+    return Scalar_<_Tp>(a.val[0] / alpha,
+                        a.val[1] / alpha,
+                        a.val[2] / alpha,
+                        a.val[3] / alpha);
+}
+
+template<typename _Tp> static inline
+Scalar_<float> operator / (const Scalar_<float>& a, float alpha)
+{
+    float s = 1 / alpha;
+    return Scalar_<float>(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s);
+}
+
+template<typename _Tp> static inline
+Scalar_<double> operator / (const Scalar_<double>& a, double alpha)
+{
+    double s = 1 / alpha;
+    return Scalar_<double>(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, _Tp alpha)
+{
+    a = a / alpha;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (_Tp a, const Scalar_<_Tp>& b)
+{
+    _Tp s = a / (b[0]*b[0] + b[1]*b[1] + b[2]*b[2] + b[3]*b[3]);
+    return b.conj() * s;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return a * ((_Tp)1 / b);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a = a / b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b)
+{
+    Matx<double, 4, 1> c((Matx<double, 4, 4>)a, b, Matx_MatMulOp());
+    return reinterpret_cast<const Scalar&>(c);
+}
+
+template<> inline
+Scalar operator * (const Matx<double, 4, 4>& a, const Scalar& b)
+{
+    Matx<double, 4, 1> c(a, b, Matx_MatMulOp());
+    return reinterpret_cast<const Scalar&>(c);
+}
+
+
+
+//////////////////////////////// KeyPoint ///////////////////////////////
+
+inline
+KeyPoint::KeyPoint()
+    : pt(0,0), size(0), angle(-1), response(0), octave(0), class_id(-1) {}
+
+inline
+KeyPoint::KeyPoint(Point2f _pt, float _size, float _angle, float _response, int _octave, int _class_id)
+    : pt(_pt), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {}
+
+inline
+KeyPoint::KeyPoint(float x, float y, float _size, float _angle, float _response, int _octave, int _class_id)
+    : pt(x, y), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {}
+
+
+
+///////////////////////////////// DMatch ////////////////////////////////
+
+inline
+DMatch::DMatch()
+    : queryIdx(-1), trainIdx(-1), imgIdx(-1), distance(FLT_MAX) {}
+
+inline
+DMatch::DMatch(int _queryIdx, int _trainIdx, float _distance)
+    : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(-1), distance(_distance) {}
+
+inline
+DMatch::DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance)
+    : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(_imgIdx), distance(_distance) {}
+
+inline
+bool DMatch::operator < (const DMatch &m) const
+{
+    return distance < m.distance;
+}
+
+
+
+////////////////////////////// TermCriteria /////////////////////////////
+
+inline
+TermCriteria::TermCriteria()
+    : type(0), maxCount(0), epsilon(0) {}
+
+inline
+TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon)
+    : type(_type), maxCount(_maxCount), epsilon(_epsilon) {}
+
+//! @endcond
+
+} // cv
+
+#endif //OPENCV_CORE_TYPES_HPP
diff --git a/thirdparty/linux/include/opencv2/core/types_c.h b/thirdparty/linux/include/opencv2/core/types_c.h
new file mode 100644
index 0000000..f82a59e
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/types_c.h
@@ -0,0 +1,1837 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_H
+#define OPENCV_CORE_TYPES_H
+
+#ifdef HAVE_IPL
+#  ifndef __IPL_H__
+#    if defined WIN32 || defined _WIN32
+#      include <ipl.h>
+#    else
+#      include <ipl/ipl.h>
+#    endif
+#  endif
+#elif defined __IPL_H__
+#  define HAVE_IPL
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+#ifndef SKIP_INCLUDES
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#endif // SKIP_INCLUDES
+
+#if defined WIN32 || defined _WIN32
+#  define CV_CDECL __cdecl
+#  define CV_STDCALL __stdcall
+#else
+#  define CV_CDECL
+#  define CV_STDCALL
+#endif
+
+#ifndef CV_DEFAULT
+#  ifdef __cplusplus
+#    define CV_DEFAULT(val) = val
+#  else
+#    define CV_DEFAULT(val)
+#  endif
+#endif
+
+#ifndef CV_EXTERN_C_FUNCPTR
+#  ifdef __cplusplus
+#    define CV_EXTERN_C_FUNCPTR(x) extern "C" { typedef x; }
+#  else
+#    define CV_EXTERN_C_FUNCPTR(x) typedef x
+#  endif
+#endif
+
+#ifndef CVAPI
+#  define CVAPI(rettype) CV_EXTERN_C CV_EXPORTS rettype CV_CDECL
+#endif
+
+#ifndef CV_IMPL
+#  define CV_IMPL CV_EXTERN_C
+#endif
+
+#ifdef __cplusplus
+#  include "opencv2/core.hpp"
+#endif
+
+/** @addtogroup core_c
+    @{
+*/
+
+/** @brief This is the "metatype" used *only* as a function parameter.
+
+It denotes that the function accepts arrays of multiple types, such as IplImage*, CvMat* or even
+CvSeq* sometimes. The particular array type is determined at runtime by analyzing the first 4
+bytes of the header. In C++ interface the role of CvArr is played by InputArray and OutputArray.
+ */
+typedef void CvArr;
+
+typedef int CVStatus;
+
+/** @see cv::Error::Code */
+enum {
+ CV_StsOk=                       0,  /**< everything is ok                */
+ CV_StsBackTrace=               -1,  /**< pseudo error for back trace     */
+ CV_StsError=                   -2,  /**< unknown /unspecified error      */
+ CV_StsInternal=                -3,  /**< internal error (bad state)      */
+ CV_StsNoMem=                   -4,  /**< insufficient memory             */
+ CV_StsBadArg=                  -5,  /**< function arg/param is bad       */
+ CV_StsBadFunc=                 -6,  /**< unsupported function            */
+ CV_StsNoConv=                  -7,  /**< iter. didn't converge           */
+ CV_StsAutoTrace=               -8,  /**< tracing                         */
+ CV_HeaderIsNull=               -9,  /**< image header is NULL            */
+ CV_BadImageSize=              -10,  /**< image size is invalid           */
+ CV_BadOffset=                 -11,  /**< offset is invalid               */
+ CV_BadDataPtr=                -12,  /**/
+ CV_BadStep=                   -13,  /**/
+ CV_BadModelOrChSeq=           -14,  /**/
+ CV_BadNumChannels=            -15,  /**/
+ CV_BadNumChannel1U=           -16,  /**/
+ CV_BadDepth=                  -17,  /**/
+ CV_BadAlphaChannel=           -18,  /**/
+ CV_BadOrder=                  -19,  /**/
+ CV_BadOrigin=                 -20,  /**/
+ CV_BadAlign=                  -21,  /**/
+ CV_BadCallBack=               -22,  /**/
+ CV_BadTileSize=               -23,  /**/
+ CV_BadCOI=                    -24,  /**/
+ CV_BadROISize=                -25,  /**/
+ CV_MaskIsTiled=               -26,  /**/
+ CV_StsNullPtr=                -27,  /**< null pointer */
+ CV_StsVecLengthErr=           -28,  /**< incorrect vector length */
+ CV_StsFilterStructContentErr= -29,  /**< incorr. filter structure content */
+ CV_StsKernelStructContentErr= -30,  /**< incorr. transform kernel content */
+ CV_StsFilterOffsetErr=        -31,  /**< incorrect filter offset value */
+ CV_StsBadSize=                -201, /**< the input/output structure size is incorrect  */
+ CV_StsDivByZero=              -202, /**< division by zero */
+ CV_StsInplaceNotSupported=    -203, /**< in-place operation is not supported */
+ CV_StsObjectNotFound=         -204, /**< request can't be completed */
+ CV_StsUnmatchedFormats=       -205, /**< formats of input/output arrays differ */
+ CV_StsBadFlag=                -206, /**< flag is wrong or not supported */
+ CV_StsBadPoint=               -207, /**< bad CvPoint */
+ CV_StsBadMask=                -208, /**< bad format of mask (neither 8uC1 nor 8sC1)*/
+ CV_StsUnmatchedSizes=         -209, /**< sizes of input/output structures do not match */
+ CV_StsUnsupportedFormat=      -210, /**< the data format/type is not supported by the function*/
+ CV_StsOutOfRange=             -211, /**< some of parameters are out of range */
+ CV_StsParseError=             -212, /**< invalid syntax/structure of the parsed file */
+ CV_StsNotImplemented=         -213, /**< the requested function/feature is not implemented */
+ CV_StsBadMemBlock=            -214, /**< an allocated block has been corrupted */
+ CV_StsAssert=                 -215, /**< assertion failed */
+ CV_GpuNotSupported=           -216,
+ CV_GpuApiCallError=           -217,
+ CV_OpenGlNotSupported=        -218,
+ CV_OpenGlApiCallError=        -219,
+ CV_OpenCLApiCallError=        -220,
+ CV_OpenCLDoubleNotSupported=  -221,
+ CV_OpenCLInitError=           -222,
+ CV_OpenCLNoAMDBlasFft=        -223
+};
+
+/****************************************************************************************\
+*                             Common macros and inline functions                         *
+\****************************************************************************************/
+
+#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t))
+
+/** min & max without jumps */
+#define  CV_IMIN(a, b)  ((a) ^ (((a)^(b)) & (((a) < (b)) - 1)))
+
+#define  CV_IMAX(a, b)  ((a) ^ (((a)^(b)) & (((a) > (b)) - 1)))
+
+/** absolute value without jumps */
+#ifndef __cplusplus
+#  define  CV_IABS(a)     (((a) ^ ((a) < 0 ? -1 : 0)) - ((a) < 0 ? -1 : 0))
+#else
+#  define  CV_IABS(a)     abs(a)
+#endif
+#define  CV_CMP(a,b)    (((a) > (b)) - ((a) < (b)))
+#define  CV_SIGN(a)     CV_CMP((a),0)
+
+#define cvInvSqrt(value) ((float)(1./sqrt(value)))
+#define cvSqrt(value)  ((float)sqrt(value))
+
+
+/*************** Random number generation *******************/
+
+typedef uint64 CvRNG;
+
+#define CV_RNG_COEFF 4164903690U
+
+/** @brief Initializes a random number generator state.
+
+The function initializes a random number generator and returns the state. The pointer to the state
+can be then passed to the cvRandInt, cvRandReal and cvRandArr functions. In the current
+implementation a multiply-with-carry generator is used.
+@param seed 64-bit value used to initiate a random sequence
+@sa the C++ class RNG replaced CvRNG.
+ */
+CV_INLINE CvRNG cvRNG( int64 seed CV_DEFAULT(-1))
+{
+    CvRNG rng = seed ? (uint64)seed : (uint64)(int64)-1;
+    return rng;
+}
+
+/** @brief Returns a 32-bit unsigned integer and updates RNG.
+
+The function returns a uniformly-distributed random 32-bit unsigned integer and updates the RNG
+state. It is similar to the rand() function from the C runtime library, except that OpenCV functions
+always generates a 32-bit random number, regardless of the platform.
+@param rng CvRNG state initialized by cvRNG.
+ */
+CV_INLINE unsigned cvRandInt( CvRNG* rng )
+{
+    uint64 temp = *rng;
+    temp = (uint64)(unsigned)temp*CV_RNG_COEFF + (temp >> 32);
+    *rng = temp;
+    return (unsigned)temp;
+}
+
+/** @brief Returns a floating-point random number and updates RNG.
+
+The function returns a uniformly-distributed random floating-point number between 0 and 1 (1 is not
+included).
+@param rng RNG state initialized by cvRNG
+ */
+CV_INLINE double cvRandReal( CvRNG* rng )
+{
+    return cvRandInt(rng)*2.3283064365386962890625e-10 /* 2^-32 */;
+}
+
+/****************************************************************************************\
+*                                  Image type (IplImage)                                 *
+\****************************************************************************************/
+
+#ifndef HAVE_IPL
+
+/*
+ * The following definitions (until #endif)
+ * is an extract from IPL headers.
+ * Copyright (c) 1995 Intel Corporation.
+ */
+#define IPL_DEPTH_SIGN 0x80000000
+
+#define IPL_DEPTH_1U     1
+#define IPL_DEPTH_8U     8
+#define IPL_DEPTH_16U   16
+#define IPL_DEPTH_32F   32
+
+#define IPL_DEPTH_8S  (IPL_DEPTH_SIGN| 8)
+#define IPL_DEPTH_16S (IPL_DEPTH_SIGN|16)
+#define IPL_DEPTH_32S (IPL_DEPTH_SIGN|32)
+
+#define IPL_DATA_ORDER_PIXEL  0
+#define IPL_DATA_ORDER_PLANE  1
+
+#define IPL_ORIGIN_TL 0
+#define IPL_ORIGIN_BL 1
+
+#define IPL_ALIGN_4BYTES   4
+#define IPL_ALIGN_8BYTES   8
+#define IPL_ALIGN_16BYTES 16
+#define IPL_ALIGN_32BYTES 32
+
+#define IPL_ALIGN_DWORD   IPL_ALIGN_4BYTES
+#define IPL_ALIGN_QWORD   IPL_ALIGN_8BYTES
+
+#define IPL_BORDER_CONSTANT   0
+#define IPL_BORDER_REPLICATE  1
+#define IPL_BORDER_REFLECT    2
+#define IPL_BORDER_WRAP       3
+
+/** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV
+only supports a subset of possible IplImage formats, as outlined in the parameter list above.
+
+In addition to the above restrictions, OpenCV handles ROIs differently. OpenCV functions require
+that the image size or ROI size of all source and destination images match exactly. On the other
+hand, the Intel Image Processing Library processes the area of intersection between the source and
+destination images (or ROIs), allowing them to vary independently.
+*/
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+_IplImage
+{
+    int  nSize;             /**< sizeof(IplImage) */
+    int  ID;                /**< version (=0)*/
+    int  nChannels;         /**< Most of OpenCV functions support 1,2,3 or 4 channels */
+    int  alphaChannel;      /**< Ignored by OpenCV */
+    int  depth;             /**< Pixel depth in bits: IPL_DEPTH_8U, IPL_DEPTH_8S, IPL_DEPTH_16S,
+                               IPL_DEPTH_32S, IPL_DEPTH_32F and IPL_DEPTH_64F are supported.  */
+    char colorModel[4];     /**< Ignored by OpenCV */
+    char channelSeq[4];     /**< ditto */
+    int  dataOrder;         /**< 0 - interleaved color channels, 1 - separate color channels.
+                               cvCreateImage can only create interleaved images */
+    int  origin;            /**< 0 - top-left origin,
+                               1 - bottom-left origin (Windows bitmaps style).  */
+    int  align;             /**< Alignment of image rows (4 or 8).
+                               OpenCV ignores it and uses widthStep instead.    */
+    int  width;             /**< Image width in pixels.                           */
+    int  height;            /**< Image height in pixels.                          */
+    struct _IplROI *roi;    /**< Image ROI. If NULL, the whole image is selected. */
+    struct _IplImage *maskROI;      /**< Must be NULL. */
+    void  *imageId;                 /**< "           " */
+    struct _IplTileInfo *tileInfo;  /**< "           " */
+    int  imageSize;         /**< Image data size in bytes
+                               (==image->height*image->widthStep
+                               in case of interleaved data)*/
+    char *imageData;        /**< Pointer to aligned image data.         */
+    int  widthStep;         /**< Size of aligned image row in bytes.    */
+    int  BorderMode[4];     /**< Ignored by OpenCV.                     */
+    int  BorderConst[4];    /**< Ditto.                                 */
+    char *imageDataOrigin;  /**< Pointer to very origin of image data
+                               (not necessarily aligned) -
+                               needed for correct deallocation */
+
+#ifdef __cplusplus
+    _IplImage() {}
+    _IplImage(const cv::Mat& m);
+#endif
+}
+IplImage;
+
+typedef struct _IplTileInfo IplTileInfo;
+
+typedef struct _IplROI
+{
+    int  coi; /**< 0 - no COI (all channels are selected), 1 - 0th channel is selected ...*/
+    int  xOffset;
+    int  yOffset;
+    int  width;
+    int  height;
+}
+IplROI;
+
+typedef struct _IplConvKernel
+{
+    int  nCols;
+    int  nRows;
+    int  anchorX;
+    int  anchorY;
+    int *values;
+    int  nShiftR;
+}
+IplConvKernel;
+
+typedef struct _IplConvKernelFP
+{
+    int  nCols;
+    int  nRows;
+    int  anchorX;
+    int  anchorY;
+    float *values;
+}
+IplConvKernelFP;
+
+#define IPL_IMAGE_HEADER 1
+#define IPL_IMAGE_DATA   2
+#define IPL_IMAGE_ROI    4
+
+#endif/*HAVE_IPL*/
+
+/** extra border mode */
+#define IPL_BORDER_REFLECT_101    4
+#define IPL_BORDER_TRANSPARENT    5
+
+#define IPL_IMAGE_MAGIC_VAL  ((int)sizeof(IplImage))
+#define CV_TYPE_NAME_IMAGE "opencv-image"
+
+#define CV_IS_IMAGE_HDR(img) \
+    ((img) != NULL && ((const IplImage*)(img))->nSize == sizeof(IplImage))
+
+#define CV_IS_IMAGE(img) \
+    (CV_IS_IMAGE_HDR(img) && ((IplImage*)img)->imageData != NULL)
+
+/** for storing double-precision
+   floating point data in IplImage's */
+#define IPL_DEPTH_64F  64
+
+/** get reference to pixel at (col,row),
+   for multi-channel images (col) should be multiplied by number of channels */
+#define CV_IMAGE_ELEM( image, elemtype, row, col )       \
+    (((elemtype*)((image)->imageData + (image)->widthStep*(row)))[(col)])
+
+/****************************************************************************************\
+*                                  Matrix type (CvMat)                                   *
+\****************************************************************************************/
+
+#define CV_AUTO_STEP  0x7fffffff
+#define CV_WHOLE_ARR  cvSlice( 0, 0x3fffffff )
+
+#define CV_MAGIC_MASK       0xFFFF0000
+#define CV_MAT_MAGIC_VAL    0x42420000
+#define CV_TYPE_NAME_MAT    "opencv-matrix"
+
+/** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column
+index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro:
+
+    uchar pixval = CV_MAT_ELEM(grayimg, uchar, i, j)
+    CV_MAT_ELEM(cameraMatrix, float, 0, 2) = image.width*0.5f;
+
+To access multiple-channel matrices, you can use
+CV_MAT_ELEM(matrix, type, i, j\*nchannels + channel_idx).
+
+@deprecated CvMat is now obsolete; consider using Mat instead.
+ */
+typedef struct CvMat
+{
+    int type;
+    int step;
+
+    /* for internal use only */
+    int* refcount;
+    int hdr_refcount;
+
+    union
+    {
+        uchar* ptr;
+        short* s;
+        int* i;
+        float* fl;
+        double* db;
+    } data;
+
+#ifdef __cplusplus
+    union
+    {
+        int rows;
+        int height;
+    };
+
+    union
+    {
+        int cols;
+        int width;
+    };
+#else
+    int rows;
+    int cols;
+#endif
+
+
+#ifdef __cplusplus
+    CvMat() {}
+    CvMat(const CvMat& m) { memcpy(this, &m, sizeof(CvMat));}
+    CvMat(const cv::Mat& m);
+#endif
+
+}
+CvMat;
+
+
+#define CV_IS_MAT_HDR(mat) \
+    ((mat) != NULL && \
+    (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \
+    ((const CvMat*)(mat))->cols > 0 && ((const CvMat*)(mat))->rows > 0)
+
+#define CV_IS_MAT_HDR_Z(mat) \
+    ((mat) != NULL && \
+    (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \
+    ((const CvMat*)(mat))->cols >= 0 && ((const CvMat*)(mat))->rows >= 0)
+
+#define CV_IS_MAT(mat) \
+    (CV_IS_MAT_HDR(mat) && ((const CvMat*)(mat))->data.ptr != NULL)
+
+#define CV_IS_MASK_ARR(mat) \
+    (((mat)->type & (CV_MAT_TYPE_MASK & ~CV_8SC1)) == 0)
+
+#define CV_ARE_TYPES_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_TYPE_MASK) == 0)
+
+#define CV_ARE_CNS_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_CN_MASK) == 0)
+
+#define CV_ARE_DEPTHS_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_DEPTH_MASK) == 0)
+
+#define CV_ARE_SIZES_EQ(mat1, mat2) \
+    ((mat1)->rows == (mat2)->rows && (mat1)->cols == (mat2)->cols)
+
+#define CV_IS_MAT_CONST(mat)  \
+    (((mat)->rows|(mat)->cols) == 1)
+
+#define IPL2CV_DEPTH(depth) \
+    ((((CV_8U)+(CV_16U<<4)+(CV_32F<<8)+(CV_64F<<16)+(CV_8S<<20)+ \
+    (CV_16S<<24)+(CV_32S<<28)) >> ((((depth) & 0xF0) >> 2) + \
+    (((depth) & IPL_DEPTH_SIGN) ? 20 : 0))) & 15)
+
+/** Inline constructor. No data is allocated internally!!!
+ * (Use together with cvCreateData, or use cvCreateMat instead to
+ * get a matrix with allocated data):
+ */
+CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL))
+{
+    CvMat m;
+
+    assert( (unsigned)CV_MAT_DEPTH(type) <= CV_64F );
+    type = CV_MAT_TYPE(type);
+    m.type = CV_MAT_MAGIC_VAL | CV_MAT_CONT_FLAG | type;
+    m.cols = cols;
+    m.rows = rows;
+    m.step = m.cols*CV_ELEM_SIZE(type);
+    m.data.ptr = (uchar*)data;
+    m.refcount = NULL;
+    m.hdr_refcount = 0;
+
+    return m;
+}
+
+#ifdef __cplusplus
+inline CvMat::CvMat(const cv::Mat& m)
+{
+    CV_DbgAssert(m.dims <= 2);
+    *this = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data);
+    step = (int)m.step[0];
+    type = (type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG);
+}
+#endif
+
+
+#define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size )  \
+    (assert( (unsigned)(row) < (unsigned)(mat).rows &&   \
+             (unsigned)(col) < (unsigned)(mat).cols ),   \
+     (mat).data.ptr + (size_t)(mat).step*(row) + (pix_size)*(col))
+
+#define CV_MAT_ELEM_PTR( mat, row, col )                 \
+    CV_MAT_ELEM_PTR_FAST( mat, row, col, CV_ELEM_SIZE((mat).type) )
+
+#define CV_MAT_ELEM( mat, elemtype, row, col )           \
+    (*(elemtype*)CV_MAT_ELEM_PTR_FAST( mat, row, col, sizeof(elemtype)))
+
+/** @brief Returns the particular element of single-channel floating-point matrix.
+
+The function is a fast replacement for cvGetReal2D in the case of single-channel floating-point
+matrices. It is faster because it is inline, it does fewer checks for array type and array element
+type, and it checks for the row and column ranges only in debug mode.
+@param mat Input matrix
+@param row The zero-based index of row
+@param col The zero-based index of column
+ */
+CV_INLINE  double  cvmGet( const CvMat* mat, int row, int col )
+{
+    int type;
+
+    type = CV_MAT_TYPE(mat->type);
+    assert( (unsigned)row < (unsigned)mat->rows &&
+            (unsigned)col < (unsigned)mat->cols );
+
+    if( type == CV_32FC1 )
+        return ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col];
+    else
+    {
+        assert( type == CV_64FC1 );
+        return ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col];
+    }
+}
+
+/** @brief Sets a specific element of a single-channel floating-point matrix.
+
+The function is a fast replacement for cvSetReal2D in the case of single-channel floating-point
+matrices. It is faster because it is inline, it does fewer checks for array type and array element
+type, and it checks for the row and column ranges only in debug mode.
+@param mat The matrix
+@param row The zero-based index of row
+@param col The zero-based index of column
+@param value The new value of the matrix element
+ */
+CV_INLINE  void  cvmSet( CvMat* mat, int row, int col, double value )
+{
+    int type;
+    type = CV_MAT_TYPE(mat->type);
+    assert( (unsigned)row < (unsigned)mat->rows &&
+            (unsigned)col < (unsigned)mat->cols );
+
+    if( type == CV_32FC1 )
+        ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = (float)value;
+    else
+    {
+        assert( type == CV_64FC1 );
+        ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = value;
+    }
+}
+
+
+CV_INLINE int cvIplDepth( int type )
+{
+    int depth = CV_MAT_DEPTH(type);
+    return CV_ELEM_SIZE1(depth)*8 | (depth == CV_8S || depth == CV_16S ||
+           depth == CV_32S ? IPL_DEPTH_SIGN : 0);
+}
+
+
+/****************************************************************************************\
+*                       Multi-dimensional dense array (CvMatND)                          *
+\****************************************************************************************/
+
+#define CV_MATND_MAGIC_VAL    0x42430000
+#define CV_TYPE_NAME_MATND    "opencv-nd-matrix"
+
+#define CV_MAX_DIM            32
+#define CV_MAX_DIM_HEAP       1024
+
+/**
+  @deprecated consider using cv::Mat instead
+  */
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+CvMatND
+{
+    int type;
+    int dims;
+
+    int* refcount;
+    int hdr_refcount;
+
+    union
+    {
+        uchar* ptr;
+        float* fl;
+        double* db;
+        int* i;
+        short* s;
+    } data;
+
+    struct
+    {
+        int size;
+        int step;
+    }
+    dim[CV_MAX_DIM];
+
+#ifdef __cplusplus
+    CvMatND() {}
+    CvMatND(const cv::Mat& m);
+#endif
+}
+CvMatND;
+
+#define CV_IS_MATND_HDR(mat) \
+    ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL)
+
+#define CV_IS_MATND(mat) \
+    (CV_IS_MATND_HDR(mat) && ((const CvMatND*)(mat))->data.ptr != NULL)
+
+
+/****************************************************************************************\
+*                      Multi-dimensional sparse array (CvSparseMat)                      *
+\****************************************************************************************/
+
+#define CV_SPARSE_MAT_MAGIC_VAL    0x42440000
+#define CV_TYPE_NAME_SPARSE_MAT    "opencv-sparse-matrix"
+
+struct CvSet;
+
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+CvSparseMat
+{
+    int type;
+    int dims;
+    int* refcount;
+    int hdr_refcount;
+
+    struct CvSet* heap;
+    void** hashtable;
+    int hashsize;
+    int valoffset;
+    int idxoffset;
+    int size[CV_MAX_DIM];
+
+#ifdef __cplusplus
+    void copyToSparseMat(cv::SparseMat& m) const;
+#endif
+}
+CvSparseMat;
+
+#ifdef __cplusplus
+    CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m);
+#endif
+
+#define CV_IS_SPARSE_MAT_HDR(mat) \
+    ((mat) != NULL && \
+    (((const CvSparseMat*)(mat))->type & CV_MAGIC_MASK) == CV_SPARSE_MAT_MAGIC_VAL)
+
+#define CV_IS_SPARSE_MAT(mat) \
+    CV_IS_SPARSE_MAT_HDR(mat)
+
+/**************** iteration through a sparse array *****************/
+
+typedef struct CvSparseNode
+{
+    unsigned hashval;
+    struct CvSparseNode* next;
+}
+CvSparseNode;
+
+typedef struct CvSparseMatIterator
+{
+    CvSparseMat* mat;
+    CvSparseNode* node;
+    int curidx;
+}
+CvSparseMatIterator;
+
+#define CV_NODE_VAL(mat,node)   ((void*)((uchar*)(node) + (mat)->valoffset))
+#define CV_NODE_IDX(mat,node)   ((int*)((uchar*)(node) + (mat)->idxoffset))
+
+/****************************************************************************************\
+*                                         Histogram                                      *
+\****************************************************************************************/
+
+typedef int CvHistType;
+
+#define CV_HIST_MAGIC_VAL     0x42450000
+#define CV_HIST_UNIFORM_FLAG  (1 << 10)
+
+/** indicates whether bin ranges are set already or not */
+#define CV_HIST_RANGES_FLAG   (1 << 11)
+
+#define CV_HIST_ARRAY         0
+#define CV_HIST_SPARSE        1
+#define CV_HIST_TREE          CV_HIST_SPARSE
+
+/** should be used as a parameter only,
+   it turns to CV_HIST_UNIFORM_FLAG of hist->type */
+#define CV_HIST_UNIFORM       1
+
+typedef struct CvHistogram
+{
+    int     type;
+    CvArr*  bins;
+    float   thresh[CV_MAX_DIM][2];  /**< For uniform histograms.                      */
+    float** thresh2;                /**< For non-uniform histograms.                  */
+    CvMatND mat;                    /**< Embedded matrix header for array histograms. */
+}
+CvHistogram;
+
+#define CV_IS_HIST( hist ) \
+    ((hist) != NULL  && \
+     (((CvHistogram*)(hist))->type & CV_MAGIC_MASK) == CV_HIST_MAGIC_VAL && \
+     (hist)->bins != NULL)
+
+#define CV_IS_UNIFORM_HIST( hist ) \
+    (((hist)->type & CV_HIST_UNIFORM_FLAG) != 0)
+
+#define CV_IS_SPARSE_HIST( hist ) \
+    CV_IS_SPARSE_MAT((hist)->bins)
+
+#define CV_HIST_HAS_RANGES( hist ) \
+    (((hist)->type & CV_HIST_RANGES_FLAG) != 0)
+
+/****************************************************************************************\
+*                      Other supplementary data type definitions                         *
+\****************************************************************************************/
+
+/*************************************** CvRect *****************************************/
+/** @sa Rect_ */
+typedef struct CvRect
+{
+    int x;
+    int y;
+    int width;
+    int height;
+
+#ifdef __cplusplus
+    CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {}
+    template<typename _Tp>
+    CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast<int>(r.x)), y(cv::saturate_cast<int>(r.y)), width(cv::saturate_cast<int>(r.width)), height(cv::saturate_cast<int>(r.height)) {}
+    template<typename _Tp>
+    operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); }
+#endif
+}
+CvRect;
+
+/** constructs CvRect structure. */
+CV_INLINE  CvRect  cvRect( int x, int y, int width, int height )
+{
+    CvRect r;
+
+    r.x = x;
+    r.y = y;
+    r.width = width;
+    r.height = height;
+
+    return r;
+}
+
+
+CV_INLINE  IplROI  cvRectToROI( CvRect rect, int coi )
+{
+    IplROI roi;
+    roi.xOffset = rect.x;
+    roi.yOffset = rect.y;
+    roi.width = rect.width;
+    roi.height = rect.height;
+    roi.coi = coi;
+
+    return roi;
+}
+
+
+CV_INLINE  CvRect  cvROIToRect( IplROI roi )
+{
+    return cvRect( roi.xOffset, roi.yOffset, roi.width, roi.height );
+}
+
+/*********************************** CvTermCriteria *************************************/
+
+#define CV_TERMCRIT_ITER    1
+#define CV_TERMCRIT_NUMBER  CV_TERMCRIT_ITER
+#define CV_TERMCRIT_EPS     2
+
+/** @sa TermCriteria
+ */
+typedef struct CvTermCriteria
+{
+    int    type;  /**< may be combination of
+                     CV_TERMCRIT_ITER
+                     CV_TERMCRIT_EPS */
+    int    max_iter;
+    double epsilon;
+
+#ifdef __cplusplus
+    CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps)  {}
+    CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon)  {}
+    operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); }
+#endif
+
+}
+CvTermCriteria;
+
+CV_INLINE  CvTermCriteria  cvTermCriteria( int type, int max_iter, double epsilon )
+{
+    CvTermCriteria t;
+
+    t.type = type;
+    t.max_iter = max_iter;
+    t.epsilon = (float)epsilon;
+
+    return t;
+}
+
+
+/******************************* CvPoint and variants ***********************************/
+
+typedef struct CvPoint
+{
+    int x;
+    int y;
+
+#ifdef __cplusplus
+    CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {}
+    template<typename _Tp>
+    CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {}
+    template<typename _Tp>
+    operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
+#endif
+}
+CvPoint;
+
+/** constructs CvPoint structure. */
+CV_INLINE  CvPoint  cvPoint( int x, int y )
+{
+    CvPoint p;
+
+    p.x = x;
+    p.y = y;
+
+    return p;
+}
+
+
+typedef struct CvPoint2D32f
+{
+    float x;
+    float y;
+
+#ifdef __cplusplus
+    CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {}
+    template<typename _Tp>
+    CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {}
+    template<typename _Tp>
+    operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
+#endif
+}
+CvPoint2D32f;
+
+/** constructs CvPoint2D32f structure. */
+CV_INLINE  CvPoint2D32f  cvPoint2D32f( double x, double y )
+{
+    CvPoint2D32f p;
+
+    p.x = (float)x;
+    p.y = (float)y;
+
+    return p;
+}
+
+/** converts CvPoint to CvPoint2D32f. */
+CV_INLINE  CvPoint2D32f  cvPointTo32f( CvPoint point )
+{
+    return cvPoint2D32f( (float)point.x, (float)point.y );
+}
+
+/** converts CvPoint2D32f to CvPoint. */
+CV_INLINE  CvPoint  cvPointFrom32f( CvPoint2D32f point )
+{
+    CvPoint ipt;
+    ipt.x = cvRound(point.x);
+    ipt.y = cvRound(point.y);
+
+    return ipt;
+}
+
+
+typedef struct CvPoint3D32f
+{
+    float x;
+    float y;
+    float z;
+
+#ifdef __cplusplus
+    CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {}
+    template<typename _Tp>
+    CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {}
+    template<typename _Tp>
+    operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); }
+#endif
+}
+CvPoint3D32f;
+
+/** constructs CvPoint3D32f structure. */
+CV_INLINE  CvPoint3D32f  cvPoint3D32f( double x, double y, double z )
+{
+    CvPoint3D32f p;
+
+    p.x = (float)x;
+    p.y = (float)y;
+    p.z = (float)z;
+
+    return p;
+}
+
+
+typedef struct CvPoint2D64f
+{
+    double x;
+    double y;
+}
+CvPoint2D64f;
+
+/** constructs CvPoint2D64f structure.*/
+CV_INLINE  CvPoint2D64f  cvPoint2D64f( double x, double y )
+{
+    CvPoint2D64f p;
+
+    p.x = x;
+    p.y = y;
+
+    return p;
+}
+
+
+typedef struct CvPoint3D64f
+{
+    double x;
+    double y;
+    double z;
+}
+CvPoint3D64f;
+
+/** constructs CvPoint3D64f structure. */
+CV_INLINE  CvPoint3D64f  cvPoint3D64f( double x, double y, double z )
+{
+    CvPoint3D64f p;
+
+    p.x = x;
+    p.y = y;
+    p.z = z;
+
+    return p;
+}
+
+
+/******************************** CvSize's & CvBox **************************************/
+
+typedef struct CvSize
+{
+    int width;
+    int height;
+
+#ifdef __cplusplus
+    CvSize(int w = 0, int h = 0): width(w), height(h) {}
+    template<typename _Tp>
+    CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<int>(sz.width)), height(cv::saturate_cast<int>(sz.height)) {}
+    template<typename _Tp>
+    operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
+#endif
+}
+CvSize;
+
+/** constructs CvSize structure. */
+CV_INLINE  CvSize  cvSize( int width, int height )
+{
+    CvSize s;
+
+    s.width = width;
+    s.height = height;
+
+    return s;
+}
+
+typedef struct CvSize2D32f
+{
+    float width;
+    float height;
+
+#ifdef __cplusplus
+    CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {}
+    template<typename _Tp>
+    CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<float>(sz.width)), height(cv::saturate_cast<float>(sz.height)) {}
+    template<typename _Tp>
+    operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
+#endif
+}
+CvSize2D32f;
+
+/** constructs CvSize2D32f structure. */
+CV_INLINE  CvSize2D32f  cvSize2D32f( double width, double height )
+{
+    CvSize2D32f s;
+
+    s.width = (float)width;
+    s.height = (float)height;
+
+    return s;
+}
+
+/** @sa RotatedRect
+ */
+typedef struct CvBox2D
+{
+    CvPoint2D32f center;  /**< Center of the box.                          */
+    CvSize2D32f  size;    /**< Box width and length.                       */
+    float angle;          /**< Angle between the horizontal axis           */
+                          /**< and the first side (i.e. length) in degrees */
+
+#ifdef __cplusplus
+    CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {}
+    CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {}
+    operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); }
+#endif
+}
+CvBox2D;
+
+
+/** Line iterator state: */
+typedef struct CvLineIterator
+{
+    /** Pointer to the current point: */
+    uchar* ptr;
+
+    /* Bresenham algorithm state: */
+    int  err;
+    int  plus_delta;
+    int  minus_delta;
+    int  plus_step;
+    int  minus_step;
+}
+CvLineIterator;
+
+
+
+/************************************* CvSlice ******************************************/
+#define CV_WHOLE_SEQ_END_INDEX 0x3fffffff
+#define CV_WHOLE_SEQ  cvSlice(0, CV_WHOLE_SEQ_END_INDEX)
+
+typedef struct CvSlice
+{
+    int  start_index, end_index;
+
+#if defined(__cplusplus) && !defined(__CUDACC__)
+    CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {}
+    CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); }
+    operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); }
+#endif
+}
+CvSlice;
+
+CV_INLINE  CvSlice  cvSlice( int start, int end )
+{
+    CvSlice slice;
+    slice.start_index = start;
+    slice.end_index = end;
+
+    return slice;
+}
+
+
+
+/************************************* CvScalar *****************************************/
+/** @sa Scalar_
+ */
+typedef struct CvScalar
+{
+    double val[4];
+
+#ifdef __cplusplus
+    CvScalar() {}
+    CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; }
+    template<typename _Tp>
+    CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; }
+    template<typename _Tp>
+    operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); }
+    template<typename _Tp, int cn>
+    CvScalar(const cv::Vec<_Tp, cn>& v)
+    {
+        int i;
+        for( i = 0; i < (cn < 4 ? cn : 4); i++ ) val[i] = v.val[i];
+        for( ; i < 4; i++ ) val[i] = 0;
+    }
+#endif
+}
+CvScalar;
+
+CV_INLINE  CvScalar  cvScalar( double val0, double val1 CV_DEFAULT(0),
+                               double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0))
+{
+    CvScalar scalar;
+    scalar.val[0] = val0; scalar.val[1] = val1;
+    scalar.val[2] = val2; scalar.val[3] = val3;
+    return scalar;
+}
+
+
+CV_INLINE  CvScalar  cvRealScalar( double val0 )
+{
+    CvScalar scalar;
+    scalar.val[0] = val0;
+    scalar.val[1] = scalar.val[2] = scalar.val[3] = 0;
+    return scalar;
+}
+
+CV_INLINE  CvScalar  cvScalarAll( double val0123 )
+{
+    CvScalar scalar;
+    scalar.val[0] = val0123;
+    scalar.val[1] = val0123;
+    scalar.val[2] = val0123;
+    scalar.val[3] = val0123;
+    return scalar;
+}
+
+/****************************************************************************************\
+*                                   Dynamic Data structures                              *
+\****************************************************************************************/
+
+/******************************** Memory storage ****************************************/
+
+typedef struct CvMemBlock
+{
+    struct CvMemBlock*  prev;
+    struct CvMemBlock*  next;
+}
+CvMemBlock;
+
+#define CV_STORAGE_MAGIC_VAL    0x42890000
+
+typedef struct CvMemStorage
+{
+    int signature;
+    CvMemBlock* bottom;           /**< First allocated block.                   */
+    CvMemBlock* top;              /**< Current memory block - top of the stack. */
+    struct  CvMemStorage* parent; /**< We get new blocks from parent as needed. */
+    int block_size;               /**< Block size.                              */
+    int free_space;               /**< Remaining free space in current block.   */
+}
+CvMemStorage;
+
+#define CV_IS_STORAGE(storage)  \
+    ((storage) != NULL &&       \
+    (((CvMemStorage*)(storage))->signature & CV_MAGIC_MASK) == CV_STORAGE_MAGIC_VAL)
+
+
+typedef struct CvMemStoragePos
+{
+    CvMemBlock* top;
+    int free_space;
+}
+CvMemStoragePos;
+
+
+/*********************************** Sequence *******************************************/
+
+typedef struct CvSeqBlock
+{
+    struct CvSeqBlock*  prev; /**< Previous sequence block.                   */
+    struct CvSeqBlock*  next; /**< Next sequence block.                       */
+  int    start_index;         /**< Index of the first element in the block +  */
+                              /**< sequence->first->start_index.              */
+    int    count;             /**< Number of elements in the block.           */
+    schar* data;              /**< Pointer to the first element of the block. */
+}
+CvSeqBlock;
+
+
+#define CV_TREE_NODE_FIELDS(node_type)                               \
+    int       flags;             /**< Miscellaneous flags.     */      \
+    int       header_size;       /**< Size of sequence header. */      \
+    struct    node_type* h_prev; /**< Previous sequence.       */      \
+    struct    node_type* h_next; /**< Next sequence.           */      \
+    struct    node_type* v_prev; /**< 2nd previous sequence.   */      \
+    struct    node_type* v_next  /**< 2nd next sequence.       */
+
+/**
+   Read/Write sequence.
+   Elements can be dynamically inserted to or deleted from the sequence.
+*/
+#define CV_SEQUENCE_FIELDS()                                              \
+    CV_TREE_NODE_FIELDS(CvSeq);                                           \
+    int       total;          /**< Total number of elements.            */  \
+    int       elem_size;      /**< Size of sequence element in bytes.   */  \
+    schar*    block_max;      /**< Maximal bound of the last block.     */  \
+    schar*    ptr;            /**< Current write pointer.               */  \
+    int       delta_elems;    /**< Grow seq this many at a time.        */  \
+    CvMemStorage* storage;    /**< Where the seq is stored.             */  \
+    CvSeqBlock* free_blocks;  /**< Free blocks list.                    */  \
+    CvSeqBlock* first;        /**< Pointer to the first sequence block. */
+
+typedef struct CvSeq
+{
+    CV_SEQUENCE_FIELDS()
+}
+CvSeq;
+
+#define CV_TYPE_NAME_SEQ             "opencv-sequence"
+#define CV_TYPE_NAME_SEQ_TREE        "opencv-sequence-tree"
+
+/*************************************** Set ********************************************/
+/** @brief Set
+  Order is not preserved. There can be gaps between sequence elements.
+  After the element has been inserted it stays in the same place all the time.
+  The MSB(most-significant or sign bit) of the first field (flags) is 0 iff the element exists.
+*/
+#define CV_SET_ELEM_FIELDS(elem_type)   \
+    int  flags;                         \
+    struct elem_type* next_free;
+
+typedef struct CvSetElem
+{
+    CV_SET_ELEM_FIELDS(CvSetElem)
+}
+CvSetElem;
+
+#define CV_SET_FIELDS()      \
+    CV_SEQUENCE_FIELDS()     \
+    CvSetElem* free_elems;   \
+    int active_count;
+
+typedef struct CvSet
+{
+    CV_SET_FIELDS()
+}
+CvSet;
+
+
+#define CV_SET_ELEM_IDX_MASK   ((1 << 26) - 1)
+#define CV_SET_ELEM_FREE_FLAG  (1 << (sizeof(int)*8-1))
+
+/** Checks whether the element pointed by ptr belongs to a set or not */
+#define CV_IS_SET_ELEM( ptr )  (((CvSetElem*)(ptr))->flags >= 0)
+
+/************************************* Graph ********************************************/
+
+/** @name Graph
+
+We represent a graph as a set of vertices. Vertices contain their adjacency lists (more exactly,
+pointers to first incoming or outcoming edge (or 0 if isolated vertex)). Edges are stored in
+another set. There is a singly-linked list of incoming/outcoming edges for each vertex.
+
+Each edge consists of:
+
+- Two pointers to the starting and ending vertices (vtx[0] and vtx[1] respectively).
+
+    A graph may be oriented or not. In the latter case, edges between vertex i to vertex j are not
+distinguished during search operations.
+
+- Two pointers to next edges for the starting and ending vertices, where next[0] points to the
+next edge in the vtx[0] adjacency list and next[1] points to the next edge in the vtx[1]
+adjacency list.
+
+@see CvGraphEdge, CvGraphVtx, CvGraphVtx2D, CvGraph
+@{
+*/
+#define CV_GRAPH_EDGE_FIELDS()      \
+    int flags;                      \
+    float weight;                   \
+    struct CvGraphEdge* next[2];    \
+    struct CvGraphVtx* vtx[2];
+
+
+#define CV_GRAPH_VERTEX_FIELDS()    \
+    int flags;                      \
+    struct CvGraphEdge* first;
+
+
+typedef struct CvGraphEdge
+{
+    CV_GRAPH_EDGE_FIELDS()
+}
+CvGraphEdge;
+
+typedef struct CvGraphVtx
+{
+    CV_GRAPH_VERTEX_FIELDS()
+}
+CvGraphVtx;
+
+typedef struct CvGraphVtx2D
+{
+    CV_GRAPH_VERTEX_FIELDS()
+    CvPoint2D32f* ptr;
+}
+CvGraphVtx2D;
+
+/**
+   Graph is "derived" from the set (this is set a of vertices)
+   and includes another set (edges)
+*/
+#define  CV_GRAPH_FIELDS()   \
+    CV_SET_FIELDS()          \
+    CvSet* edges;
+
+typedef struct CvGraph
+{
+    CV_GRAPH_FIELDS()
+}
+CvGraph;
+
+#define CV_TYPE_NAME_GRAPH "opencv-graph"
+
+/** @} */
+
+/*********************************** Chain/Countour *************************************/
+
+typedef struct CvChain
+{
+    CV_SEQUENCE_FIELDS()
+    CvPoint  origin;
+}
+CvChain;
+
+#define CV_CONTOUR_FIELDS()  \
+    CV_SEQUENCE_FIELDS()     \
+    CvRect rect;             \
+    int color;               \
+    int reserved[3];
+
+typedef struct CvContour
+{
+    CV_CONTOUR_FIELDS()
+}
+CvContour;
+
+typedef CvContour CvPoint2DSeq;
+
+/****************************************************************************************\
+*                                    Sequence types                                      *
+\****************************************************************************************/
+
+#define CV_SEQ_MAGIC_VAL             0x42990000
+
+#define CV_IS_SEQ(seq) \
+    ((seq) != NULL && (((CvSeq*)(seq))->flags & CV_MAGIC_MASK) == CV_SEQ_MAGIC_VAL)
+
+#define CV_SET_MAGIC_VAL             0x42980000
+#define CV_IS_SET(set) \
+    ((set) != NULL && (((CvSeq*)(set))->flags & CV_MAGIC_MASK) == CV_SET_MAGIC_VAL)
+
+#define CV_SEQ_ELTYPE_BITS           12
+#define CV_SEQ_ELTYPE_MASK           ((1 << CV_SEQ_ELTYPE_BITS) - 1)
+
+#define CV_SEQ_ELTYPE_POINT          CV_32SC2  /**< (x,y) */
+#define CV_SEQ_ELTYPE_CODE           CV_8UC1   /**< freeman code: 0..7 */
+#define CV_SEQ_ELTYPE_GENERIC        0
+#define CV_SEQ_ELTYPE_PTR            CV_USRTYPE1
+#define CV_SEQ_ELTYPE_PPOINT         CV_SEQ_ELTYPE_PTR  /**< &(x,y) */
+#define CV_SEQ_ELTYPE_INDEX          CV_32SC1  /**< #(x,y) */
+#define CV_SEQ_ELTYPE_GRAPH_EDGE     0  /**< &next_o, &next_d, &vtx_o, &vtx_d */
+#define CV_SEQ_ELTYPE_GRAPH_VERTEX   0  /**< first_edge, &(x,y) */
+#define CV_SEQ_ELTYPE_TRIAN_ATR      0  /**< vertex of the binary tree   */
+#define CV_SEQ_ELTYPE_CONNECTED_COMP 0  /**< connected component  */
+#define CV_SEQ_ELTYPE_POINT3D        CV_32FC3  /**< (x,y,z)  */
+
+#define CV_SEQ_KIND_BITS        2
+#define CV_SEQ_KIND_MASK        (((1 << CV_SEQ_KIND_BITS) - 1)<<CV_SEQ_ELTYPE_BITS)
+
+/** types of sequences */
+#define CV_SEQ_KIND_GENERIC     (0 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_CURVE       (1 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_BIN_TREE    (2 << CV_SEQ_ELTYPE_BITS)
+
+/** types of sparse sequences (sets) */
+#define CV_SEQ_KIND_GRAPH       (1 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_SUBDIV2D    (2 << CV_SEQ_ELTYPE_BITS)
+
+#define CV_SEQ_FLAG_SHIFT       (CV_SEQ_KIND_BITS + CV_SEQ_ELTYPE_BITS)
+
+/** flags for curves */
+#define CV_SEQ_FLAG_CLOSED     (1 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_SIMPLE     (0 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_CONVEX     (0 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_HOLE       (2 << CV_SEQ_FLAG_SHIFT)
+
+/** flags for graphs */
+#define CV_GRAPH_FLAG_ORIENTED (1 << CV_SEQ_FLAG_SHIFT)
+
+#define CV_GRAPH               CV_SEQ_KIND_GRAPH
+#define CV_ORIENTED_GRAPH      (CV_SEQ_KIND_GRAPH|CV_GRAPH_FLAG_ORIENTED)
+
+/** point sets */
+#define CV_SEQ_POINT_SET       (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT)
+#define CV_SEQ_POINT3D_SET     (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT3D)
+#define CV_SEQ_POLYLINE        (CV_SEQ_KIND_CURVE  | CV_SEQ_ELTYPE_POINT)
+#define CV_SEQ_POLYGON         (CV_SEQ_FLAG_CLOSED | CV_SEQ_POLYLINE )
+#define CV_SEQ_CONTOUR         CV_SEQ_POLYGON
+#define CV_SEQ_SIMPLE_POLYGON  (CV_SEQ_FLAG_SIMPLE | CV_SEQ_POLYGON  )
+
+/** chain-coded curves */
+#define CV_SEQ_CHAIN           (CV_SEQ_KIND_CURVE  | CV_SEQ_ELTYPE_CODE)
+#define CV_SEQ_CHAIN_CONTOUR   (CV_SEQ_FLAG_CLOSED | CV_SEQ_CHAIN)
+
+/** binary tree for the contour */
+#define CV_SEQ_POLYGON_TREE    (CV_SEQ_KIND_BIN_TREE  | CV_SEQ_ELTYPE_TRIAN_ATR)
+
+/** sequence of the connected components */
+#define CV_SEQ_CONNECTED_COMP  (CV_SEQ_KIND_GENERIC  | CV_SEQ_ELTYPE_CONNECTED_COMP)
+
+/** sequence of the integer numbers */
+#define CV_SEQ_INDEX           (CV_SEQ_KIND_GENERIC  | CV_SEQ_ELTYPE_INDEX)
+
+#define CV_SEQ_ELTYPE( seq )   ((seq)->flags & CV_SEQ_ELTYPE_MASK)
+#define CV_SEQ_KIND( seq )     ((seq)->flags & CV_SEQ_KIND_MASK )
+
+/** flag checking */
+#define CV_IS_SEQ_INDEX( seq )      ((CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_INDEX) && \
+                                     (CV_SEQ_KIND(seq) == CV_SEQ_KIND_GENERIC))
+
+#define CV_IS_SEQ_CURVE( seq )      (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE)
+#define CV_IS_SEQ_CLOSED( seq )     (((seq)->flags & CV_SEQ_FLAG_CLOSED) != 0)
+#define CV_IS_SEQ_CONVEX( seq )     0
+#define CV_IS_SEQ_HOLE( seq )       (((seq)->flags & CV_SEQ_FLAG_HOLE) != 0)
+#define CV_IS_SEQ_SIMPLE( seq )     1
+
+/** type checking macros */
+#define CV_IS_SEQ_POINT_SET( seq ) \
+    ((CV_SEQ_ELTYPE(seq) == CV_32SC2 || CV_SEQ_ELTYPE(seq) == CV_32FC2))
+
+#define CV_IS_SEQ_POINT_SUBSET( seq ) \
+    (CV_IS_SEQ_INDEX( seq ) || CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_PPOINT)
+
+#define CV_IS_SEQ_POLYLINE( seq )   \
+    (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && CV_IS_SEQ_POINT_SET(seq))
+
+#define CV_IS_SEQ_POLYGON( seq )   \
+    (CV_IS_SEQ_POLYLINE(seq) && CV_IS_SEQ_CLOSED(seq))
+
+#define CV_IS_SEQ_CHAIN( seq )   \
+    (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && (seq)->elem_size == 1)
+
+#define CV_IS_SEQ_CONTOUR( seq )   \
+    (CV_IS_SEQ_CLOSED(seq) && (CV_IS_SEQ_POLYLINE(seq) || CV_IS_SEQ_CHAIN(seq)))
+
+#define CV_IS_SEQ_CHAIN_CONTOUR( seq ) \
+    (CV_IS_SEQ_CHAIN( seq ) && CV_IS_SEQ_CLOSED( seq ))
+
+#define CV_IS_SEQ_POLYGON_TREE( seq ) \
+    (CV_SEQ_ELTYPE (seq) ==  CV_SEQ_ELTYPE_TRIAN_ATR &&    \
+    CV_SEQ_KIND( seq ) ==  CV_SEQ_KIND_BIN_TREE )
+
+#define CV_IS_GRAPH( seq )    \
+    (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_GRAPH)
+
+#define CV_IS_GRAPH_ORIENTED( seq )   \
+    (((seq)->flags & CV_GRAPH_FLAG_ORIENTED) != 0)
+
+#define CV_IS_SUBDIV2D( seq )  \
+    (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_SUBDIV2D)
+
+/****************************************************************************************/
+/*                            Sequence writer & reader                                  */
+/****************************************************************************************/
+
+#define CV_SEQ_WRITER_FIELDS()                                     \
+    int          header_size;                                      \
+    CvSeq*       seq;        /**< the sequence written */            \
+    CvSeqBlock*  block;      /**< current block */                   \
+    schar*       ptr;        /**< pointer to free space */           \
+    schar*       block_min;  /**< pointer to the beginning of block*/\
+    schar*       block_max;  /**< pointer to the end of block */
+
+typedef struct CvSeqWriter
+{
+    CV_SEQ_WRITER_FIELDS()
+}
+CvSeqWriter;
+
+
+#define CV_SEQ_READER_FIELDS()                                      \
+    int          header_size;                                       \
+    CvSeq*       seq;        /**< sequence, beign read */             \
+    CvSeqBlock*  block;      /**< current block */                    \
+    schar*       ptr;        /**< pointer to element be read next */  \
+    schar*       block_min;  /**< pointer to the beginning of block */\
+    schar*       block_max;  /**< pointer to the end of block */      \
+    int          delta_index;/**< = seq->first->start_index   */      \
+    schar*       prev_elem;  /**< pointer to previous element */
+
+typedef struct CvSeqReader
+{
+    CV_SEQ_READER_FIELDS()
+}
+CvSeqReader;
+
+/****************************************************************************************/
+/*                                Operations on sequences                               */
+/****************************************************************************************/
+
+#define  CV_SEQ_ELEM( seq, elem_type, index )                    \
+/** assert gives some guarantee that <seq> parameter is valid */  \
+(   assert(sizeof((seq)->first[0]) == sizeof(CvSeqBlock) &&      \
+    (seq)->elem_size == sizeof(elem_type)),                      \
+    (elem_type*)((seq)->first && (unsigned)index <               \
+    (unsigned)((seq)->first->count) ?                            \
+    (seq)->first->data + (index) * sizeof(elem_type) :           \
+    cvGetSeqElem( (CvSeq*)(seq), (index) )))
+#define CV_GET_SEQ_ELEM( elem_type, seq, index ) CV_SEQ_ELEM( (seq), elem_type, (index) )
+
+/** Add element to sequence: */
+#define CV_WRITE_SEQ_ELEM_VAR( elem_ptr, writer )     \
+{                                                     \
+    if( (writer).ptr >= (writer).block_max )          \
+    {                                                 \
+        cvCreateSeqBlock( &writer);                   \
+    }                                                 \
+    memcpy((writer).ptr, elem_ptr, (writer).seq->elem_size);\
+    (writer).ptr += (writer).seq->elem_size;          \
+}
+
+#define CV_WRITE_SEQ_ELEM( elem, writer )             \
+{                                                     \
+    assert( (writer).seq->elem_size == sizeof(elem)); \
+    if( (writer).ptr >= (writer).block_max )          \
+    {                                                 \
+        cvCreateSeqBlock( &writer);                   \
+    }                                                 \
+    assert( (writer).ptr <= (writer).block_max - sizeof(elem));\
+    memcpy((writer).ptr, &(elem), sizeof(elem));      \
+    (writer).ptr += sizeof(elem);                     \
+}
+
+
+/** Move reader position forward: */
+#define CV_NEXT_SEQ_ELEM( elem_size, reader )                 \
+{                                                             \
+    if( ((reader).ptr += (elem_size)) >= (reader).block_max ) \
+    {                                                         \
+        cvChangeSeqBlock( &(reader), 1 );                     \
+    }                                                         \
+}
+
+
+/** Move reader position backward: */
+#define CV_PREV_SEQ_ELEM( elem_size, reader )                \
+{                                                            \
+    if( ((reader).ptr -= (elem_size)) < (reader).block_min ) \
+    {                                                        \
+        cvChangeSeqBlock( &(reader), -1 );                   \
+    }                                                        \
+}
+
+/** Read element and move read position forward: */
+#define CV_READ_SEQ_ELEM( elem, reader )                       \
+{                                                              \
+    assert( (reader).seq->elem_size == sizeof(elem));          \
+    memcpy( &(elem), (reader).ptr, sizeof((elem)));            \
+    CV_NEXT_SEQ_ELEM( sizeof(elem), reader )                   \
+}
+
+/** Read element and move read position backward: */
+#define CV_REV_READ_SEQ_ELEM( elem, reader )                     \
+{                                                                \
+    assert( (reader).seq->elem_size == sizeof(elem));            \
+    memcpy(&(elem), (reader).ptr, sizeof((elem)));               \
+    CV_PREV_SEQ_ELEM( sizeof(elem), reader )                     \
+}
+
+
+#define CV_READ_CHAIN_POINT( _pt, reader )                              \
+{                                                                       \
+    (_pt) = (reader).pt;                                                \
+    if( (reader).ptr )                                                  \
+    {                                                                   \
+        CV_READ_SEQ_ELEM( (reader).code, (reader));                     \
+        assert( ((reader).code & ~7) == 0 );                            \
+        (reader).pt.x += (reader).deltas[(int)(reader).code][0];        \
+        (reader).pt.y += (reader).deltas[(int)(reader).code][1];        \
+    }                                                                   \
+}
+
+#define CV_CURRENT_POINT( reader )  (*((CvPoint*)((reader).ptr)))
+#define CV_PREV_POINT( reader )     (*((CvPoint*)((reader).prev_elem)))
+
+#define CV_READ_EDGE( pt1, pt2, reader )               \
+{                                                      \
+    assert( sizeof(pt1) == sizeof(CvPoint) &&          \
+            sizeof(pt2) == sizeof(CvPoint) &&          \
+            reader.seq->elem_size == sizeof(CvPoint)); \
+    (pt1) = CV_PREV_POINT( reader );                   \
+    (pt2) = CV_CURRENT_POINT( reader );                \
+    (reader).prev_elem = (reader).ptr;                 \
+    CV_NEXT_SEQ_ELEM( sizeof(CvPoint), (reader));      \
+}
+
+/************ Graph macros ************/
+
+/** Return next graph edge for given vertex: */
+#define  CV_NEXT_GRAPH_EDGE( edge, vertex )                              \
+     (assert((edge)->vtx[0] == (vertex) || (edge)->vtx[1] == (vertex)),  \
+      (edge)->next[(edge)->vtx[1] == (vertex)])
+
+
+
+/****************************************************************************************\
+*             Data structures for persistence (a.k.a serialization) functionality        *
+\****************************************************************************************/
+
+/** "black box" file storage */
+typedef struct CvFileStorage CvFileStorage;
+
+/** Storage flags: */
+#define CV_STORAGE_READ          0
+#define CV_STORAGE_WRITE         1
+#define CV_STORAGE_WRITE_TEXT    CV_STORAGE_WRITE
+#define CV_STORAGE_WRITE_BINARY  CV_STORAGE_WRITE
+#define CV_STORAGE_APPEND        2
+#define CV_STORAGE_MEMORY        4
+#define CV_STORAGE_FORMAT_MASK   (7<<3)
+#define CV_STORAGE_FORMAT_AUTO   0
+#define CV_STORAGE_FORMAT_XML    8
+#define CV_STORAGE_FORMAT_YAML  16
+#define CV_STORAGE_FORMAT_JSON  24
+#define CV_STORAGE_BASE64       64
+#define CV_STORAGE_WRITE_BASE64  (CV_STORAGE_BASE64 | CV_STORAGE_WRITE)
+
+/** @brief List of attributes. :
+
+In the current implementation, attributes are used to pass extra parameters when writing user
+objects (see cvWrite). XML attributes inside tags are not supported, aside from the object type
+specification (type_id attribute).
+@see cvAttrList, cvAttrValue
+ */
+typedef struct CvAttrList
+{
+    const char** attr;         /**< NULL-terminated array of (attribute_name,attribute_value) pairs. */
+    struct CvAttrList* next;   /**< Pointer to next chunk of the attributes list.                    */
+}
+CvAttrList;
+
+/** initializes CvAttrList structure */
+CV_INLINE CvAttrList cvAttrList( const char** attr CV_DEFAULT(NULL),
+                                 CvAttrList* next CV_DEFAULT(NULL) )
+{
+    CvAttrList l;
+    l.attr = attr;
+    l.next = next;
+
+    return l;
+}
+
+struct CvTypeInfo;
+
+#define CV_NODE_NONE        0
+#define CV_NODE_INT         1
+#define CV_NODE_INTEGER     CV_NODE_INT
+#define CV_NODE_REAL        2
+#define CV_NODE_FLOAT       CV_NODE_REAL
+#define CV_NODE_STR         3
+#define CV_NODE_STRING      CV_NODE_STR
+#define CV_NODE_REF         4 /**< not used */
+#define CV_NODE_SEQ         5
+#define CV_NODE_MAP         6
+#define CV_NODE_TYPE_MASK   7
+
+#define CV_NODE_TYPE(flags)  ((flags) & CV_NODE_TYPE_MASK)
+
+/** file node flags */
+#define CV_NODE_FLOW        8 /**<Used only for writing structures in YAML format. */
+#define CV_NODE_USER        16
+#define CV_NODE_EMPTY       32
+#define CV_NODE_NAMED       64
+
+#define CV_NODE_IS_INT(flags)        (CV_NODE_TYPE(flags) == CV_NODE_INT)
+#define CV_NODE_IS_REAL(flags)       (CV_NODE_TYPE(flags) == CV_NODE_REAL)
+#define CV_NODE_IS_STRING(flags)     (CV_NODE_TYPE(flags) == CV_NODE_STRING)
+#define CV_NODE_IS_SEQ(flags)        (CV_NODE_TYPE(flags) == CV_NODE_SEQ)
+#define CV_NODE_IS_MAP(flags)        (CV_NODE_TYPE(flags) == CV_NODE_MAP)
+#define CV_NODE_IS_COLLECTION(flags) (CV_NODE_TYPE(flags) >= CV_NODE_SEQ)
+#define CV_NODE_IS_FLOW(flags)       (((flags) & CV_NODE_FLOW) != 0)
+#define CV_NODE_IS_EMPTY(flags)      (((flags) & CV_NODE_EMPTY) != 0)
+#define CV_NODE_IS_USER(flags)       (((flags) & CV_NODE_USER) != 0)
+#define CV_NODE_HAS_NAME(flags)      (((flags) & CV_NODE_NAMED) != 0)
+
+#define CV_NODE_SEQ_SIMPLE 256
+#define CV_NODE_SEQ_IS_SIMPLE(seq) (((seq)->flags & CV_NODE_SEQ_SIMPLE) != 0)
+
+typedef struct CvString
+{
+    int len;
+    char* ptr;
+}
+CvString;
+
+/** All the keys (names) of elements in the readed file storage
+   are stored in the hash to speed up the lookup operations: */
+typedef struct CvStringHashNode
+{
+    unsigned hashval;
+    CvString str;
+    struct CvStringHashNode* next;
+}
+CvStringHashNode;
+
+typedef struct CvGenericHash CvFileNodeHash;
+
+/** Basic element of the file storage - scalar or collection: */
+typedef struct CvFileNode
+{
+    int tag;
+    struct CvTypeInfo* info; /**< type information
+            (only for user-defined object, for others it is 0) */
+    union
+    {
+        double f; /**< scalar floating-point number */
+        int i;    /**< scalar integer number */
+        CvString str; /**< text string */
+        CvSeq* seq; /**< sequence (ordered collection of file nodes) */
+        CvFileNodeHash* map; /**< map (collection of named file nodes) */
+    } data;
+}
+CvFileNode;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef int (CV_CDECL *CvIsInstanceFunc)( const void* struct_ptr );
+typedef void (CV_CDECL *CvReleaseFunc)( void** struct_dblptr );
+typedef void* (CV_CDECL *CvReadFunc)( CvFileStorage* storage, CvFileNode* node );
+typedef void (CV_CDECL *CvWriteFunc)( CvFileStorage* storage, const char* name,
+                                      const void* struct_ptr, CvAttrList attributes );
+typedef void* (CV_CDECL *CvCloneFunc)( const void* struct_ptr );
+#ifdef __cplusplus
+}
+#endif
+
+/** @brief Type information
+
+The structure contains information about one of the standard or user-defined types. Instances of the
+type may or may not contain a pointer to the corresponding CvTypeInfo structure. In any case, there
+is a way to find the type info structure for a given object using the cvTypeOf function.
+Alternatively, type info can be found by type name using cvFindType, which is used when an object
+is read from file storage. The user can register a new type with cvRegisterType that adds the type
+information structure into the beginning of the type list. Thus, it is possible to create
+specialized types from generic standard types and override the basic methods.
+ */
+typedef struct CvTypeInfo
+{
+    int flags; /**< not used */
+    int header_size; /**< sizeof(CvTypeInfo) */
+    struct CvTypeInfo* prev; /**< previous registered type in the list */
+    struct CvTypeInfo* next; /**< next registered type in the list */
+    const char* type_name; /**< type name, written to file storage */
+    CvIsInstanceFunc is_instance; /**< checks if the passed object belongs to the type */
+    CvReleaseFunc release; /**< releases object (memory etc.) */
+    CvReadFunc read; /**< reads object from file storage */
+    CvWriteFunc write; /**< writes object to file storage */
+    CvCloneFunc clone; /**< creates a copy of the object */
+}
+CvTypeInfo;
+
+
+/**** System data types ******/
+
+typedef struct CvPluginFuncInfo
+{
+    void** func_addr;
+    void* default_func_addr;
+    const char* func_names;
+    int search_modules;
+    int loaded_from;
+}
+CvPluginFuncInfo;
+
+typedef struct CvModuleInfo
+{
+    struct CvModuleInfo* next;
+    const char* name;
+    const char* version;
+    CvPluginFuncInfo* func_tab;
+}
+CvModuleInfo;
+
+/** @} */
+
+#endif /*OPENCV_CORE_TYPES_H*/
+
+/* End of file. */
diff --git a/thirdparty/linux/include/opencv2/core/utility.hpp b/thirdparty/linux/include/opencv2/core/utility.hpp
new file mode 100644
index 0000000..e7a7c2d
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/utility.hpp
@@ -0,0 +1,1171 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_UTILITY_H
+#define OPENCV_CORE_UTILITY_H
+
+#ifndef __cplusplus
+#  error utility.hpp header must be compiled as C++
+#endif
+
+#if defined(check)
+#  warning Detected Apple 'check' macro definition, it can cause build conflicts. Please, include this header before any Apple headers.
+#endif
+
+#include "opencv2/core.hpp"
+
+namespace cv
+{
+
+#ifdef CV_COLLECT_IMPL_DATA
+CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
+CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
+// Get stored implementation flags and fucntions names arrays
+// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which fucntion
+CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
+
+CV_EXPORTS bool useCollection(); // return implementation collection state
+CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
+
+#define CV_IMPL_PLAIN  0x01 // native CPU OpenCV implementation
+#define CV_IMPL_OCL    0x02 // OpenCL implementation
+#define CV_IMPL_IPP    0x04 // IPP implementation
+#define CV_IMPL_MT     0x10 // multithreaded implementation
+
+#define CV_IMPL_ADD(impl)                                                   \
+    if(cv::useCollection())                                                 \
+    {                                                                       \
+        cv::addImpl(impl, CV_Func);                                         \
+    }
+#else
+#define CV_IMPL_ADD(impl)
+#endif
+
+//! @addtogroup core_utils
+//! @{
+
+/** @brief  Automatically Allocated Buffer Class
+
+ The class is used for temporary buffers in functions and methods.
+ If a temporary buffer is usually small (a few K's of memory),
+ but its size depends on the parameters, it makes sense to create a small
+ fixed-size array on stack and use it if it's large enough. If the required buffer size
+ is larger than the fixed size, another buffer of sufficient size is allocated dynamically
+ and released after the processing. Therefore, in typical cases, when the buffer size is small,
+ there is no overhead associated with malloc()/free().
+ At the same time, there is no limit on the size of processed data.
+
+ This is what AutoBuffer does. The template takes 2 parameters - type of the buffer elements and
+ the number of stack-allocated elements. Here is how the class is used:
+
+ \code
+ void my_func(const cv::Mat& m)
+ {
+    cv::AutoBuffer<float> buf; // create automatic buffer containing 1000 floats
+
+    buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used,
+                          // otherwise the buffer of "m.rows" floats will be allocated
+                          // dynamically and deallocated in cv::AutoBuffer destructor
+    ...
+ }
+ \endcode
+*/
+template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
+{
+public:
+    typedef _Tp value_type;
+
+    //! the default constructor
+    AutoBuffer();
+    //! constructor taking the real buffer size
+    AutoBuffer(size_t _size);
+
+    //! the copy constructor
+    AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf);
+    //! the assignment operator
+    AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf);
+
+    //! destructor. calls deallocate()
+    ~AutoBuffer();
+
+    //! allocates the new buffer of size _size. if the _size is small enough, stack-allocated buffer is used
+    void allocate(size_t _size);
+    //! deallocates the buffer if it was dynamically allocated
+    void deallocate();
+    //! resizes the buffer and preserves the content
+    void resize(size_t _size);
+    //! returns the current buffer size
+    size_t size() const;
+    //! returns pointer to the real buffer, stack-allocated or head-allocated
+    operator _Tp* ();
+    //! returns read-only pointer to the real buffer, stack-allocated or head-allocated
+    operator const _Tp* () const;
+
+protected:
+    //! pointer to the real buffer, can point to buf if the buffer is small enough
+    _Tp* ptr;
+    //! size of the real buffer
+    size_t sz;
+    //! pre-allocated buffer. At least 1 element to confirm C++ standard reqirements
+    _Tp buf[(fixed_size > 0) ? fixed_size : 1];
+};
+
+/**  @brief Sets/resets the break-on-error mode.
+
+When the break-on-error mode is set, the default error handler issues a hardware exception, which
+can make debugging more convenient.
+
+\return the previous state
+ */
+CV_EXPORTS bool setBreakOnError(bool flag);
+
+extern "C" typedef int (*ErrorCallback)( int status, const char* func_name,
+                                       const char* err_msg, const char* file_name,
+                                       int line, void* userdata );
+
+
+/** @brief Sets the new error handler and the optional user data.
+
+  The function sets the new error handler, called from cv::error().
+
+  \param errCallback the new error handler. If NULL, the default error handler is used.
+  \param userdata the optional user data pointer, passed to the callback.
+  \param prevUserdata the optional output parameter where the previous user data pointer is stored
+
+  \return the previous error handler
+*/
+CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0);
+
+/** @brief Returns a text string formatted using the printf-like expression.
+
+The function acts like sprintf but forms and returns an STL string. It can be used to form an error
+message in the Exception constructor.
+@param fmt printf-compatible formatting specifiers.
+ */
+CV_EXPORTS String format( const char* fmt, ... );
+CV_EXPORTS String tempfile( const char* suffix = 0);
+CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
+
+/** @brief OpenCV will try to set the number of threads for the next parallel region.
+
+If threads == 0, OpenCV will disable threading optimizations and run all it's functions
+sequentially. Passing threads \< 0 will reset threads number to system default. This function must
+be called outside of parallel region.
+
+OpenCV will try to run it's functions with specified threads number, but some behaviour differs from
+framework:
+-   `TBB` – User-defined parallel constructions will run with the same threads number, if
+    another does not specified. If late on user creates own scheduler, OpenCV will be use it.
+-   `OpenMP` – No special defined behaviour.
+-   `Concurrency` – If threads == 1, OpenCV will disable threading optimizations and run it's
+    functions sequentially.
+-   `GCD` – Supports only values \<= 0.
+-   `C=` – No special defined behaviour.
+@param nthreads Number of threads used by OpenCV.
+@sa getNumThreads, getThreadNum
+ */
+CV_EXPORTS_W void setNumThreads(int nthreads);
+
+/** @brief Returns the number of threads used by OpenCV for parallel regions.
+
+Always returns 1 if OpenCV is built without threading support.
+
+The exact meaning of return value depends on the threading framework used by OpenCV library:
+- `TBB` – The number of threads, that OpenCV will try to use for parallel regions. If there is
+  any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns
+  default number of threads used by TBB library.
+- `OpenMP` – An upper bound on the number of threads that could be used to form a new team.
+- `Concurrency` – The number of threads, that OpenCV will try to use for parallel regions.
+- `GCD` – Unsupported; returns the GCD thread pool limit (512) for compatibility.
+- `C=` – The number of threads, that OpenCV will try to use for parallel regions, if before
+  called setNumThreads with threads \> 0, otherwise returns the number of logical CPUs,
+  available for the process.
+@sa setNumThreads, getThreadNum
+ */
+CV_EXPORTS_W int getNumThreads();
+
+/** @brief Returns the index of the currently executed thread within the current parallel region. Always
+returns 0 if called outside of parallel region.
+
+The exact meaning of return value depends on the threading framework used by OpenCV library:
+- `TBB` – Unsupported with current 4.1 TBB release. May be will be supported in future.
+- `OpenMP` – The thread number, within the current team, of the calling thread.
+- `Concurrency` – An ID for the virtual processor that the current context is executing on (0
+  for master thread and unique number for others, but not necessary 1,2,3,...).
+- `GCD` – System calling thread's ID. Never returns 0 inside parallel region.
+- `C=` – The index of the current parallel task.
+@sa setNumThreads, getNumThreads
+ */
+CV_EXPORTS_W int getThreadNum();
+
+/** @brief Returns full configuration time cmake output.
+
+Returned value is raw cmake output including version control system revision, compiler version,
+compiler flags, enabled modules and third party libraries, etc. Output format depends on target
+architecture.
+ */
+CV_EXPORTS_W const String& getBuildInformation();
+
+/** @brief Returns the number of ticks.
+
+The function returns the number of ticks after the certain event (for example, when the machine was
+turned on). It can be used to initialize RNG or to measure a function execution time by reading the
+tick count before and after the function call.
+@sa getTickFrequency, TickMeter
+ */
+CV_EXPORTS_W int64 getTickCount();
+
+/** @brief Returns the number of ticks per second.
+
+The function returns the number of ticks per second. That is, the following code computes the
+execution time in seconds:
+@code
+    double t = (double)getTickCount();
+    // do something ...
+    t = ((double)getTickCount() - t)/getTickFrequency();
+@endcode
+@sa getTickCount, TickMeter
+ */
+CV_EXPORTS_W double getTickFrequency();
+
+/** @brief a Class to measure passing time.
+
+The class computes passing time by counting the number of ticks per second. That is, the following code computes the
+execution time in seconds:
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm.getTimeSec();
+@endcode
+@sa getTickCount, getTickFrequency
+*/
+
+class CV_EXPORTS_W TickMeter
+{
+public:
+    //! the default constructor
+    CV_WRAP TickMeter()
+    {
+    reset();
+    }
+
+    /**
+    starts counting ticks.
+    */
+    CV_WRAP void start()
+    {
+    startTime = cv::getTickCount();
+    }
+
+    /**
+    stops counting ticks.
+    */
+    CV_WRAP void stop()
+    {
+    int64 time = cv::getTickCount();
+    if (startTime == 0)
+    return;
+    ++counter;
+    sumTime += (time - startTime);
+    startTime = 0;
+    }
+
+    /**
+    returns counted ticks.
+    */
+    CV_WRAP int64 getTimeTicks() const
+    {
+    return sumTime;
+    }
+
+    /**
+    returns passed time in microseconds.
+    */
+    CV_WRAP double getTimeMicro() const
+    {
+    return getTimeMilli()*1e3;
+    }
+
+    /**
+    returns passed time in milliseconds.
+    */
+    CV_WRAP double getTimeMilli() const
+    {
+    return getTimeSec()*1e3;
+    }
+
+    /**
+    returns passed time in seconds.
+    */
+    CV_WRAP double getTimeSec()   const
+    {
+    return (double)getTimeTicks() / getTickFrequency();
+    }
+
+    /**
+    returns internal counter value.
+    */
+    CV_WRAP int64 getCounter() const
+    {
+    return counter;
+    }
+
+    /**
+    resets internal values.
+    */
+    CV_WRAP void reset()
+    {
+    startTime = 0;
+    sumTime = 0;
+    counter = 0;
+    }
+
+private:
+    int64 counter;
+    int64 sumTime;
+    int64 startTime;
+};
+
+/** @brief output operator
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm;
+@endcode
+*/
+
+static inline
+std::ostream& operator << (std::ostream& out, const TickMeter& tm)
+{
+    return out << tm.getTimeSec() << "sec";
+}
+
+/** @brief Returns the number of CPU ticks.
+
+The function returns the current number of CPU ticks on some architectures (such as x86, x64,
+PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for
+very accurate time measurements, as well as for RNG initialization. Note that in case of multi-CPU
+systems a thread, from which getCPUTickCount is called, can be suspended and resumed at another CPU
+with its own counter. So, theoretically (and practically) the subsequent calls to the function do
+not necessary return the monotonously increasing values. Also, since a modern CPU varies the CPU
+frequency depending on the load, the number of CPU clocks spent in some code cannot be directly
+converted to time units. Therefore, getTickCount is generally a preferable solution for measuring
+execution time.
+ */
+CV_EXPORTS_W int64 getCPUTickCount();
+
+/** @brief Returns true if the specified feature is supported by the host hardware.
+
+The function returns true if the host hardware supports the specified feature. When user calls
+setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until
+setUseOptimized(true) is called. This way user can dynamically switch on and off the optimized code
+in OpenCV.
+@param feature The feature of interest, one of cv::CpuFeatures
+ */
+CV_EXPORTS_W bool checkHardwareSupport(int feature);
+
+/** @brief Returns the number of logical CPUs available for the process.
+ */
+CV_EXPORTS_W int getNumberOfCPUs();
+
+
+/** @brief Aligns a pointer to the specified number of bytes.
+
+The function returns the aligned pointer of the same type as the input pointer:
+\f[\texttt{(_Tp*)(((size_t)ptr + n-1) & -n)}\f]
+@param ptr Aligned pointer.
+@param n Alignment size that must be a power of two.
+ */
+template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp))
+{
+    return (_Tp*)(((size_t)ptr + n-1) & -n);
+}
+
+/** @brief Aligns a buffer size to the specified number of bytes.
+
+The function returns the minimum number that is greater or equal to sz and is divisible by n :
+\f[\texttt{(sz + n-1) & -n}\f]
+@param sz Buffer size to align.
+@param n Alignment size that must be a power of two.
+ */
+static inline size_t alignSize(size_t sz, int n)
+{
+    CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
+    return (sz + n-1) & -n;
+}
+
+/** @brief Enables or disables the optimized code.
+
+The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX,
+and other instructions on the platforms that support it). It sets a global flag that is further
+checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only
+safe to call the function on the very top level in your application where you can be sure that no
+other OpenCV function is currently executed.
+
+By default, the optimized code is enabled unless you disable it in CMake. The current status can be
+retrieved using useOptimized.
+@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true)
+or not (onoff=false).
+ */
+CV_EXPORTS_W void setUseOptimized(bool onoff);
+
+/** @brief Returns the status of optimized code usage.
+
+The function returns true if the optimized code is enabled. Otherwise, it returns false.
+ */
+CV_EXPORTS_W bool useOptimized();
+
+static inline size_t getElemSize(int type) { return CV_ELEM_SIZE(type); }
+
+/////////////////////////////// Parallel Primitives //////////////////////////////////
+
+/** @brief Base class for parallel data processors
+*/
+class CV_EXPORTS ParallelLoopBody
+{
+public:
+    virtual ~ParallelLoopBody();
+    virtual void operator() (const Range& range) const = 0;
+};
+
+/** @brief Parallel data processor
+*/
+CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
+
+/////////////////////////////// forEach method of cv::Mat ////////////////////////////
+template<typename _Tp, typename Functor> inline
+void Mat::forEach_impl(const Functor& operation) {
+    if (false) {
+        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(0));
+        // If your compiler fail in this line.
+        // Please check that your functor signature is
+        //     (_Tp&, const int*)   <- multidimential
+        //  or (_Tp&, void*)        <- in case of you don't need current idx.
+    }
+
+    CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
+    const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
+
+    class PixelOperationWrapper :public ParallelLoopBody
+    {
+    public:
+        PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
+            : mat(frame), op(_operation) {}
+        virtual ~PixelOperationWrapper(){}
+        // ! Overloaded virtual operator
+        // convert range call to row call.
+        virtual void operator()(const Range &range) const {
+            const int DIMS = mat->dims;
+            const int COLS = mat->size[DIMS - 1];
+            if (DIMS <= 2) {
+                for (int row = range.start; row < range.end; ++row) {
+                    this->rowCall2(row, COLS);
+                }
+            } else {
+                std::vector<int> idx(COLS); /// idx is modified in this->rowCall
+                idx[DIMS - 2] = range.start - 1;
+
+                for (int line_num = range.start; line_num < range.end; ++line_num) {
+                    idx[DIMS - 2]++;
+                    for (int i = DIMS - 2; i >= 0; --i) {
+                        if (idx[i] >= mat->size[i]) {
+                            idx[i - 1] += idx[i] / mat->size[i];
+                            idx[i] %= mat->size[i];
+                            continue; // carry-over;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    this->rowCall(&idx[0], COLS, DIMS);
+                }
+            }
+        }
+    private:
+        Mat_<_Tp>* const mat;
+        const Functor op;
+        // ! Call operator for each elements in this row.
+        inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
+            int &col = idx[DIMS - 1];
+            col = 0;
+            _Tp* pixel = &(mat->template at<_Tp>(idx));
+
+            while (col < COLS) {
+                op(*pixel, const_cast<const int*>(idx));
+                pixel++; col++;
+            }
+            col = 0;
+        }
+        // ! Call operator for each elements in this row. 2d mat special version.
+        inline void rowCall2(const int row, const int COLS) const {
+            union Index{
+                int body[2];
+                operator const int*() const {
+                    return reinterpret_cast<const int*>(this);
+                }
+                int& operator[](const int i) {
+                    return body[i];
+                }
+            } idx = {{row, 0}};
+            // Special union is needed to avoid
+            // "error: array subscript is above array bounds [-Werror=array-bounds]"
+            // when call the functor `op` such that access idx[3].
+
+            _Tp* pixel = &(mat->template at<_Tp>(idx));
+            const _Tp* const pixel_end = pixel + COLS;
+            while(pixel < pixel_end) {
+                op(*pixel++, static_cast<const int*>(idx));
+                idx[1]++;
+            }
+        }
+        PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
+            CV_Assert(false);
+            // We can not remove this implementation because Visual Studio warning C4822.
+            return *this;
+        }
+    };
+
+    parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
+}
+
+/////////////////////////// Synchronization Primitives ///////////////////////////////
+
+class CV_EXPORTS Mutex
+{
+public:
+    Mutex();
+    ~Mutex();
+    Mutex(const Mutex& m);
+    Mutex& operator = (const Mutex& m);
+
+    void lock();
+    bool trylock();
+    void unlock();
+
+    struct Impl;
+protected:
+    Impl* impl;
+};
+
+class CV_EXPORTS AutoLock
+{
+public:
+    AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
+    ~AutoLock() { mutex->unlock(); }
+protected:
+    Mutex* mutex;
+private:
+    AutoLock(const AutoLock&);
+    AutoLock& operator = (const AutoLock&);
+};
+
+// TLS interface
+class CV_EXPORTS TLSDataContainer
+{
+protected:
+    TLSDataContainer();
+    virtual ~TLSDataContainer();
+
+    void  gatherData(std::vector<void*> &data) const;
+#if OPENCV_ABI_COMPATIBILITY > 300
+    void* getData() const;
+    void  release();
+
+private:
+#else
+    void  release();
+
+public:
+    void* getData() const;
+#endif
+    virtual void* createDataInstance() const = 0;
+    virtual void  deleteDataInstance(void* pData) const = 0;
+
+    int key_;
+};
+
+// Main TLS data class
+template <typename T>
+class TLSData : protected TLSDataContainer
+{
+public:
+    inline TLSData()        {}
+    inline ~TLSData()       { release();            } // Release key and delete associated data
+    inline T* get() const   { return (T*)getData(); } // Get data assosiated with key
+
+     // Get data from all threads
+    inline void gather(std::vector<T*> &data) const
+    {
+        std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
+        gatherData(dataVoid);
+    }
+
+private:
+    virtual void* createDataInstance() const {return new T;}                // Wrapper to allocate data by template
+    virtual void  deleteDataInstance(void* pData) const {delete (T*)pData;} // Wrapper to release data by template
+
+    // Disable TLS copy operations
+    TLSData(TLSData &) {}
+    TLSData& operator =(const TLSData &) {return *this;}
+};
+
+/** @brief Designed for command line parsing
+
+The sample below demonstrates how to use CommandLineParser:
+@code
+    CommandLineParser parser(argc, argv, keys);
+    parser.about("Application name v1.0.0");
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    int N = parser.get<int>("N");
+    double fps = parser.get<double>("fps");
+    String path = parser.get<String>("path");
+
+    use_time_stamp = parser.has("timestamp");
+
+    String img1 = parser.get<String>(0);
+    String img2 = parser.get<String>(1);
+
+    int repeat = parser.get<int>(2);
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+@endcode
+
+### Keys syntax
+
+The keys parameter is a string containing several blocks, each one is enclosed in curley braces and
+describes one argument. Each argument contains three parts separated by the `|` symbol:
+
+-# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol)
+-# default value will be used if the argument was not provided (can be empty)
+-# help message (can be empty)
+
+For example:
+
+@code{.cpp}
+    const String keys =
+        "{help h usage ? |      | print this message   }"
+        "{@image1        |      | image1 for compare   }"
+        "{@image2        |<none>| image2 for compare   }"
+        "{@repeat        |1     | number               }"
+        "{path           |.     | path to file         }"
+        "{fps            | -1.0 | fps for output video }"
+        "{N count        |100   | count of objects     }"
+        "{ts timestamp   |      | use time stamp       }"
+        ;
+}
+@endcode
+
+Note that there are no default values for `help` and `timestamp` so we can check their presence using the `has()` method.
+Arguments with default values are considered to be always present. Use the `get()` method in these cases to check their
+actual value instead.
+
+String keys like `get<String>("@image1")` return the empty string `""` by default - even with an empty default value.
+Use the special `<none>` default value to enforce that the returned string must not be empty. (like in `get<String>("@image2")`)
+
+### Usage
+
+For the described keys:
+
+@code{.sh}
+    # Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true)
+    $ ./app -N=200 1.png 2.jpg 19 -ts
+
+    # Bad call
+    $ ./app -fps=aaa
+    ERRORS:
+    Parameter 'fps': can not convert: [aaa] to [double]
+@endcode
+ */
+class CV_EXPORTS CommandLineParser
+{
+public:
+
+    /** @brief Constructor
+
+    Initializes command line parser object
+
+    @param argc number of command line arguments (from main())
+    @param argv array of command line arguments (from main())
+    @param keys string describing acceptable command line parameters (see class description for syntax)
+    */
+    CommandLineParser(int argc, const char* const argv[], const String& keys);
+
+    /** @brief Copy constructor */
+    CommandLineParser(const CommandLineParser& parser);
+
+    /** @brief Assignment operator */
+    CommandLineParser& operator = (const CommandLineParser& parser);
+
+    /** @brief Destructor */
+    ~CommandLineParser();
+
+    /** @brief Returns application path
+
+    This method returns the path to the executable from the command line (`argv[0]`).
+
+    For example, if the application has been started with such command:
+    @code{.sh}
+    $ ./bin/my-executable
+    @endcode
+    this method will return `./bin`.
+    */
+    String getPathToApplication() const;
+
+    /** @brief Access arguments by name
+
+    Returns argument converted to selected type. If the argument is not known or can not be
+    converted to selected type, the error flag is set (can be checked with @ref check).
+
+    For example, define:
+    @code{.cpp}
+    String keys = "{N count||}";
+    @endcode
+
+    Call:
+    @code{.sh}
+    $ ./my-app -N=20
+    # or
+    $ ./my-app --count=20
+    @endcode
+
+    Access:
+    @code{.cpp}
+    int N = parser.get<int>("N");
+    @endcode
+
+    @param name name of the argument
+    @param space_delete remove spaces from the left and right of the string
+    @tparam T the argument will be converted to this type if possible
+
+    @note You can access positional arguments by their `@`-prefixed name:
+    @code{.cpp}
+    parser.get<String>("@image");
+    @endcode
+     */
+    template <typename T>
+    T get(const String& name, bool space_delete = true) const
+    {
+        T val = T();
+        getByName(name, space_delete, ParamType<T>::type, (void*)&val);
+        return val;
+    }
+
+    /** @brief Access positional arguments by index
+
+    Returns argument converted to selected type. Indexes are counted from zero.
+
+    For example, define:
+    @code{.cpp}
+    String keys = "{@arg1||}{@arg2||}"
+    @endcode
+
+    Call:
+    @code{.sh}
+    ./my-app abc qwe
+    @endcode
+
+    Access arguments:
+    @code{.cpp}
+    String val_1 = parser.get<String>(0); // returns "abc", arg1
+    String val_2 = parser.get<String>(1); // returns "qwe", arg2
+    @endcode
+
+    @param index index of the argument
+    @param space_delete remove spaces from the left and right of the string
+    @tparam T the argument will be converted to this type if possible
+     */
+    template <typename T>
+    T get(int index, bool space_delete = true) const
+    {
+        T val = T();
+        getByIndex(index, space_delete, ParamType<T>::type, (void*)&val);
+        return val;
+    }
+
+    /** @brief Check if field was provided in the command line
+
+    @param name argument name to check
+    */
+    bool has(const String& name) const;
+
+    /** @brief Check for parsing errors
+
+    Returns true if error occured while accessing the parameters (bad conversion, missing arguments,
+    etc.). Call @ref printErrors to print error messages list.
+     */
+    bool check() const;
+
+    /** @brief Set the about message
+
+    The about message will be shown when @ref printMessage is called, right before arguments table.
+     */
+    void about(const String& message);
+
+    /** @brief Print help message
+
+    This method will print standard help message containing the about message and arguments description.
+
+    @sa about
+    */
+    void printMessage() const;
+
+    /** @brief Print list of errors occured
+
+    @sa check
+    */
+    void printErrors() const;
+
+protected:
+    void getByName(const String& name, bool space_delete, int type, void* dst) const;
+    void getByIndex(int index, bool space_delete, int type, void* dst) const;
+
+    struct Impl;
+    Impl* impl;
+};
+
+//! @} core_utils
+
+//! @cond IGNORED
+
+/////////////////////////////// AutoBuffer implementation ////////////////////////////////////////
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer()
+{
+    ptr = buf;
+    sz = fixed_size;
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size)
+{
+    ptr = buf;
+    sz = fixed_size;
+    allocate(_size);
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf )
+{
+    ptr = buf;
+    sz = fixed_size;
+    allocate(abuf.size());
+    for( size_t i = 0; i < sz; i++ )
+        ptr[i] = abuf.ptr[i];
+}
+
+template<typename _Tp, size_t fixed_size> inline AutoBuffer<_Tp, fixed_size>&
+AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf)
+{
+    if( this != &abuf )
+    {
+        deallocate();
+        allocate(abuf.size());
+        for( size_t i = 0; i < sz; i++ )
+            ptr[i] = abuf.ptr[i];
+    }
+    return *this;
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::~AutoBuffer()
+{ deallocate(); }
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::allocate(size_t _size)
+{
+    if(_size <= sz)
+    {
+        sz = _size;
+        return;
+    }
+    deallocate();
+    sz = _size;
+    if(_size > fixed_size)
+    {
+        ptr = new _Tp[_size];
+    }
+}
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::deallocate()
+{
+    if( ptr != buf )
+    {
+        delete[] ptr;
+        ptr = buf;
+        sz = fixed_size;
+    }
+}
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::resize(size_t _size)
+{
+    if(_size <= sz)
+    {
+        sz = _size;
+        return;
+    }
+    size_t i, prevsize = sz, minsize = MIN(prevsize, _size);
+    _Tp* prevptr = ptr;
+
+    ptr = _size > fixed_size ? new _Tp[_size] : buf;
+    sz = _size;
+
+    if( ptr != prevptr )
+        for( i = 0; i < minsize; i++ )
+            ptr[i] = prevptr[i];
+    for( i = prevsize; i < _size; i++ )
+        ptr[i] = _Tp();
+
+    if( prevptr != buf )
+        delete[] prevptr;
+}
+
+template<typename _Tp, size_t fixed_size> inline size_t
+AutoBuffer<_Tp, fixed_size>::size() const
+{ return sz; }
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
+{ return ptr; }
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
+{ return ptr; }
+
+#ifndef OPENCV_NOSTL
+template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
+{
+    return get<String>(index, space_delete);
+}
+template<> inline std::string CommandLineParser::get<std::string>(const String& name, bool space_delete) const
+{
+    return get<String>(name, space_delete);
+}
+#endif // OPENCV_NOSTL
+
+//! @endcond
+
+
+// Basic Node class for tree building
+template<class OBJECT>
+class CV_EXPORTS Node
+{
+public:
+    Node()
+    {
+        m_pParent  = 0;
+    }
+    Node(OBJECT& payload) : m_payload(payload)
+    {
+        m_pParent  = 0;
+    }
+    ~Node()
+    {
+        removeChilds();
+        if (m_pParent)
+        {
+            int idx = m_pParent->findChild(this);
+            if (idx >= 0)
+                m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
+        }
+    }
+
+    Node<OBJECT>* findChild(OBJECT& payload) const
+    {
+        for(size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i]->m_payload == payload)
+                return this->m_childs[i];
+        }
+        return NULL;
+    }
+
+    int findChild(Node<OBJECT> *pNode) const
+    {
+        for (size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i] == pNode)
+                return (int)i;
+        }
+        return -1;
+    }
+
+    void addChild(Node<OBJECT> *pNode)
+    {
+        if(!pNode)
+            return;
+
+        CV_Assert(pNode->m_pParent == 0);
+        pNode->m_pParent = this;
+        this->m_childs.push_back(pNode);
+    }
+
+    void removeChilds()
+    {
+        for(size_t i = 0; i < m_childs.size(); i++)
+        {
+            m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
+            delete m_childs[i];
+        }
+        m_childs.clear();
+    }
+
+    int getDepth()
+    {
+        int   count   = 0;
+        Node *pParent = m_pParent;
+        while(pParent) count++, pParent = pParent->m_pParent;
+        return count;
+    }
+
+public:
+    OBJECT                     m_payload;
+    Node<OBJECT>*              m_pParent;
+    std::vector<Node<OBJECT>*> m_childs;
+};
+
+// Instrumentation external interface
+namespace instr
+{
+
+#if !defined OPENCV_ABI_CHECK
+
+enum TYPE
+{
+    TYPE_GENERAL = 0,   // OpenCV API function, e.g. exported function
+    TYPE_MARKER,        // Information marker
+    TYPE_WRAPPER,       // Wrapper function for implementation
+    TYPE_FUN,           // Simple function call
+};
+
+enum IMPL
+{
+    IMPL_PLAIN = 0,
+    IMPL_IPP,
+    IMPL_OPENCL,
+};
+
+struct NodeDataTls
+{
+    NodeDataTls()
+    {
+        m_ticksTotal = 0;
+    }
+    uint64      m_ticksTotal;
+};
+
+class CV_EXPORTS NodeData
+{
+public:
+    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
+    NodeData(NodeData &ref);
+    ~NodeData();
+    NodeData& operator=(const NodeData&);
+
+    cv::String          m_funName;
+    cv::instr::TYPE     m_instrType;
+    cv::instr::IMPL     m_implType;
+    const char*         m_fileName;
+    int                 m_lineNum;
+    void*               m_retAddress;
+    bool                m_alwaysExpand;
+    bool                m_funError;
+
+    volatile int         m_counter;
+    volatile uint64      m_ticksTotal;
+    TLSData<NodeDataTls> m_tls;
+    int                  m_threads;
+
+    // No synchronization
+    double getTotalMs()   const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
+    double getMeanMs()    const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
+};
+bool operator==(const NodeData& lhs, const NodeData& rhs);
+
+typedef Node<NodeData> InstrNode;
+
+CV_EXPORTS InstrNode* getTrace();
+
+#endif // !defined OPENCV_ABI_CHECK
+
+
+CV_EXPORTS bool       useInstrumentation();
+CV_EXPORTS void       setUseInstrumentation(bool flag);
+CV_EXPORTS void       resetTrace();
+
+enum FLAGS
+{
+    FLAGS_NONE              = 0,
+    FLAGS_MAPPING           = 0x01,
+    FLAGS_EXPAND_SAME_NAMES = 0x02,
+};
+
+CV_EXPORTS void       setFlags(FLAGS modeFlags);
+static inline void    setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
+CV_EXPORTS FLAGS      getFlags();
+}
+
+} //namespace cv
+
+#ifndef DISABLE_OPENCV_24_COMPATIBILITY
+#include "opencv2/core/core_c.h"
+#endif
+
+#endif //OPENCV_CORE_UTILITY_H
diff --git a/thirdparty/linux/include/opencv2/core/va_intel.hpp b/thirdparty/linux/include/opencv2/core/va_intel.hpp
new file mode 100644
index 0000000..3325848
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/va_intel.hpp
@@ -0,0 +1,77 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2015, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_CORE_VA_INTEL_HPP
+#define OPENCV_CORE_VA_INTEL_HPP
+
+#ifndef __cplusplus
+#  error va_intel.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "ocl.hpp"
+
+#if defined(HAVE_VA)
+# include "va/va.h"
+#else  // HAVE_VA
+# if !defined(_VA_H_)
+    typedef void* VADisplay;
+    typedef unsigned int VASurfaceID;
+# endif // !_VA_H_
+#endif // HAVE_VA
+
+namespace cv { namespace va_intel {
+
+/** @addtogroup core_va_intel
+This section describes Intel VA-API/OpenCL (CL-VA) interoperability.
+
+To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is
+supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may
+have to specify the path(s) to MSS components for cmake in environment variables: VA_INTEL_MSDK_ROOT for Media SDK
+(default is "/opt/intel/mediasdk"), and VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
+
+To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
+function to create OpenCL context and set up interoperability.
+*/
+//! @{
+
+/////////////////// CL-VA Interoperability Functions ///////////////////
+
+namespace ocl {
+using namespace cv::ocl;
+
+// TODO static functions in the Context class
+/** @brief Creates OpenCL context from VA.
+@param display    - VADisplay for which CL interop should be established.
+@param tryInterop - try to set up for interoperability, if true; set up for use slow copy if false.
+@return Returns reference to OpenCL Context
+ */
+CV_EXPORTS Context& initializeContextFromVA(VADisplay display, bool tryInterop = true);
+
+} // namespace cv::va_intel::ocl
+
+/** @brief Converts InputArray to VASurfaceID object.
+@param display - VADisplay object.
+@param src     - source InputArray.
+@param surface - destination VASurfaceID object.
+@param size    - size of image represented by VASurfaceID object.
+ */
+CV_EXPORTS void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size);
+
+/** @brief Converts VASurfaceID object to OutputArray.
+@param display - VADisplay object.
+@param surface - source VASurfaceID object.
+@param size    - size of image represented by VASurfaceID object.
+@param dst     - destination OutputArray.
+ */
+CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst);
+
+//! @}
+
+}} // namespace cv::va_intel
+
+#endif /* OPENCV_CORE_VA_INTEL_HPP */
diff --git a/thirdparty/linux/include/opencv2/core/version.hpp b/thirdparty/linux/include/opencv2/core/version.hpp
new file mode 100644
index 0000000..85c12d8
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/version.hpp
@@ -0,0 +1,71 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright( C) 2000-2015, Intel Corporation, all rights reserved.
+// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+//(including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort(including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+  definition of the current version of OpenCV
+  Usefull to test in user programs
+*/
+
+#ifndef OPENCV_VERSION_HPP
+#define OPENCV_VERSION_HPP
+
+#define CV_VERSION_MAJOR    3
+#define CV_VERSION_MINOR    2
+#define CV_VERSION_REVISION 0
+#define CV_VERSION_STATUS   "-dev"
+
+#define CVAUX_STR_EXP(__A)  #__A
+#define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)
+
+#define CVAUX_STRW_EXP(__A)  L ## #__A
+#define CVAUX_STRW(__A)      CVAUX_STRW_EXP(__A)
+
+#define CV_VERSION          CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
+
+/* old  style version constants*/
+#define CV_MAJOR_VERSION    CV_VERSION_MAJOR
+#define CV_MINOR_VERSION    CV_VERSION_MINOR
+#define CV_SUBMINOR_VERSION CV_VERSION_REVISION
+
+#endif
diff --git a/thirdparty/linux/include/opencv2/core/wimage.hpp b/thirdparty/linux/include/opencv2/core/wimage.hpp
new file mode 100644
index 0000000..b246c89
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/core/wimage.hpp
@@ -0,0 +1,603 @@
+/*M//////////////////////////////////////////////////////////////////////////////
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to
+//  this license.  If you do not agree to this license, do not download,
+//  install, copy or use the software.
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2008, Google, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//  * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//  * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//  * The name of Intel Corporation or contributors may not be used to endorse
+//     or promote products derived from this software without specific
+//     prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+/////////////////////////////////////////////////////////////////////////////////
+//M*/
+
+#ifndef OPENCV_CORE_WIMAGE_HPP
+#define OPENCV_CORE_WIMAGE_HPP
+
+#include "opencv2/core/core_c.h"
+
+#ifdef __cplusplus
+
+namespace cv {
+
+//! @addtogroup core
+//! @{
+
+template <typename T> class WImage;
+template <typename T> class WImageBuffer;
+template <typename T> class WImageView;
+
+template<typename T, int C> class WImageC;
+template<typename T, int C> class WImageBufferC;
+template<typename T, int C> class WImageViewC;
+
+// Commonly used typedefs.
+typedef WImage<uchar>            WImage_b;
+typedef WImageView<uchar>        WImageView_b;
+typedef WImageBuffer<uchar>      WImageBuffer_b;
+
+typedef WImageC<uchar, 1>        WImage1_b;
+typedef WImageViewC<uchar, 1>    WImageView1_b;
+typedef WImageBufferC<uchar, 1>  WImageBuffer1_b;
+
+typedef WImageC<uchar, 3>        WImage3_b;
+typedef WImageViewC<uchar, 3>    WImageView3_b;
+typedef WImageBufferC<uchar, 3>  WImageBuffer3_b;
+
+typedef WImage<float>            WImage_f;
+typedef WImageView<float>        WImageView_f;
+typedef WImageBuffer<float>      WImageBuffer_f;
+
+typedef WImageC<float, 1>        WImage1_f;
+typedef WImageViewC<float, 1>    WImageView1_f;
+typedef WImageBufferC<float, 1>  WImageBuffer1_f;
+
+typedef WImageC<float, 3>        WImage3_f;
+typedef WImageViewC<float, 3>    WImageView3_f;
+typedef WImageBufferC<float, 3>  WImageBuffer3_f;
+
+// There isn't a standard for signed and unsigned short so be more
+// explicit in the typename for these cases.
+typedef WImage<short>            WImage_16s;
+typedef WImageView<short>        WImageView_16s;
+typedef WImageBuffer<short>      WImageBuffer_16s;
+
+typedef WImageC<short, 1>        WImage1_16s;
+typedef WImageViewC<short, 1>    WImageView1_16s;
+typedef WImageBufferC<short, 1>  WImageBuffer1_16s;
+
+typedef WImageC<short, 3>        WImage3_16s;
+typedef WImageViewC<short, 3>    WImageView3_16s;
+typedef WImageBufferC<short, 3>  WImageBuffer3_16s;
+
+typedef WImage<ushort>            WImage_16u;
+typedef WImageView<ushort>        WImageView_16u;
+typedef WImageBuffer<ushort>      WImageBuffer_16u;
+
+typedef WImageC<ushort, 1>        WImage1_16u;
+typedef WImageViewC<ushort, 1>    WImageView1_16u;
+typedef WImageBufferC<ushort, 1>  WImageBuffer1_16u;
+
+typedef WImageC<ushort, 3>        WImage3_16u;
+typedef WImageViewC<ushort, 3>    WImageView3_16u;
+typedef WImageBufferC<ushort, 3>  WImageBuffer3_16u;
+
+/** @brief Image class which provides a thin layer around an IplImage.
+
+The goals of the class design are:
+
+    -# All the data has explicit ownership to avoid memory leaks
+    -# No hidden allocations or copies for performance.
+    -# Easy access to OpenCV methods (which will access IPP if available)
+    -# Can easily treat external data as an image
+    -# Easy to create images which are subsets of other images
+    -# Fast pixel access which can take advantage of number of channels if known at compile time.
+
+The WImage class is the image class which provides the data accessors. The 'W' comes from the fact
+that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be
+constructed either using a WImageBuffer class which allocates and frees the data, or using a
+WImageView class which constructs a subimage or a view into external data. The view class does no
+memory management. Each class actually has two versions, one when the number of channels is known
+at compile time and one when it isn't. Using the one with the number of channels specified can
+provide some compile time optimizations by using the fact that the number of channels is a
+constant.
+
+We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner.
+This is similar to standard Euclidean coordinates with the first coordinate varying in the
+horizontal direction and the second coordinate varying in the vertical direction. Thus (c,r) is
+usually in the domain [0, width) X [0, height)
+
+Example usage:
+@code
+WImageBuffer3_b  im(5,7);  // Make a 5X7 3 channel image of type uchar
+WImageView3_b  sub_im(im, 2,2, 3,3); // 3X3 submatrix
+vector<float> vec(10, 3.0f);
+WImageView1_f user_im(&vec[0], 2, 5);  // 2X5 image w/ supplied data
+
+im.SetZero();  // same as cvSetZero(im.Ipl())
+*im(2, 3) = 15;  // Modify the element at column 2, row 3
+MySetRand(&sub_im);
+
+// Copy the second row into the first.  This can be done with no memory
+// allocation and will use SSE if IPP is available.
+int w = im.Width();
+im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1));
+
+// Doesn't care about source of data since using WImage
+void MySetRand(WImage_b* im) { // Works with any number of channels
+for (int r = 0; r < im->Height(); ++r) {
+ float* row = im->Row(r);
+ for (int c = 0; c < im->Width(); ++c) {
+    for (int ch = 0; ch < im->Channels(); ++ch, ++row) {
+      *row = uchar(rand() & 255);
+    }
+ }
+}
+}
+@endcode
+
+Functions that are not part of the basic image allocation, viewing, and access should come from
+OpenCV, except some useful functions that are not part of OpenCV can be found in wimage_util.h
+*/
+template<typename T>
+class WImage
+{
+public:
+    typedef T BaseType;
+
+    // WImage is an abstract class with no other virtual methods so make the
+    // destructor virtual.
+    virtual ~WImage() = 0;
+
+    // Accessors
+    IplImage* Ipl() {return image_; }
+    const IplImage* Ipl() const {return image_; }
+    T* ImageData() { return reinterpret_cast<T*>(image_->imageData); }
+    const T* ImageData() const {
+        return reinterpret_cast<const T*>(image_->imageData);
+    }
+
+    int Width() const {return image_->width; }
+    int Height() const {return image_->height; }
+
+    // WidthStep is the number of bytes to go to the pixel with the next y coord
+    int WidthStep() const {return image_->widthStep; }
+
+    int Channels() const {return image_->nChannels; }
+    int ChannelSize() const {return sizeof(T); }  // number of bytes per channel
+
+    // Number of bytes per pixel
+    int PixelSize() const {return Channels() * ChannelSize(); }
+
+    // Return depth type (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F) which is the number
+    // of bits per channel and with the signed bit set.
+    // This is known at compile time using specializations.
+    int Depth() const;
+
+    inline const T* Row(int r) const {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
+    }
+
+    inline T* Row(int r) {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
+    }
+
+    // Pixel accessors which returns a pointer to the start of the channel
+    inline T* operator() (int c, int r)  {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
+            c*Channels();
+    }
+
+    inline const T* operator() (int c, int r) const  {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
+            c*Channels();
+    }
+
+    // Copy the contents from another image which is just a convenience to cvCopy
+    void CopyFrom(const WImage<T>& src) { cvCopy(src.Ipl(), image_); }
+
+    // Set contents to zero which is just a convenient to cvSetZero
+    void SetZero() { cvSetZero(image_); }
+
+    // Construct a view into a region of this image
+    WImageView<T> View(int c, int r, int width, int height);
+
+protected:
+    // Disallow copy and assignment
+    WImage(const WImage&);
+    void operator=(const WImage&);
+
+    explicit WImage(IplImage* img) : image_(img) {
+        assert(!img || img->depth == Depth());
+    }
+
+    void SetIpl(IplImage* image) {
+        assert(!image || image->depth == Depth());
+        image_ = image;
+    }
+
+    IplImage* image_;
+};
+
+
+/** Image class when both the pixel type and number of channels
+are known at compile time.  This wrapper will speed up some of the operations
+like accessing individual pixels using the () operator.
+*/
+template<typename T, int C>
+class WImageC : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    explicit WImageC(IplImage* img) : WImage<T>(img) {
+        assert(!img || img->nChannels == Channels());
+    }
+
+    // Construct a view into a region of this image
+    WImageViewC<T, C> View(int c, int r, int width, int height);
+
+    // Copy the contents from another image which is just a convenience to cvCopy
+    void CopyFrom(const WImageC<T, C>& src) {
+        cvCopy(src.Ipl(), WImage<T>::image_);
+    }
+
+    // WImageC is an abstract class with no other virtual methods so make the
+    // destructor virtual.
+    virtual ~WImageC() = 0;
+
+    int Channels() const {return C; }
+
+protected:
+    // Disallow copy and assignment
+    WImageC(const WImageC&);
+    void operator=(const WImageC&);
+
+    void SetIpl(IplImage* image) {
+        assert(!image || image->depth == WImage<T>::Depth());
+        WImage<T>::SetIpl(image);
+    }
+};
+
+/** Image class which owns the data, so it can be allocated and is always
+freed.  It cannot be copied but can be explicity cloned.
+*/
+template<typename T>
+class WImageBuffer : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+
+    // Default constructor which creates an object that can be
+    WImageBuffer() : WImage<T>(0) {}
+
+    WImageBuffer(int width, int height, int nchannels) : WImage<T>(0) {
+        Allocate(width, height, nchannels);
+    }
+
+    // Constructor which takes ownership of a given IplImage so releases
+    // the image on destruction.
+    explicit WImageBuffer(IplImage* img) : WImage<T>(img) {}
+
+    // Allocate an image.  Does nothing if current size is the same as
+    // the new size.
+    void Allocate(int width, int height, int nchannels);
+
+    // Set the data to point to an image, releasing the old data
+    void SetIpl(IplImage* img) {
+        ReleaseImage();
+        WImage<T>::SetIpl(img);
+    }
+
+    // Clone an image which reallocates the image if of a different dimension.
+    void CloneFrom(const WImage<T>& src) {
+        Allocate(src.Width(), src.Height(), src.Channels());
+        CopyFrom(src);
+    }
+
+    ~WImageBuffer() {
+        ReleaseImage();
+    }
+
+    // Release the image if it isn't null.
+    void ReleaseImage() {
+        if (WImage<T>::image_) {
+            IplImage* image = WImage<T>::image_;
+            cvReleaseImage(&image);
+            WImage<T>::SetIpl(0);
+        }
+    }
+
+    bool IsNull() const {return WImage<T>::image_ == NULL; }
+
+private:
+    // Disallow copy and assignment
+    WImageBuffer(const WImageBuffer&);
+    void operator=(const WImageBuffer&);
+};
+
+/** Like a WImageBuffer class but when the number of channels is known at compile time.
+*/
+template<typename T, int C>
+class WImageBufferC : public WImageC<T, C>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    // Default constructor which creates an object that can be
+    WImageBufferC() : WImageC<T, C>(0) {}
+
+    WImageBufferC(int width, int height) : WImageC<T, C>(0) {
+        Allocate(width, height);
+    }
+
+    // Constructor which takes ownership of a given IplImage so releases
+    // the image on destruction.
+    explicit WImageBufferC(IplImage* img) : WImageC<T, C>(img) {}
+
+    // Allocate an image.  Does nothing if current size is the same as
+    // the new size.
+    void Allocate(int width, int height);
+
+    // Set the data to point to an image, releasing the old data
+    void SetIpl(IplImage* img) {
+        ReleaseImage();
+        WImageC<T, C>::SetIpl(img);
+    }
+
+    // Clone an image which reallocates the image if of a different dimension.
+    void CloneFrom(const WImageC<T, C>& src) {
+        Allocate(src.Width(), src.Height());
+        CopyFrom(src);
+    }
+
+    ~WImageBufferC() {
+        ReleaseImage();
+    }
+
+    // Release the image if it isn't null.
+    void ReleaseImage() {
+        if (WImage<T>::image_) {
+            IplImage* image = WImage<T>::image_;
+            cvReleaseImage(&image);
+            WImageC<T, C>::SetIpl(0);
+        }
+    }
+
+    bool IsNull() const {return WImage<T>::image_ == NULL; }
+
+private:
+    // Disallow copy and assignment
+    WImageBufferC(const WImageBufferC&);
+    void operator=(const WImageBufferC&);
+};
+
+/** View into an image class which allows treating a subimage as an image or treating external data
+as an image
+*/
+template<typename T> class WImageView : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+
+    // Construct a subimage.  No checks are done that the subimage lies
+    // completely inside the original image.
+    WImageView(WImage<T>* img, int c, int r, int width, int height);
+
+    // Refer to external data.
+    // If not given width_step assumed to be same as width.
+    WImageView(T* data, int width, int height, int channels, int width_step = -1);
+
+    // Refer to external data.  This does NOT take ownership
+    // of the supplied IplImage.
+    WImageView(IplImage* img) : WImage<T>(img) {}
+
+    // Copy constructor
+    WImageView(const WImage<T>& img) : WImage<T>(0) {
+        header_ = *(img.Ipl());
+        WImage<T>::SetIpl(&header_);
+    }
+
+    WImageView& operator=(const WImage<T>& img) {
+        header_ = *(img.Ipl());
+        WImage<T>::SetIpl(&header_);
+        return *this;
+    }
+
+protected:
+    IplImage header_;
+};
+
+
+template<typename T, int C>
+class WImageViewC : public WImageC<T, C>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    // Default constructor needed for vectors of views.
+    WImageViewC();
+
+    virtual ~WImageViewC() {}
+
+    // Construct a subimage.  No checks are done that the subimage lies
+    // completely inside the original image.
+    WImageViewC(WImageC<T, C>* img,
+        int c, int r, int width, int height);
+
+    // Refer to external data
+    WImageViewC(T* data, int width, int height, int width_step = -1);
+
+    // Refer to external data.  This does NOT take ownership
+    // of the supplied IplImage.
+    WImageViewC(IplImage* img) : WImageC<T, C>(img) {}
+
+    // Copy constructor which does a shallow copy to allow multiple views
+    // of same data.  gcc-4.1.1 gets confused if both versions of
+    // the constructor and assignment operator are not provided.
+    WImageViewC(const WImageC<T, C>& img) : WImageC<T, C>(0) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+    }
+    WImageViewC(const WImageViewC<T, C>& img) : WImageC<T, C>(0) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+    }
+
+    WImageViewC& operator=(const WImageC<T, C>& img) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+        return *this;
+    }
+    WImageViewC& operator=(const WImageViewC<T, C>& img) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+        return *this;
+    }
+
+protected:
+    IplImage header_;
+};
+
+
+// Specializations for depth
+template<>
+inline int WImage<uchar>::Depth() const {return IPL_DEPTH_8U; }
+template<>
+inline int WImage<signed char>::Depth() const {return IPL_DEPTH_8S; }
+template<>
+inline int WImage<short>::Depth() const {return IPL_DEPTH_16S; }
+template<>
+inline int WImage<ushort>::Depth() const {return IPL_DEPTH_16U; }
+template<>
+inline int WImage<int>::Depth() const {return IPL_DEPTH_32S; }
+template<>
+inline int WImage<float>::Depth() const {return IPL_DEPTH_32F; }
+template<>
+inline int WImage<double>::Depth() const {return IPL_DEPTH_64F; }
+
+template<typename T> inline WImage<T>::~WImage() {}
+template<typename T, int C> inline WImageC<T, C>::~WImageC() {}
+
+template<typename T>
+inline void WImageBuffer<T>::Allocate(int width, int height, int nchannels)
+{
+    if (IsNull() || WImage<T>::Width() != width ||
+        WImage<T>::Height() != height || WImage<T>::Channels() != nchannels) {
+        ReleaseImage();
+        WImage<T>::image_ = cvCreateImage(cvSize(width, height),
+            WImage<T>::Depth(), nchannels);
+    }
+}
+
+template<typename T, int C>
+inline void WImageBufferC<T, C>::Allocate(int width, int height)
+{
+    if (IsNull() || WImage<T>::Width() != width || WImage<T>::Height() != height) {
+        ReleaseImage();
+        WImageC<T, C>::SetIpl(cvCreateImage(cvSize(width, height),WImage<T>::Depth(), C));
+    }
+}
+
+template<typename T>
+WImageView<T>::WImageView(WImage<T>* img, int c, int r, int width, int height)
+        : WImage<T>(0)
+{
+    header_ = *(img->Ipl());
+    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
+    header_.width = width;
+    header_.height = height;
+    WImage<T>::SetIpl(&header_);
+}
+
+template<typename T>
+WImageView<T>::WImageView(T* data, int width, int height, int nchannels, int width_step)
+          : WImage<T>(0)
+{
+    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), nchannels);
+    header_.imageData = reinterpret_cast<char*>(data);
+    if (width_step > 0) {
+        header_.widthStep = width_step;
+    }
+    WImage<T>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC(WImageC<T, C>* img, int c, int r, int width, int height)
+        : WImageC<T, C>(0)
+{
+    header_ = *(img->Ipl());
+    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
+    header_.width = width;
+    header_.height = height;
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC() : WImageC<T, C>(0) {
+    cvInitImageHeader(&header_, cvSize(0, 0), WImage<T>::Depth(), C);
+    header_.imageData = reinterpret_cast<char*>(0);
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC(T* data, int width, int height, int width_step)
+    : WImageC<T, C>(0)
+{
+    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), C);
+    header_.imageData = reinterpret_cast<char*>(data);
+    if (width_step > 0) {
+        header_.widthStep = width_step;
+    }
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+// Construct a view into a region of an image
+template<typename T>
+WImageView<T> WImage<T>::View(int c, int r, int width, int height) {
+    return WImageView<T>(this, c, r, width, height);
+}
+
+template<typename T, int C>
+WImageViewC<T, C> WImageC<T, C>::View(int c, int r, int width, int height) {
+    return WImageViewC<T, C>(this, c, r, width, height);
+}
+
+//! @} core
+
+}  // end of namespace
+
+#endif // __cplusplus
+
+#endif