diff options
Diffstat (limited to 'include/gal/opengl/glm/gtx/simd_vec4.inl')
-rw-r--r-- | include/gal/opengl/glm/gtx/simd_vec4.inl | 727 |
1 files changed, 727 insertions, 0 deletions
diff --git a/include/gal/opengl/glm/gtx/simd_vec4.inl b/include/gal/opengl/glm/gtx/simd_vec4.inl new file mode 100644 index 0000000..0b22115 --- /dev/null +++ b/include/gal/opengl/glm/gtx/simd_vec4.inl @@ -0,0 +1,727 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////// +// OpenGL Mathematics Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net) +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Created : 2009-05-07 +// Updated : 2009-05-07 +// Licence : This source is under MIT License +// File : glm/gtx/simd_vec4.inl +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace glm{ +namespace detail{ + +template <int Value> +struct mask +{ + enum{value = Value}; +}; + +////////////////////////////////////// +// Implicit basic constructors + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD() +#ifdef GLM_SIMD_ENABLE_DEFAULT_INIT + : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)) +#endif +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) : + Data(Data) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) : + Data(v.Data) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(tvec4<float> const & v) : + Data(_mm_set_ps(v.w, v.z, v.y, v.x)) +{} + +////////////////////////////////////// +// Explicit basic constructors + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) : + Data(_mm_set1_ps(s)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) : +// Data(_mm_setr_ps(x, y, z, w)) + Data(_mm_set_ps(w, z, y, x)) +{} +/* +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) : + Data(_mm_load_ps(v)) +{} +*/ +////////////////////////////////////// +// Swizzle constructors + +//fvec4SIMD(ref4<float> const & r); + +////////////////////////////////////// +// Convertion vector constructors + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) : + Data(_mm_set_ps(s2, s1, v.y, v.x)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) : + Data(_mm_set_ps(s2, v.y, v.x, s1)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) : + Data(_mm_set_ps(v.y, v.x, s2, s1)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) : + Data(_mm_set_ps(s, v.z, v.y, v.x)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) : + Data(_mm_set_ps(v.z, v.y, v.x, s)) +{} + +GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) : + Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x)) +{} + +//GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) : +// Data(_mm_cvtepi32_ps(v.Data)) +//{} + +////////////////////////////////////// +// Unary arithmetic operators + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v) +{ + this->Data = v.Data; + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s) +{ + this->Data = _mm_add_ps(Data, _mm_set_ps1(s)); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v) +{ + this->Data = _mm_add_ps(this->Data , v.Data); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s) +{ + this->Data = _mm_sub_ps(Data, _mm_set_ps1(s)); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v) +{ + this->Data = _mm_sub_ps(this->Data , v.Data); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s) +{ + this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v) +{ + this->Data = _mm_mul_ps(this->Data , v.Data); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s) +{ + this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v) +{ + this->Data = _mm_div_ps(this->Data , v.Data); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++() +{ + this->Data = _mm_add_ps(this->Data , glm::detail::one); + return *this; +} + +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--() +{ + this->Data = _mm_sub_ps(this->Data, glm::detail::one); + return *this; +} + +////////////////////////////////////// +// Swizzle operators + +template <comp X, comp Y, comp Z, comp W> +GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const +{ + __m128 Data = _mm_shuffle_ps( + this->Data, this->Data, + mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); + return fvec4SIMD(Data); +} + +template <comp X, comp Y, comp Z, comp W> +GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle() +{ + this->Data = _mm_shuffle_ps( + this->Data, this->Data, + mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); + return *this; +} + +// operator+ +GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s) +{ + return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s))); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v) +{ + return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2) +{ + return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data)); +} + +//operator- +GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s) +{ + return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s))); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v) +{ + return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2) +{ + return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data)); +} + +//operator* +GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s) +{ + __m128 par0 = v.Data; + __m128 par1 = _mm_set1_ps(s); + return fvec4SIMD(_mm_mul_ps(par0, par1)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v) +{ + __m128 par0 = _mm_set1_ps(s); + __m128 par1 = v.Data; + return fvec4SIMD(_mm_mul_ps(par0, par1)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2) +{ + return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data)); +} + +//operator/ +GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s) +{ + __m128 par0 = v.Data; + __m128 par1 = _mm_set1_ps(s); + return fvec4SIMD(_mm_div_ps(par0, par1)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v) +{ + __m128 par0 = _mm_set1_ps(s); + __m128 par1 = v.Data; + return fvec4SIMD(_mm_div_ps(par0, par1)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2) +{ + return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data)); +} + +// Unary constant operators +GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v) +{ + return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int) +{ + return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one)); +} + +GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int) +{ + return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one)); +} + +}//namespace detail + +GLM_FUNC_QUALIFIER detail::tvec4<float> vec4_cast +( + detail::fvec4SIMD const & x +) +{ + GLM_ALIGN(16) detail::tvec4<float> Result; + _mm_store_ps(&Result[0], x.Data); + return Result; +} + +// Other possible implementation +//float abs(float a) +//{ +// return max(-a, a); +//} +GLM_FUNC_QUALIFIER detail::fvec4SIMD abs +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_abs_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD sign +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_sgn_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD floor +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_flr_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc +( + detail::fvec4SIMD const & x +) +{ + //return x < 0 ? -floor(-x) : floor(x); + + __m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data)); + __m128 Sub0 = _mm_sub_ps(Flr0, x.Data); + __m128 Flr1 = detail::sse_flr_ps(x.Data); + + __m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero); + __m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero); + + __m128 And0 = _mm_and_ps(Sub0, Cmp0); + __m128 And1 = _mm_and_ps(Flr1, Cmp1); + + return _mm_or_ps(And0, And1); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD round +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_rnd_ps(x.Data); +} + +//GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven +//( +// detail::fvec4SIMD const & x +//) +//{ + +//} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_ceil_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fract +( + detail::fvec4SIMD const & x +) +{ + return detail::sse_frc_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD mod +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + return detail::sse_mod_ps(x.Data, y.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD mod +( + detail::fvec4SIMD const & x, + float const & y +) +{ + return detail::sse_mod_ps(x.Data, _mm_set1_ps(y)); +} + +//GLM_FUNC_QUALIFIER detail::fvec4SIMD modf +//( +// detail::fvec4SIMD const & x, +// detail::fvec4SIMD & i +//) +//{ + +//} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD min +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + return _mm_min_ps(x.Data, y.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD min +( + detail::fvec4SIMD const & x, + float const & y +) +{ + return _mm_min_ps(x.Data, _mm_set1_ps(y)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD max +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + return _mm_max_ps(x.Data, y.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD max +( + detail::fvec4SIMD const & x, + float const & y +) +{ + return _mm_max_ps(x.Data, _mm_set1_ps(y)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & minVal, + detail::fvec4SIMD const & maxVal +) +{ + return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp +( + detail::fvec4SIMD const & x, + float const & minVal, + float const & maxVal +) +{ + return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD mix +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y, + detail::fvec4SIMD const & a +) +{ + __m128 Sub0 = _mm_sub_ps(y.Data, x.Data); + __m128 Mul0 = _mm_mul_ps(a.Data, Sub0); + return _mm_add_ps(x.Data, Mul0); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD step +( + detail::fvec4SIMD const & edge, + detail::fvec4SIMD const & x +) +{ + __m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data); + return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD step +( + float const & edge, + detail::fvec4SIMD const & x +) +{ + __m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge)); + return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep +( + detail::fvec4SIMD const & edge0, + detail::fvec4SIMD const & edge1, + detail::fvec4SIMD const & x +) +{ + return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep +( + float const & edge0, + float const & edge1, + detail::fvec4SIMD const & x +) +{ + return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data); +} + +//GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x) +//{ + +//} + +//GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x) +//{ + +//} + +//GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt +//( +// detail::fvec4SIMD const & value +//) +//{ + +//} + +//GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat +//( +// detail::ivec4SIMD const & value +//) +//{ + +//} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fma +( + detail::fvec4SIMD const & a, + detail::fvec4SIMD const & b, + detail::fvec4SIMD const & c +) +{ + return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); +} + +GLM_FUNC_QUALIFIER float length +( + detail::fvec4SIMD const & x +) +{ + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = sqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; +} + +GLM_FUNC_QUALIFIER float fastLength +( + detail::fvec4SIMD const & x +) +{ + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = fastSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; +} + +GLM_FUNC_QUALIFIER float niceLength +( + detail::fvec4SIMD const & x +) +{ + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = niceSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD length4 +( + detail::fvec4SIMD const & x +) +{ + return sqrt(dot4(x, x)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4 +( + detail::fvec4SIMD const & x +) +{ + return fastSqrt(dot4(x, x)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4 +( + detail::fvec4SIMD const & x +) +{ + return niceSqrt(dot4(x, x)); +} + +GLM_FUNC_QUALIFIER float distance +( + detail::fvec4SIMD const & p0, + detail::fvec4SIMD const & p1 +) +{ + float Result = 0; + _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data)); + return Result; +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4 +( + detail::fvec4SIMD const & p0, + detail::fvec4SIMD const & p1 +) +{ + return detail::sse_dst_ps(p0.Data, p1.Data); +} + +GLM_FUNC_QUALIFIER float dot +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + float Result = 0; + _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data)); + return Result; +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4 +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + return detail::sse_dot_ps(x.Data, y.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD cross +( + detail::fvec4SIMD const & x, + detail::fvec4SIMD const & y +) +{ + return detail::sse_xpd_ps(x.Data, y.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize +( + detail::fvec4SIMD const & x +) +{ + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize +( + detail::fvec4SIMD const & x +) +{ + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = fastInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward +( + detail::fvec4SIMD const & N, + detail::fvec4SIMD const & I, + detail::fvec4SIMD const & Nref +) +{ + return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect +( + detail::fvec4SIMD const & I, + detail::fvec4SIMD const & N +) +{ + return detail::sse_rfe_ps(I.Data, N.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD refract +( + detail::fvec4SIMD const & I, + detail::fvec4SIMD const & N, + float const & eta +) +{ + return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) +{ + return _mm_mul_ps(inversesqrt(x).Data, x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) +{ + return _mm_sqrt_ps(x.Data); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) +{ + return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); +} + +// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration +// By Elan Ruskin, http://assemblyrequired.crashworks.org/ +GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) +{ + GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load + GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; + + __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode + __m128 halfrecip = _mm_mul_ps(half, recip); + __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip))); + return _mm_mul_ps(halfrecip, threeminus_xrr); +} + +GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) +{ + return _mm_rsqrt_ps(x.Data); +} + +}//namespace glm |