diff options
author | Tom Rondeau | 2012-02-02 17:26:39 -0500 |
---|---|---|
committer | Tom Rondeau | 2012-02-13 14:56:34 -0500 |
commit | ae663decab658be25ac01072fa2f5c8454bd6167 (patch) | |
tree | c9bf58b3a8624764ed92f5c4bb9a8df6d54dc07b /volk | |
parent | a3b19015cb1c896aef19a7817458878337b3f5e3 (diff) | |
download | gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.gz gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.bz2 gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.zip |
core: moving multiply_const_ff from gengen to general to take advantage of volk.
Also adds SSE, AVX, and unaligned Volk versions of the scalar float multiply kernel.
Diffstat (limited to 'volk')
-rw-r--r-- | volk/apps/volk_profile.cc | 1 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_multiply_32f_a.h | 75 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 12 |
3 files changed, 82 insertions, 6 deletions
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index c198ec42d..7da8651e9 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -110,6 +110,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 204600, 1000, &results); char path[256]; get_config_path(path); diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h index 37223dc81..d1c6f3f65 100644 --- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h +++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h @@ -4,6 +4,81 @@ #include <inttypes.h> #include <stdio.h> +#ifdef LV_HAVE_SSE +#include <xmmintrin.h> +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m128 aVal, bVal, cVal; + bVal = _mm_set_ps1(scalar); + for(;number < quarterPoints; number++){ + + aVal = _mm_load_ps(aPtr); + + cVal = _mm_mul_ps(aVal, bVal); + + _mm_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_AVX +#include <immintrin.h> +/*! 
+ \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m256 aVal, bVal, cVal; + bVal = _mm256_set1_ps(scalar); + for(;number < eighthPoints; number++){ + + aVal = _mm256_load_ps(aPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_AVX */ + + #ifdef LV_HAVE_GENERIC /*! \brief Scalar float multiply diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h index a9dfcda19..450a89066 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h @@ -9,10 +9,10 @@ #ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
- \brief Multiplies the two input complex vectors and stores their results in the third vector + \brief Multiplies the input vector by a scalar and stores the results in the third vector \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ @@ -53,10 +53,10 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons #ifdef LV_HAVE_GENERIC /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector + \brief Multiplies the input vector by a scalar and stores the results in the third vector \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ |