summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Rondeau 2012-01-26 20:07:51 -0500
committer Tom Rondeau 2012-01-26 20:07:51 -0500
commit 42d9560a50bbbab143b48bda5a73a5379818ddbe (patch)
tree 79e5df8ec0041073a07d8675d8d0a6cd0cb2b003
parent d8b02979cef097971bc0656b904f7b51d19b03c9 (diff)
download gnuradio-42d9560a50bbbab143b48bda5a73a5379818ddbe.tar.gz
gnuradio-42d9560a50bbbab143b48bda5a73a5379818ddbe.tar.bz2
gnuradio-42d9560a50bbbab143b48bda5a73a5379818ddbe.zip
volk: float_to_int and float_to_char updated to clip instead of wrapping around. The float-to-int conversion clips at a magnitude slightly smaller than 2^31 because of the limits of the float representation.
-rw-r--r-- volk/include/volk/volk_32f_s32f_convert_32i_a.h | 60
-rw-r--r-- volk/include/volk/volk_32f_s32f_convert_32i_u.h | 45
-rw-r--r-- volk/include/volk/volk_32f_s32f_convert_8i_a.h | 53
-rw-r--r-- volk/include/volk/volk_32f_s32f_convert_8i_u.h | 53
4 files changed, 183 insertions, 28 deletions
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index aa370e614..15fa282fb 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -21,14 +21,22 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m256 vScalar = _mm256_set1_ps(scalar);
__m256 inputVal1;
__m256i intInputVal1;
+ __m256 vmin_val = _mm256_set1_ps(min_val);
+ __m256 vmax_val = _mm256_set1_ps(max_val);
for(;number < eighthPoints; number++){
inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
- intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm256_cvtps_epi32(inputVal1);
_mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 8;
@@ -36,7 +44,12 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
number = eighthPoints * 8;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_AVX */
@@ -57,14 +70,22 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1;
__m128i intInputVal1;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < quarterPoints; number++){
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 4;
@@ -72,7 +93,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -93,8 +119,15 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -102,7 +135,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
ret = _mm_load_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -113,7 +146,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -130,9 +168,17 @@ static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, co
int32_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int32_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index b4e954dc4..d8493454b 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -21,14 +21,24 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ //float min_val = -2147483647;
+ //float max_val = 2147483647;
+ float min_val = -2146400000;
+ float max_val = 2146400000;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1;
__m128i intInputVal1;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < quarterPoints; number++){
inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 4;
@@ -36,7 +46,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -58,8 +73,15 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -67,7 +89,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
ret = _mm_loadu_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -78,7 +100,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -96,9 +123,17 @@ static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, co
int32_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int32_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
index 8d87a07d7..05172171c 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < sixteenthPoints; number++){
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
- intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
- intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+ inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+ inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
+ intInputVal2 = _mm_cvtps_epi32(inputVal2);
+ intInputVal3 = _mm_cvtps_epi32(inputVal3);
+ intInputVal4 = _mm_cvtps_epi32(inputVal4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
number = sixteenthPoints * 16;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int8_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -67,9 +84,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
const unsigned int quarterPoints = num_points / 4;
const float* inputVectorPtr = (const float*)inputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
int8_t* outputVectorPtr = outputVector;
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -77,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
ret = _mm_load_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -88,7 +112,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int8_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -105,9 +134,17 @@ static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, cons
int8_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -128;
+ float max_val = 127;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ * scalar);
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int8_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index 1c6bf87c9..12991e9c1 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < sixteenthPoints; number++){
inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
- intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
- intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+ inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+ inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
+ intInputVal2 = _mm_cvtps_epi32(inputVal2);
+ intInputVal3 = _mm_cvtps_epi32(inputVal3);
+ intInputVal4 = _mm_cvtps_epi32(inputVal4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
number = sixteenthPoints * 16;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -69,8 +86,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -78,7 +102,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
ret = _mm_loadu_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -89,7 +113,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -107,9 +136,17 @@ static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, cons
int8_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -128;
+ float max_val = 127;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int16_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */