/* volk/include/volk/volk_16s_convert_8s_unaligned16.h */

#ifndef INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H
#define INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H

#include <inttypes.h>
#include <stdio.h>

#if LV_HAVE_SSE2
#include <emmintrin.h>
/*!
  \brief Narrows 16 bit integers to 8 bit integers by keeping the upper byte of each value (SSE2 version)
  \param outputVector The 8 bit output data buffer; num_points values are written
  \param inputVector The 16 bit input data buffer; num_points values are read
  \param num_points The number of data values to be converted
  \note Unaligned loads and stores are used, so neither buffer needs to be 16-byte aligned
*/
static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
    const unsigned int fullBlocks = num_points / 16;
    const int16_t* src = inputVector;
    int8_t* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    /* Vector path: every pass turns 16 input values into 16 output bytes. */
    for(block = 0; block < fullBlocks; block++){
      __m128i lower = _mm_loadu_si128((const __m128i*)src);
      __m128i upper = _mm_loadu_si128((const __m128i*)(src + 8));

      /* Arithmetic shift keeps the sign; the shifted values already fit in
         8 bits, so the saturating pack below never has to clamp. */
      lower = _mm_srai_epi16(lower, 8);
      upper = _mm_srai_epi16(upper, 8);

      _mm_storeu_si128((__m128i*)dst, _mm_packs_epi16(lower, upper));

      src += 16;
      dst += 16;
    }

    /* Scalar path for the trailing (num_points % 16) values. */
    for(idx = fullBlocks * 16; idx < num_points; idx++){
      outputVector[idx] = (int8_t)(inputVector[idx] >> 8);
    }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Narrows 16 bit integers to 8 bit integers by keeping the upper byte of each value (portable version)
  \param outputVector The 8 bit output data buffer; num_points values are written
  \param inputVector The 16 bit input data buffer; num_points values are read
  \param num_points The number of data values to be converted
  \note Plain element-wise access is used, so no special buffer alignment is required
*/
static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  unsigned int idx;

  /* Shift each sample right by 8 bits and store the result as one byte. */
  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = (int8_t)(inputVector[idx] >> 8);
  }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H */