summaryrefslogtreecommitdiff
path: root/volk/include/volk/volk_32f_convert_64f_aligned16.h
blob: 91a85581352b672379fcaafa1dcb5be77c73830c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#ifndef INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H
#define INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H

#include <inttypes.h>
#include <stdio.h>

#if LV_HAVE_SSE2
#include <emmintrin.h>
  /*!
    \brief Widens single-precision samples to double precision using SSE2
    \param outputVector Receives the converted double values; written with _mm_store_pd, so it must be 16-byte aligned
    \param inputVector The float values to convert; read with _mm_load_ps, so it must be 16-byte aligned
    \param num_points The number of samples to convert from inputVector into outputVector
  */
static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points */
  const unsigned int vectorizedPoints = (num_points / 4) * 4;
  unsigned int idx;

  /* Vector body: four floats in, four doubles out per iteration */
  for(idx = 0; idx < vectorizedPoints; idx += 4){
    const __m128 fourFloats = _mm_load_ps(inputVector + idx);
    /* Copy the upper two floats into the low half, since _mm_cvtps_pd only converts the low pair */
    const __m128 upperPair = _mm_movehl_ps(fourFloats, fourFloats);

    _mm_store_pd(outputVector + idx, _mm_cvtps_pd(fourFloats));
    _mm_store_pd(outputVector + idx + 2, _mm_cvtps_pd(upperPair));
  }

  /* Scalar tail: the remaining zero to three samples */
  for(; idx < num_points; idx++){
    outputVector[idx] = (double)inputVector[idx];
  }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC
/*!
  \brief Widens single-precision samples to double precision, one element at a time
  \param outputVector Receives the converted double values
  \param inputVector The float values to convert
  \param num_points The number of samples to convert from inputVector into outputVector
*/
static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int idx;

  /* Plain scalar conversion, element by element in ascending order */
  for(idx = 0; idx < num_points; ++idx){
    outputVector[idx] = (double)inputVector[idx];
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H */