diff options
Diffstat (limited to 'gr-vocoder/lib/codec2/sine.c')
-rw-r--r-- | gr-vocoder/lib/codec2/sine.c | 638 |
1 files changed, 638 insertions, 0 deletions
diff --git a/gr-vocoder/lib/codec2/sine.c b/gr-vocoder/lib/codec2/sine.c new file mode 100644 index 000000000..45cc9de71 --- /dev/null +++ b/gr-vocoder/lib/codec2/sine.c @@ -0,0 +1,638 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: sine.c + AUTHOR......: David Rowe + DATE CREATED: 19/8/2010 + + Sinusoidal analysis and synthesis functions. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 1990-2010 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +/*---------------------------------------------------------------------------*\ + + INCLUDES + +\*---------------------------------------------------------------------------*/ + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#include "defines.h" +#include "sine.h" +#include "fft.h" + +#define HPF_BETA 0.125 + +/*---------------------------------------------------------------------------*\ + + HEADERS + +\*---------------------------------------------------------------------------*/ + +void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, + float pstep); + +/*---------------------------------------------------------------------------*\ + + FUNCTIONS + +\*---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_analysis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the time domain analysis window and it's DFT. + +\*---------------------------------------------------------------------------*/ + +void make_analysis_window(float w[],COMP W[]) +{ + float m; + COMP temp; + int i,j; + + /* + Generate Hamming window centered on M-sample pitch analysis window + + 0 M/2 M-1 + |-------------|-------------| + |-------|-------| + NW samples + + All our analysis/synthsis is centred on the M/2 sample. + */ + + m = 0.0; + for(i=0; i<M/2-NW/2; i++) + w[i] = 0.0; + for(i=M/2-NW/2,j=0; i<M/2+NW/2; i++,j++) { + w[i] = 0.5 - 0.5*cos(TWO_PI*j/(NW-1)); + m += w[i]*w[i]; + } + for(i=M/2+NW/2; i<M; i++) + w[i] = 0.0; + + /* Normalise - makes freq domain amplitude estimation straight + forward */ + + m = 1.0/sqrt(m*FFT_ENC); + for(i=0; i<M; i++) { + w[i] *= m; + } + + /* + Generate DFT of analysis window, used for later processing. Note + we modulo FFT_ENC shift the time domain window w[], this makes the + imaginary part of the DFT W[] equal to zero as the shifted w[] is + even about the n=0 time axis if NW is odd. Having the imag part + of the DFT W[] makes computation easier. + + 0 FFT_ENC-1 + |-------------------------| + + ----\ /---- + \ / + \ / <- shifted version of window w[n] + \ / + \ / + ------- + + |---------| |---------| + NW/2 NW/2 + */ + + for(i=0; i<FFT_ENC; i++) { + W[i].real = 0.0; + W[i].imag = 0.0; + } + for(i=0; i<NW/2; i++) + W[i].real = w[i+M/2]; + for(i=FFT_ENC-NW/2,j=M/2-NW/2; i<FFT_ENC; i++,j++) + W[i].real = w[j]; + + fft(&W[0].real,FFT_ENC,-1); /* "Numerical Recipes in C" FFT */ + + /* + Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later + analysis convenient. + + Before: + + + 0 FFT_ENC-1 + |----------|---------| + __ _ + \ / + \_______________/ + + After: + + 0 FFT_ENC-1 + |----------|---------| + ___ + / \ + ________/ \_______ + + */ + + + for(i=0; i<FFT_ENC/2; i++) { + temp.real = W[i].real; + temp.imag = W[i].imag; + W[i].real = W[i+FFT_ENC/2].real; + W[i].imag = W[i+FFT_ENC/2].imag; + W[i+FFT_ENC/2].real = temp.real; + W[i+FFT_ENC/2].imag = temp.imag; + } + +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: hpf + AUTHOR......: David Rowe + DATE CREATED: 16 Nov 2010 + + High pass filter with a -3dB point of about 160Hz. + + y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1) + +\*---------------------------------------------------------------------------*/ + +float hpf(float x, float states[]) +{ + states[0] += -HPF_BETA*states[0] + x - states[1]; + states[1] = x; + + return states[0]; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: dft_speech + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Finds the DFT of the current speech input speech frame. + +\*---------------------------------------------------------------------------*/ + +void dft_speech(COMP Sw[], float Sn[], float w[]) +{ + int i; + + for(i=0; i<FFT_ENC; i++) { + Sw[i].real = 0.0; + Sw[i].imag = 0.0; + } + + /* Centre analysis window on time axis, we need to arrange input + to FFT this way to make FFT phases correct */ + + /* move 2nd half to start of FFT input vector */ + + for(i=0; i<NW/2; i++) + Sw[i].real = Sn[i+M/2]*w[i+M/2]; + + /* move 1st half to end of FFT input vector */ + + for(i=0; i<NW/2; i++) + Sw[FFT_ENC-NW/2+i].real = Sn[i+M/2-NW/2]*w[i+M/2-NW/2]; + + fft(&Sw[0].real,FFT_ENC,-1); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: two_stage_pitch_refinement + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Refines the current pitch estimate using the harmonic sum pitch + estimation technique. + +\*---------------------------------------------------------------------------*/ + +void two_stage_pitch_refinement(MODEL *model, COMP Sw[]) +{ + float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ + + /* Coarse refinement */ + + pmax = TWO_PI/model->Wo + 5; + pmin = TWO_PI/model->Wo - 5; + pstep = 1.0; + hs_pitch_refinement(model,Sw,pmin,pmax,pstep); + + /* Fine refinement */ + + pmax = TWO_PI/model->Wo + 1; + pmin = TWO_PI/model->Wo - 1; + pstep = 0.25; + hs_pitch_refinement(model,Sw,pmin,pmax,pstep); + + /* Limit range */ + + if (model->Wo < TWO_PI/P_MAX) + model->Wo = TWO_PI/P_MAX; + if (model->Wo > TWO_PI/P_MIN) + model->Wo = TWO_PI/P_MIN; + + model->L = floor(PI/model->Wo); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: hs_pitch_refinement + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Harmonic sum pitch refinement function. + + pmin pitch search range minimum + pmax pitch search range maximum + step pitch search step size + model current pitch estimate in model.Wo + + model refined pitch estimate in model.Wo + +\*---------------------------------------------------------------------------*/ + +void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) +{ + int m; /* loop variable */ + int b; /* bin for current harmonic centre */ + float E; /* energy for current pitch*/ + float Wo; /* current "test" fundamental freq. */ + float Wom; /* Wo that maximises E */ + float Em; /* mamimum energy */ + float r; /* number of rads/bin */ + float p; /* current pitch */ + + /* Initialisation */ + + model->L = PI/model->Wo; /* use initial pitch est. for L */ + Wom = model->Wo; + Em = 0.0; + r = TWO_PI/FFT_ENC; + + /* Determine harmonic sum for a range of Wo values */ + + for(p=pmin; p<=pmax; p+=pstep) { + E = 0.0; + Wo = TWO_PI/p; + + /* Sum harmonic magnitudes */ + + for(m=1; m<=model->L; m++) { + b = floor(m*Wo/r + 0.5); + E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; + } + + /* Compare to see if this is a maximum */ + + if (E > Em) { + Em = E; + Wom = Wo; + } + } + + model->Wo = Wom; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: estimate_amplitudes + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Estimates the complex amplitudes of the harmonics. + +\*---------------------------------------------------------------------------*/ + +void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]) +{ + int i,m; /* loop variables */ + int am,bm; /* bounds of current harmonic */ + int b; /* DFT bin of centre of current harmonic */ + float den; /* denominator of amplitude expression */ + float r; /* number of rads/bin */ + int offset; + COMP Am; + + r = TWO_PI/FFT_ENC; + + for(m=1; m<=model->L; m++) { + den = 0.0; + am = floor((m - 0.5)*model->Wo/r + 0.5); + bm = floor((m + 0.5)*model->Wo/r + 0.5); + b = floor(m*model->Wo/r + 0.5); + + /* Estimate ampltude of harmonic */ + + den = 0.0; + Am.real = Am.imag = 0.0; + for(i=am; i<bm; i++) { + den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; + offset = i + FFT_ENC/2 - floor(m*model->Wo/r + 0.5); + Am.real += Sw[i].real*W[offset].real; + Am.imag += Sw[i].imag*W[offset].real; + } + + model->A[m] = sqrt(den); + + /* Estimate phase of harmonic */ + + model->phi[m] = atan2(Sw[b].imag,Sw[b].real); + } +} + +/*---------------------------------------------------------------------------*\ + + est_voicing_mbe() + + Returns the error of the MBE cost function for a fiven F0. + + Note: I think a lot of the operations below can be simplified as + W[].imag = 0 and has been normalised such that den always equals 1. + +\*---------------------------------------------------------------------------*/ + +float est_voicing_mbe( + MODEL *model, + COMP Sw[], + COMP W[], + COMP Sw_[], /* DFT of all voiced synthesised signal */ + /* useful for debugging/dump file */ + COMP Ew[], /* DFT of error */ + float prev_Wo) +{ + int i,l,al,bl,m; /* loop variables */ + COMP Am; /* amplitude sample for this band */ + int offset; /* centers Hw[] about current harmonic */ + float den; /* denominator of Am expression */ + float error; /* accumulated error between original and synthesised */ + float Wo; + float sig, snr; + float elow, ehigh, eratio; + float dF0, sixty; + + sig = 0.0; + for(l=1; l<=model->L/4; l++) { + sig += model->A[l]*model->A[l]; + } + for(i=0; i<FFT_ENC; i++) { + Sw_[i].real = 0.0; + Sw_[i].imag = 0.0; + Ew[i].real = 0.0; + Ew[i].imag = 0.0; + } + + Wo = model->Wo; + error = 0.0; + + /* Just test across the harmonics in the first 1000 Hz (L/4) */ + + for(l=1; l<=model->L/4; l++) { + Am.real = 0.0; + Am.imag = 0.0; + den = 0.0; + al = ceil((l - 0.5)*Wo*FFT_ENC/TWO_PI); + bl = ceil((l + 0.5)*Wo*FFT_ENC/TWO_PI); + + /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ + + for(m=al; m<bl; m++) { + offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; + Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag; + Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag; + den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag; + } + + Am.real = Am.real/den; + Am.imag = Am.imag/den; + + /* Determine error between estimated harmonic and original */ + + for(m=al; m<bl; m++) { + offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; + Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag; + Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real; + Ew[m].real = Sw[m].real - Sw_[m].real; + Ew[m].imag = Sw[m].imag - Sw_[m].imag; + error += Ew[m].real*Ew[m].real; + error += Ew[m].imag*Ew[m].imag; + } + } + + snr = 10.0*log10(sig/error); + if (snr > V_THRESH) + model->voiced = 1; + else + model->voiced = 0; + + /* post processing, helps clean up some voicing errors ------------------*/ + + /* + Determine the ratio of low freancy to high frequency energy, + voiced speech tends to be dominated by low frequency energy, + unvoiced by high frequency. This measure can be used to + determine if we have made any gross errors. + */ + + elow = ehigh = 0.0; + for(l=1; l<=model->L/2; l++) { + elow += model->A[l]*model->A[l]; + } + for(l=model->L/2; l<=model->L; l++) { + ehigh += model->A[l]*model->A[l]; + } + eratio = 10.0*log10(elow/ehigh); + dF0 = 0.0; + + /* Look for Type 1 errors, strongly V speech that has been + accidentally declared UV */ + + if (model->voiced == 0) + if (eratio > 10.0) + model->voiced = 1; + + /* Look for Type 2 errors, strongly UV speech that has been + accidentally declared V */ + + if (model->voiced == 1) { + if (eratio < -10.0) + model->voiced = 0; + + /* If pitch is jumping about it's likely this is UV */ + + dF0 = (model->Wo - prev_Wo)*FS/TWO_PI; + if (fabs(dF0) > 15.0) + model->voiced = 0; + + /* A common source of Type 2 errors is the pitch estimator + gives a low (50Hz) estimate for UV speech, which gives a + good match with noise due to the close harmoonic spacing. + These errors are much more common than people with 50Hz + pitch, so we have just a small eratio threshold. */ + + sixty = 60.0*TWO_PI/FS; + if ((eratio < -4.0) && (model->Wo <= sixty)) + model->voiced = 0; + } + //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); + + return snr; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_synthesis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the trapezoidal (Parzen) sythesis window. + +\*---------------------------------------------------------------------------*/ + +void make_synthesis_window(float Pn[]) +{ + int i; + float win; + + /* Generate Parzen window in time domain */ + + win = 0.0; + for(i=0; i<N/2-TW; i++) + Pn[i] = 0.0; + win = 0.0; + for(i=N/2-TW; i<N/2+TW; win+=1.0/(2*TW), i++ ) + Pn[i] = win; + for(i=N/2+TW; i<3*N/2-TW; i++) + Pn[i] = 1.0; + win = 1.0; + for(i=3*N/2-TW; i<3*N/2+TW; win-=1.0/(2*TW), i++) + Pn[i] = win; + for(i=3*N/2+TW; i<2*N; i++) + Pn[i] = 0.0; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: synthesise + AUTHOR......: David Rowe + DATE CREATED: 20/2/95 + + Synthesise a speech signal in the frequency domain from the + sinusodal model parameters. Uses overlap-add with a trapezoidal + window to smoothly interpolate betwen frames. + +\*---------------------------------------------------------------------------*/ + +void synthesise( + float Sn_[], /* time domain synthesised signal */ + MODEL *model, /* ptr to model parameters for this frame */ + float Pn[], /* time domain Parzen window */ + int shift /* flag used to handle transition frames */ +) +{ + int i,l,j,b; /* loop variables */ + COMP Sw_[FFT_DEC]; /* DFT of synthesised signal */ + + if (shift) { + /* Update memories */ + + for(i=0; i<N-1; i++) { + Sn_[i] = Sn_[i+N]; + } + Sn_[N-1] = 0.0; + } + + for(i=0; i<FFT_DEC; i++) { + Sw_[i].real = 0.0; + Sw_[i].imag = 0.0; + } + + /* + Nov 2010 - found that synthesis using time domain cos() functions + gives better results for synthesis frames greater than 10ms. Inverse + FFT synthesis using a 512 pt FFT works well for 10ms window. I think + (but am not sure) that the problem is realted to the quantisation of + the harmonic frequencies to the FFT bin size, e.g. there is a + 8000/512 Hz step between FFT bins. For some reason this makes + the speech from longer frame > 10ms sound poor. The effect can also + be seen when synthesising test signals like single sine waves, some + sort of amplitude modulation at the frame rate. + + Another possibility is using a larger FFT size (1024 or 2048). + */ + +#define FFT_SYNTHESIS +#ifdef FFT_SYNTHESIS + /* Now set up frequency domain synthesised speech */ + for(l=1; l<=model->L; l++) { + b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + if (b > ((FFT_DEC/2)-1)) { + b = (FFT_DEC/2)-1; + } + Sw_[b].real = model->A[l]*cos(model->phi[l]); + Sw_[b].imag = model->A[l]*sin(model->phi[l]); + Sw_[FFT_DEC-b].real = Sw_[b].real; + Sw_[FFT_DEC-b].imag = -Sw_[b].imag; + } + + /* Perform inverse DFT */ + + fft(&Sw_[0].real,FFT_DEC,1); +#else + /* + Direct time domain synthesis using the cos() function. Works + well at 10ms and 20ms frames rates. Note synthesis window is + still used to handle overlap-add between adjacent frames. This + could be simplified as we don't need to synthesise where Pn[] + is zero. + */ + for(l=1; l<=model->L; l++) { + for(i=0,j=-N+1; i<N-1; i++,j++) { + Sw_[FFT_DEC-N+1+i].real += 2.0*model->A[l]*cos(j*model->Wo*l + model->phi[l]); + } + for(i=N-1,j=0; i<2*N; i++,j++) + Sw_[j].real += 2.0*model->A[l]*cos(j*model->Wo*l + model->phi[l]); + } +#endif + + /* Overlap add to previous samples */ + + for(i=0; i<N-1; i++) { + Sn_[i] += Sw_[FFT_DEC-N+1+i].real*Pn[i]; + } + + if (shift) + for(i=N-1,j=0; i<2*N; i++,j++) + Sn_[i] = Sw_[j].real*Pn[i]; + else + for(i=N-1,j=0; i<2*N; i++,j++) + Sn_[i] += Sw_[j].real*Pn[i]; +} + |