summaryrefslogtreecommitdiff
path: root/gr-vocoder/lib/codec2/nlp.c
diff options
context:
space:
mode:
Diffstat (limited to 'gr-vocoder/lib/codec2/nlp.c')
-rw-r--r--gr-vocoder/lib/codec2/nlp.c364
1 files changed, 0 insertions, 364 deletions
diff --git a/gr-vocoder/lib/codec2/nlp.c b/gr-vocoder/lib/codec2/nlp.c
deleted file mode 100644
index 0d5e530ce..000000000
--- a/gr-vocoder/lib/codec2/nlp.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*---------------------------------------------------------------------------*\
-
- FILE........: nlp.c
- AUTHOR......: David Rowe
- DATE CREATED: 23/3/93
-
- Non Linear Pitch (NLP) estimation functions.
-
-\*---------------------------------------------------------------------------*/
-
-/*
- Copyright (C) 2009 David Rowe
-
- All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License version 2.1, as
- published by the Free Software Foundation. This program is
- distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "defines.h"
-#include "nlp.h"
-#include "dump.h"
-#include "fft.h"
-
-#include <assert.h>
-#include <math.h>
-#include <stdlib.h>
-
-/*---------------------------------------------------------------------------*\
-
- DEFINES
-
-\*---------------------------------------------------------------------------*/
-
-#define PMAX_M 600 /* maximum NLP analysis window size */
-#define COEFF 0.95 /* notch filter parameter */
-#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
-#define DEC 5 /* decimation factor */
-#define SAMPLE_RATE 8000
-#define PI 3.141592654 /* mathematical constant */
-#define T 0.1 /* threshold for local minima candidate */
-#define F0_MAX 500
-#define CNLP 0.3 /* post processor constant */
-#define NLP_NTAP 48 /* Decimation LPF order */
-
-/*---------------------------------------------------------------------------*\
-
- GLOBALS
-
-\*---------------------------------------------------------------------------*/
-
-/* 48 tap 600Hz low pass FIR filter coefficients */
-
-const float nlp_fir[] = {
- -1.0818124e-03,
- -1.1008344e-03,
- -9.2768838e-04,
- -4.2289438e-04,
- 5.5034190e-04,
- 2.0029849e-03,
- 3.7058509e-03,
- 5.1449415e-03,
- 5.5924666e-03,
- 4.3036754e-03,
- 8.0284511e-04,
- -4.8204610e-03,
- -1.1705810e-02,
- -1.8199275e-02,
- -2.2065282e-02,
- -2.0920610e-02,
- -1.2808831e-02,
- 3.2204775e-03,
- 2.6683811e-02,
- 5.5520624e-02,
- 8.6305944e-02,
- 1.1480192e-01,
- 1.3674206e-01,
- 1.4867556e-01,
- 1.4867556e-01,
- 1.3674206e-01,
- 1.1480192e-01,
- 8.6305944e-02,
- 5.5520624e-02,
- 2.6683811e-02,
- 3.2204775e-03,
- -1.2808831e-02,
- -2.0920610e-02,
- -2.2065282e-02,
- -1.8199275e-02,
- -1.1705810e-02,
- -4.8204610e-03,
- 8.0284511e-04,
- 4.3036754e-03,
- 5.5924666e-03,
- 5.1449415e-03,
- 3.7058509e-03,
- 2.0029849e-03,
- 5.5034190e-04,
- -4.2289438e-04,
- -9.2768838e-04,
- -1.1008344e-03,
- -1.0818124e-03
-};
-
-typedef struct {
- float sq[PMAX_M]; /* squared speech samples */
- float mem_x,mem_y; /* memory for notch filter */
- float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
-} NLP;
-
-float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax);
-float post_process_sub_multiples(COMP Fw[],
- int pmin, int pmax, float gmax, int gmax_bin,
- float *prev_Wo);
-
-/*---------------------------------------------------------------------------*\
-
- nlp_create()
-
- Initialisation function for NLP pitch estimator.
-
-\*---------------------------------------------------------------------------*/
-
-void *nlp_create()
-{
- NLP *nlp;
- int i;
-
- nlp = (NLP*)malloc(sizeof(NLP));
- if (nlp == NULL)
- return NULL;
-
- for(i=0; i<PMAX_M; i++)
- nlp->sq[i] = 0.0;
- nlp->mem_x = 0.0;
- nlp->mem_y = 0.0;
- for(i=0; i<NLP_NTAP; i++)
- nlp->mem_fir[i] = 0.0;
-
- return (void*)nlp;
-}
-
-/*---------------------------------------------------------------------------*\
-
- nlp_destory()
-
- Initialisation function for NLP pitch estimator.
-
-\*---------------------------------------------------------------------------*/
-
-void nlp_destroy(void *nlp_state)
-{
- assert(nlp_state != NULL);
- free(nlp_state);
-}
-
-/*---------------------------------------------------------------------------*\
-
- nlp()
-
- Determines the pitch in samples using the Non Linear Pitch (NLP)
- algorithm [1]. Returns the fundamental in Hz. Note that the actual
- pitch estimate is for the centre of the M sample Sn[] vector, not
- the current N sample input vector. This is (I think) a delay of 2.5
- frames with N=80 samples. You should align further analysis using
- this pitch estimate to be centred on the middle of Sn[].
-
- Two post processors have been tried, the MBE version (as discussed
- in [1]), and a post processor that checks sub-multiples. Both
- suffer occasional gross pitch errors (i.e. neither are perfect). In
- the presence of background noise the sub-multiple algorithm tends
- towards low F0 which leads to better sounding background noise than
- the MBE post processor.
-
- A good way to test and develop the NLP pitch estimator is using the
- tnlp (codec2/unittest) and the codec2/octave/plnlp.m Octave script.
-
- A pitch tracker searching a few frames forward and backward in time
- would be a useful addition.
-
- References:
-
- [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 4
-
-\*---------------------------------------------------------------------------*/
-
-float nlp(
- void *nlp_state,
- float Sn[], /* input speech vector */
- int n, /* frames shift (no. new samples in Sn[]) */
- int m, /* analysis window size */
- int pmin, /* minimum pitch value */
- int pmax, /* maximum pitch value */
- float *pitch, /* estimated pitch period in samples */
- COMP Sw[], /* Freq domain version of Sn[] */
- float *prev_Wo
-)
-{
- NLP *nlp;
- float notch; /* current notch filter output */
- COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */
- float gmax;
- int gmax_bin;
- int i,j;
- float best_f0;
-
- assert(nlp_state != NULL);
- nlp = (NLP*)nlp_state;
-
- /* Square, notch filter at DC, and LP filter vector */
-
- for(i=m-n; i<M; i++) /* square latest speech samples */
- nlp->sq[i] = Sn[i]*Sn[i];
-
- for(i=m-n; i<m; i++) { /* notch filter at DC */
- notch = nlp->sq[i] - nlp->mem_x;
- notch += COEFF*nlp->mem_y;
- nlp->mem_x = nlp->sq[i];
- nlp->mem_y = notch;
- nlp->sq[i] = notch;
- }
-
- for(i=m-n; i<m; i++) { /* FIR filter vector */
-
- for(j=0; j<NLP_NTAP-1; j++)
- nlp->mem_fir[j] = nlp->mem_fir[j+1];
- nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
-
- nlp->sq[i] = 0.0;
- for(j=0; j<NLP_NTAP; j++)
- nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
- }
-
- /* Decimate and DFT */
-
- for(i=0; i<PE_FFT_SIZE; i++) {
- Fw[i].real = 0.0;
- Fw[i].imag = 0.0;
- }
- for(i=0; i<m/DEC; i++) {
- Fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
- }
-#ifdef DUMP
- dump_dec(Fw);
-#endif
- fft(&Fw[0].real,PE_FFT_SIZE,1);
- for(i=0; i<PE_FFT_SIZE; i++)
- Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
-
-#ifdef DUMP
- dump_sq(nlp->sq);
- dump_Fw(Fw);
-#endif
-
- /* find global peak */
-
- gmax = 0.0;
- gmax_bin = PE_FFT_SIZE*DEC/pmax;
- for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
- if (Fw[i].real > gmax) {
- gmax = Fw[i].real;
- gmax_bin = i;
- }
- }
-
- best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin,
- prev_Wo);
-
- /* Shift samples in buffer to make room for new samples */
-
- for(i=0; i<m-n; i++)
- nlp->sq[i] = nlp->sq[i+n];
-
- /* return pitch and F0 estimate */
-
- *pitch = (float)SAMPLE_RATE/best_f0;
- return(best_f0);
-}
-
-/*---------------------------------------------------------------------------*\
-
- post_process_sub_multiples()
-
- Given the global maximma of Fw[] we search interger submultiples for
- local maxima. If local maxima exist and they are above an
- experimentally derived threshold (OK a magic number I pulled out of
- the air) we choose the submultiple as the F0 estimate.
-
- The rational for this is that the lowest frequency peak of Fw[]
- should be F0, as Fw[] can be considered the autocorrelation function
- of Sw[] (the speech spectrum). However sometimes due to phase
- effects the lowest frequency maxima may not be the global maxima.
-
- This works OK in practice and favours low F0 values in the presence
- of background noise which means the sinusoidal codec does an OK job
- of synthesising the background noise. High F0 in background noise
- tends to sound more periodic introducing annoying artifacts.
-
-\*---------------------------------------------------------------------------*/
-
-float post_process_sub_multiples(COMP Fw[],
- int pmin, int pmax, float gmax, int gmax_bin,
- float *prev_Wo)
-{
- int min_bin, cmax_bin;
- int mult;
- float thresh, best_f0;
- int b, bmin, bmax, lmax_bin;
- float lmax, cmax;
- int prev_f0_bin;
-
- /* post process estimate by searching submultiples */
-
- mult = 2;
- min_bin = PE_FFT_SIZE*DEC/pmax;
- cmax_bin = gmax_bin;
- prev_f0_bin = *prev_Wo*(4000.0/PI)*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
-
- while(gmax_bin/mult >= min_bin) {
-
- b = gmax_bin/mult; /* determine search interval */
- bmin = 0.8*b;
- bmax = 1.2*b;
- if (bmin < min_bin)
- bmin = min_bin;
-
- /* lower threshold to favour previous frames pitch estimate,
- this is a form of pitch tracking */
-
- if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
- thresh = CNLP*0.5*gmax;
- else
- thresh = CNLP*gmax;
-
- lmax = 0;
- lmax_bin = bmin;
- for (b=bmin; b<=bmax; b++) /* look for maximum in interval */
- if (Fw[b].real > lmax) {
- lmax = Fw[b].real;
- lmax_bin = b;
- }
-
- if (lmax > thresh)
- if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
- cmax = lmax;
- cmax_bin = lmax_bin;
- }
-
- mult++;
- }
-
- best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
-
- return best_f0;
-}
-