view libgsmefr/cod_12k2.c @ 242:f081a6850fb5

libgsmfrp: new refined implementation The previous implementation exhibited the following defects, which are now fixed: 1) The last received valid SID was cached forever for the purpose of handling future invalid SIDs - we could have received some valid SID ages ago, then lots of speech or NO_DATA, and if we then get an invalid SID, we would resurrect the last valid SID from ancient history - a bad design. In our new design, we handle invalid SID based on the current state, much like BFI. 2) GSM 06.11 spec says clearly that after the second lost SID (received BFI=1 && TAF=1 in CN state) we need to gradually decrease the output level, rather than jump directly to emitting silence frames - we previously failed to implement such logic. 3) Per GSM 06.12 section 5.2, Xmaxc should be the same in all 4 subframes in a SID frame. What should we do if we receive an otherwise valid SID frame with different Xmaxc? Our previous approach would replicate this Xmaxc oddity in every subsequent generated CN frame, which is rather bad. In our new design, the very first CN frame (which can be seen as a transformation of the SID frame itself) retains the original 4 distinct Xmaxc, but all subsequent CN frames are based on the Xmaxc from the last subframe of the most recent SID.
author Mychaela Falconia <falcon@freecalypso.org>
date Tue, 09 May 2023 05:16:31 +0000
parents d9ad0f5121e8
children
line wrap: on
line source

/***************************************************************************
 *
 *  FILE NAME:    cod_12k2.c
 *
 *  FUNCTIONS DEFINED IN THIS FILE:
 *                   Coder_12k2  and  Init_Coder_12k2
 *
 *
 *  Init_Coder_12k2(struct EFR_encoder_state *st):
 *      Initialization of variables for the coder section.
 *
 *  Coder_12k2(struct EFR_encoder_state *st, Word16 ana[], Word16 synth[]):
 *      Speech encoder routine operating on a frame basis.
 *

***************************************************************************/

#include "gsm_efr.h"
#include "typedef.h"
#include "namespace.h"
#include "basic_op.h"
#include "sig_proc.h"
#include "memops.h"
#include "no_count.h"
#include "codec.h"
#include "cnst.h"
#include "enc_state.h"

#include "window2.tab"

#include "vad.h"
#include "dtx.h"

/*-----------------------------------------------------------*
 *    Coder constant parameters (defined in "cnst.h")        *
 *-----------------------------------------------------------*
 *   L_WINDOW    : LPC analysis window size                  *
 *   L_FRAME     : Frame size                                *
 *   L_FRAME_BY2 : Half the frame size                       *
 *   L_SUBFR     : Sub-frame size                            *
 *   M           : LPC order                                 *
 *   MP1         : LPC order+1                               *
 *   L_TOTAL     : Total size of speech buffer               *
 *   PIT_MIN     : Minimum pitch lag                         *
 *   PIT_MAX     : Maximum pitch lag                         *
 *   L_INTERPOL  : Length of filter for interpolation        *
 *-----------------------------------------------------------*/

 /* Spectral expansion factors */

/*
 * F_gamma1[] is handed to Weight_Ai() together with the unquantized A(z)
 * to build Ap1[] (see Coder_12k2).  The entries are approximately
 * 0.9^(i+1) * 32768 for i = 0..9 (presumably Q15 fixed point).
 */
static const Word16 F_gamma1[M] =
{
    29491, 26542, 23888, 21499, 19349,
    17414, 15672, 14105, 12694, 11425
};
/*
 * F_gamma2[] is handed to Weight_Ai() together with the unquantized A(z)
 * to build Ap2[] (see Coder_12k2).  The entries are approximately
 * 0.6^(i+1) * 32768 for i = 0..9 (presumably Q15 fixed point).
 */
static const Word16 F_gamma2[M] =
{
    19661, 11797, 7078, 4247, 2548,
    1529, 917, 550, 330, 198
};

/***************************************************************************
 *  FUNCTION:   Init_Coder_12k2
 *
 *  PURPOSE:   Initialization of variables for the coder section.
 *
 *  DESCRIPTION:
 *       - initilize pointers to speech buffer
 *       - initialize static  pointers
 *       - set static vectors to zero
 *
 ***************************************************************************/

void Init_Coder_12k2 (struct EFR_encoder_state *st)
{
    /* Static vectors to zero */

    Set_zero (st->old_speech, L_TOTAL);
    Set_zero (st->old_exc, PIT_MAX + L_INTERPOL);
    Set_zero (st->old_wsp, PIT_MAX);
    Set_zero (st->mem_syn, M);
    Set_zero (st->mem_w, M);
    Set_zero (st->mem_w0, M);
    Set_zero (st->mem_err, M);
    Set_zero (st->ai_zero + MP1, L_SUBFR);
    Set_zero (st->hvec, L_SUBFR);   /* set to zero "h1[-L_SUBFR..-1]" */

    /* Initialize lsp_old [] */

    st->lsp_old[0] = 30000;
    st->lsp_old[1] = 26000;
    st->lsp_old[2] = 21000;
    st->lsp_old[3] = 15000;
    st->lsp_old[4] = 8000;
    st->lsp_old[5] = 0;
    st->lsp_old[6] = -8000;
    st->lsp_old[7] = -15000;
    st->lsp_old[8] = -21000;
    st->lsp_old[9] = -26000;

    /* Initialize lsp_old_q[] */

    Copy (st->lsp_old, st->lsp_old_q, M);

    return;
}

/***************************************************************************
 *   FUNCTION:   Coder_12k2
 *
 *   PURPOSE:  Principal encoder routine.
 *
 *   DESCRIPTION: This function is called every 20 ms speech frame,
 *       operating on the newly read 160 speech samples. It performs the
 *       principal encoding functions to produce the set of encoded parameters
 *       which include the LSP, adaptive codebook, and fixed codebook
 *       quantization indices (addresses and gains).
 *
 *   INPUTS:
 *       st:        encoder state.  The frame to be encoded is read from
 *                  st->old_speech + L_TOTAL - L_FRAME, i.e. the last
 *                  L_FRAME words of the speech buffer; this function never
 *                  writes that region before reading it, so the caller is
 *                  expected to have stored the new samples there.
 *                  st->dtx_mode selects whether the VAD and the TX DTX
 *                  handler are run.
 *
 *   OUTPUTS:
 *
 *       ana[]:     vector of analysis parameters.
 *       synth[]:   Local synthesis speech (for debugging purposes)
 *       st:        LSP history, filter memories and the speech, weighted
 *                  speech and excitation buffers are updated in place.
 *
 *   LAYOUT OF ana[] AS ADVANCED BY THIS FUNCTION:
 *       5 words handed to Q_plsf_5(), then for each subframe:
 *         1 word   result of Enc_lag6()       (speech frames only)
 *         1 word   result of q_gain_pitch()   (speech frames only)
 *        10 words  handed to code_10i40_35bits() (speech frames only)
 *         1 word   result of q_gain_code()    (every frame)
 *       In frames where TX_SP_FLAG is clear the "speech frames only" slots
 *       are stepped over but not written by this function.
 *
 ***************************************************************************/

void Coder_12k2 (
    struct EFR_encoder_state *st,
    Word16 ana[],    /* output  : Analysis parameters */
    Word16 synth[]   /* output  : Local synthesis     */
)
{
    /* handy pointers that were static vars in the original code */
    /* speech:   start of the frame being encoded (last L_FRAME words)   */
    /* p_window: start of the LPC analysis window (last L_WINDOW words)  */
    /* wsp, exc: current-frame part of the weighted speech / excitation  */
    /*           buffers; the words before them hold past history        */
    /* zero:     tail of ai_zero[] behind the MP1 coefficient slots      */
    /* h1:       second half of hvec[], so h1[-L_SUBFR..-1] is addressable */
    /* error:    region just behind the M words of mem_err[]             */
    Word16 *speech = st->old_speech + L_TOTAL - L_FRAME;
    Word16 *p_window = st->old_speech + L_TOTAL - L_WINDOW;
    Word16 *wsp = st->old_wsp + PIT_MAX;
    Word16 *exc = st->old_exc + PIT_MAX + L_INTERPOL;
    Word16 *zero = st->ai_zero + MP1;
    Word16 *h1 = st->hvec + L_SUBFR;
    Word16 *error = st->mem_err + M;

    /* LPC coefficients */

    Word16 r_l[MP1], r_h[MP1];      /* Autocorrelations lo and hi           */
    Word16 A_t[(MP1) * 4];          /* A(z) unquantized for the 4 subframes */
    Word16 Aq_t[(MP1) * 4];         /* A(z)   quantized for the 4 subframes */
    Word16 Ap1[MP1];                /* A(z) with spectral expansion         */
    Word16 Ap2[MP1];                /* A(z) with spectral expansion         */
    Word16 *A, *Aq;                 /* Pointer on A_t and Aq_t              */
    Word16 lsp_new[M], lsp_new_q[M];/* LSPs at 4th subframe                 */
    Word16 lsp_mid[M], lsp_mid_q[M];/* LSPs at 2nd subframe                 */

    /* Other vectors */

    Word16 xn[L_SUBFR];            /* Target vector for pitch search        */
    Word16 xn2[L_SUBFR];           /* Target vector for codebook search     */
    Word16 res2[L_SUBFR];          /* Long term prediction residual         */
    Word16 code[L_SUBFR];          /* Fixed codebook excitation             */
    Word16 y1[L_SUBFR];            /* Filtered adaptive excitation          */
    Word16 y2[L_SUBFR];            /* Filtered fixed codebook excitation    */

    /* Scalars */

    Word16 i, j, k, i_subfr;
    Word16 T_op, T0, T0_min, T0_max, T0_frac;
    Word16 gain_pit, gain_code, pit_flag, pit_sharp;
    Word16 temp;
    Word32 L_temp;

    Word16 scal_acf, VAD_flag, lags[2], rc[4];

    /*----------------------------------------------------------------------*
     *  - Perform LPC analysis: (twice per frame)                           *
     *       * autocorrelation + lag windowing                              *
     *       * Levinson-Durbin algorithm to find a[]                        *
     *       * convert a[] to lsp[]                                         *
     *       * quantize and code the LSPs                                   *
     *       * find the interpolated LSPs and convert to a[] for all        *
     *         subframes (both quantized and unquantized)                   *
     *----------------------------------------------------------------------*/

    /* LP analysis centered at 2nd subframe */
    /* (result goes into the 2nd subframe slot of A_t[])  */

    scal_acf = Autocorr (p_window, M, r_h, r_l, window_160_80);
                                /* Autocorrelations */

    Lag_window (M, r_h, r_l);   /* Lag windowing    */

    Levinson (st, r_h, r_l, &A_t[MP1], rc); /* Levinson-Durbin  */

    Az_lsp (&A_t[MP1], lsp_mid, st->lsp_old); /* From A(z) to lsp */

    /* LP analysis centered at 4th subframe */
    /* (result goes into the 4th subframe slot of A_t[]; scal_acf, r_h[], */
    /* r_l[] and rc[] from the first analysis are overwritten here)       */

    /* Autocorrelations */
    scal_acf = Autocorr (p_window, M, r_h, r_l, window_232_8);

    Lag_window (M, r_h, r_l);   /* Lag windowing    */

    Levinson (st, r_h, r_l, &A_t[MP1 * 3], rc); /* Levinson-Durbin  */

    Az_lsp (&A_t[MP1 * 3], lsp_new, lsp_mid); /* From A(z) to lsp */

    if (st->dtx_mode)
    {
        /* DTX enabled, make voice activity decision */
        /* The VAD is fed the outputs of the second (4th subframe) analysis */
        VAD_flag = vad_computation (st, r_h, r_l, scal_acf, rc, st->ptch);
        tx_dtx (st, VAD_flag); /* TX DTX handler */
    }
    else
    {
        /* DTX disabled, active speech in every frame */
        VAD_flag = 1;
        st->txdtx_ctrl = TX_VAD_FLAG | TX_SP_FLAG;
    }

    /* From here on st->txdtx_ctrl is only read by this function; the     */
    /* TX_SP_FLAG bit selects the speech path or the no-speech path in    */
    /* every conditional below.                                           */

    /* LSP quantization (lsp_mid[] and lsp_new[] jointly quantized) */

    Q_plsf_5 (st, lsp_mid, lsp_new, lsp_mid_q, lsp_new_q, ana, st->txdtx_ctrl);

    ana += 5;   /* step over the 5 words handed to Q_plsf_5() */

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes (both quantized  *
     * and unquantized).                                                  *
     * The interpolated parameters are in array A_t[] of size (M+1)*4     *
     * and the quantized interpolated parameters are in array Aq_t[]      *
     *--------------------------------------------------------------------*/

    Int_lpc2 (st->lsp_old, lsp_mid, lsp_new, A_t);

    if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
    {
        Int_lpc (st->lsp_old_q, lsp_mid_q, lsp_new_q, Aq_t);

        /* update the LSPs for the next frame */
        Copy (lsp_new, st->lsp_old, M);
        Copy (lsp_new_q, st->lsp_old_q, M);
    }
    else
    {
        /* Use unquantized LPC parameters in case of no speech activity */
        for (i = 0; i < MP1; i++)
        {
            Aq_t[i] = A_t[i];                                   move16 (); 
            Aq_t[i + MP1] = A_t[i + MP1];                       move16 (); 
            Aq_t[i + MP1 * 2] = A_t[i + MP1 * 2];               move16 (); 
            Aq_t[i + MP1 * 3] = A_t[i + MP1 * 3];               move16 (); 
        }

        /* update the LSPs for the next frame */
        /* (the quantized history is also loaded with the unquantized */
        /* lsp_new[]; lsp_new_q[] is not used on this path)            */
        Copy (lsp_new, st->lsp_old, M);
        Copy (lsp_new, st->lsp_old_q, M);
    }

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech wsp[] for the whole speech frame    *
     * - Find the open-loop pitch delay for first 2 subframes               *
     * - Set the range for searching closed-loop pitch in 1st subframe      *
     * - Find the open-loop pitch delay for last 2 subframes                *
     *----------------------------------------------------------------------*/

    A = A_t;                                                    move16 (); 
    for (i = 0; i < L_FRAME; i += L_SUBFR)
    {
        Weight_Ai (A, F_gamma1, Ap1);

        Weight_Ai (A, F_gamma2, Ap2);

        Residu (Ap1, &speech[i], &wsp[i], L_SUBFR);

        /* filtered in place; NOTE(review): the final argument (1 here, 0 */
        /* in the impulse response / target computations below) looks     */
        /* like a "update filter memory" switch - confirm in Syn_filt()   */
        Syn_filt (Ap2, &wsp[i], &wsp[i], L_SUBFR, st->mem_w, 1);

        A += MP1;                                               move16 (); 
    }

    /* Find open loop pitch lag for first two subframes */

    T_op = Pitch_ol (wsp, PIT_MIN, PIT_MAX, L_FRAME_BY2);       move16 (); 

    lags[0] = T_op;                                             move16 (); 

    /* T0_min and T0_max are only assigned in speech frames; they are     */
    /* likewise only read inside the TX_SP_FLAG branches further down.    */
    if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
    {
        /* Range for closed loop pitch search in 1st subframe */
        /* (a window of 7 lags, T0_min .. T0_min + 6, kept inside */
        /* PIT_MIN .. PIT_MAX)                                     */

        T0_min = sub (T_op, 3);
        if (T0_min < PIT_MIN)
        {
            T0_min = PIT_MIN;                                   move16 (); 
        }
        T0_max = add (T0_min, 6);
        if (T0_max > PIT_MAX)
        {
            T0_max = PIT_MAX;                                   move16 (); 
            T0_min = sub (T0_max, 6);
        }
    }
    /* Find open loop pitch lag for last two subframes */
    /* (T_op keeps this value until the 3rd subframe of the loop below) */

    T_op = Pitch_ol (&wsp[L_FRAME_BY2], PIT_MIN, PIT_MAX, L_FRAME_BY2);

    if (st->dtx_mode)
    {
        /* both open-loop lags are handed to periodicity_update() */
        lags[1] = T_op;                                         move16 (); 
        periodicity_update (st, lags);
    }
    /*----------------------------------------------------------------------*
     *          Loop for every subframe in the analysis frame               *
     *----------------------------------------------------------------------*
     *  To find the pitch and innovation parameters. The subframe size is   *
     *  L_SUBFR and the loop is repeated L_FRAME/L_SUBFR times.             *
     *     - find the weighted LPC coefficients                             *
     *     - find the LPC residual signal res[]                             *
     *     - compute the target signal for pitch search                     *
     *     - compute impulse response of weighted synthesis filter (h1[])   *
     *     - find the closed-loop pitch parameters                          *
     *     - encode the pitch delay                                         *
     *     - update the impulse response h1[] by including pitch            *
     *     - find target vector for codebook search                         *
     *     - codebook search                                                *
     *     - encode codebook address                                        *
     *     - VQ of pitch and codebook gains                                 *
     *     - find synthesis speech                                          *
     *     - update states of weighting filter                              *
     *----------------------------------------------------------------------*/

    /* pointer to interpolated LPC parameters          */
    A = A_t;                                                    move16 ();
    /* pointer to interpolated quantized LPC parameters */    
    Aq = Aq_t;                                                  move16 (); 

    for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
    {
        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*---------------------------------------------------------------*
             * Find the weighted LPC coefficients for the weighting filter.  *
             *---------------------------------------------------------------*/

            Weight_Ai (A, F_gamma1, Ap1);

            Weight_Ai (A, F_gamma2, Ap2);

            /*---------------------------------------------------------------*
             * Compute impulse response, h1[], of weighted synthesis filter  *
             *---------------------------------------------------------------*/

            /* ai_zero[] becomes Ap1[] followed by the words at "zero",   */
            /* which Init_Coder_12k2() clears and which are only used as  */
            /* filter input/memory here                                    */
            Copy (Ap1, st->ai_zero, M+1);

            Syn_filt (Aq, st->ai_zero, h1, L_SUBFR, zero, 0);

            Syn_filt (Ap2, h1, h1, L_SUBFR, zero, 0);

        }
        /*---------------------------------------------------------------*
         *          Find the target vector for pitch search:             *
         *---------------------------------------------------------------*/

        /* computed in every frame: the no-speech path needs res2[] too */
        Residu (Aq, &speech[i_subfr], res2, L_SUBFR);   /* LPC residual */

        if ((st->txdtx_ctrl & TX_SP_FLAG) == 0)
        {
            /* Compute comfort noise excitation gain based on
            LP residual energy */

            st->CN_excitation_gain = compute_CN_excitation_gain (res2);
        }
        else
        {
            Copy (res2, &exc[i_subfr], L_SUBFR);

            /* output lands directly behind mem_err[] (error = mem_err + M) */
            Syn_filt (Aq, &exc[i_subfr], error, L_SUBFR, st->mem_err, 0);

            Residu (Ap1, error, xn, L_SUBFR);

            /* target signal xn[] */
            Syn_filt (Ap2, xn, xn, L_SUBFR, st->mem_w0, 0);

            /*--------------------------------------------------------------*
             *                 Closed-loop fractional pitch search          *
             *--------------------------------------------------------------*/

            /* flag for first and 3rd subframe: pit_flag is 0 in those */
            /* two subframes and non-zero in the 2nd and 4th           */
            pit_flag = i_subfr;                                 move16 (); 
            /* set t0_min and t0_max for 3rd subframe, using the open-loop */
            /* lag found for the second half of the frame                 */
            if (i_subfr == L_FRAME_BY2)
            {
                T0_min = sub (T_op, 3);

                if (T0_min < PIT_MIN)
                {
                    T0_min = PIT_MIN;                           move16 (); 
                }
                T0_max = add (T0_min, 6);
                if (T0_max > PIT_MAX)
                {
                    T0_max = PIT_MAX;                           move16 (); 
                    T0_min = sub (T0_max, 6);
                }
                pit_flag = 0;                                   move16 (); 
            }

            T0 = Pitch_fr6 (&exc[i_subfr], xn, h1, L_SUBFR, T0_min, T0_max,
                            pit_flag, &T0_frac);                move16 (); 

            /* T0_min / T0_max are passed by address, so Enc_lag6() may   */
            /* adjust the search range used by the following subframe     */
            *ana = Enc_lag6 (T0, &T0_frac, &T0_min, &T0_max, PIT_MIN,
                             PIT_MAX, pit_flag);
        }
        ana++;
        /* Incrementation of ana is done here to work also
        when no speech activity is present */

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*---------------------------------------------------------------*
             * - find unity gain pitch excitation (adaptive codebook entry)  *
             *   with fractional interpolation.                              *
             * - find filtered pitch exc. y1[]=exc[] convolved with h1[]     *
             * - compute pitch gain and limit between 0 and 1.2              *
             * - update target vector for codebook search                    *
             * - find LTP residual.                                          *
             *---------------------------------------------------------------*/

            Pred_lt_6 (&exc[i_subfr], T0, T0_frac, L_SUBFR);

            Convolve (&exc[i_subfr], h1, y1, L_SUBFR);

            gain_pit = G_pitch (xn, y1, L_SUBFR);      move16 (); 

            /* gain_pit is passed by address; presumably replaced by its */
            /* quantized value - confirm in q_gain_pitch()                */
            *ana = q_gain_pitch (&gain_pit);                    move16 (); 

        }
        else
        {
            /* no adaptive codebook contribution in the total excitation */
            gain_pit = 0;                                       move16 (); 
        }

        ana++;                  /* Incrementation of ana is done here to work
                                   also when no speech activity is present */

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {
            /* xn2[i]   = xn[i] - y1[i] * gain_pit  */
            /* res2[i] -= exc[i+i_subfr] * gain_pit */

            for (i = 0; i < L_SUBFR; i++)
            {
                L_temp = L_mult (y1[i], gain_pit);
                L_temp = L_shl (L_temp, 3);
                xn2[i] = sub (xn[i], extract_h (L_temp));       move16 (); 

                L_temp = L_mult (exc[i + i_subfr], gain_pit);
                L_temp = L_shl (L_temp, 3);
                res2[i] = sub (res2[i], extract_h (L_temp));    move16 (); 
            }

            /*-------------------------------------------------------------*
             * - include pitch contribution into impulse resp. h1[]        *
             *-------------------------------------------------------------*/

            /* pit_sharp = gain_pit;                   */
            /* if (pit_sharp > 1.0) pit_sharp = 1.0;   */
            /* NOTE(review): the limiting described above is presumably  */
            /* provided by saturation inside shl() - confirm in basic_op */

            pit_sharp = shl (gain_pit, 3);

            /* nothing is added when T0 >= L_SUBFR (loop body not entered) */
            for (i = T0; i < L_SUBFR; i++)
            {
                temp = mult (h1[i - T0], pit_sharp);
                h1[i] = add (h1[i], temp);                      move16 (); 
            }

            /*--------------------------------------------------------------*
             * - Innovative codebook search (find index and gain)           *
             *--------------------------------------------------------------*/

            code_10i40_35bits (xn2, res2, h1, code, y2, ana);
        }
        else
        {
            /* code[] is generated from the seed kept in the state; */
            /* nothing is stored into ana[] on this path            */
            build_CN_code (code, &st->L_pn_seed_tx);
        }
        ana += 10;                                              move16 (); 

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*-------------------------------------------------------*
             * - Add the pitch contribution to code[].               *
             *-------------------------------------------------------*/

            for (i = T0; i < L_SUBFR; i++)
            {
                temp = mult (code[i - T0], pit_sharp);
                code[i] = add (code[i], temp);                  move16 (); 
            }

            /*------------------------------------------------------*
             * - Quantization of fixed codebook gain.               *
             *------------------------------------------------------*/

            gain_code = G_code (xn2, y2);                       move16 (); 
        }
        /* NOTE(review): in frames without TX_SP_FLAG gain_code has not  */
        /* been assigned at this point; q_gain_code() receives its       */
        /* address and presumably stores the comfort noise gain into it  */
        /* before it is read below - confirm in q_gain_code()            */
        *ana++ = q_gain_code (st, code, L_SUBFR, &gain_code, st->txdtx_ctrl,
                              i_subfr);

        /*------------------------------------------------------*
         * - Find the total excitation                          *
         * - find synthesis speech corresponding to exc[]       *
         * - update filter memories for finding the target      *
         *   vector in the next subframe                        *
         *   (update mem_err[] and mem_w0[])                    *
         *------------------------------------------------------*/

        for (i = 0; i < L_SUBFR; i++)
        {
            /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
            /* (with gain_pit == 0 in no-speech frames only the */
            /* code[] term contributes)                         */

            L_temp = L_mult (exc[i + i_subfr], gain_pit);
            L_temp = L_mac (L_temp, code[i], gain_code);
            L_temp = L_shl (L_temp, 3);
            exc[i + i_subfr] = round (L_temp);                  move16 (); 
        }

        Syn_filt (Aq, &exc[i_subfr], &synth[i_subfr], L_SUBFR, st->mem_syn, 1);

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /* keep the values for the last M samples of this subframe: */
            /*   mem_err[] = speech - synth                             */
            /*   mem_w0[]  = xn - (scaled y1*gain_pit + scaled y2*gain_code) */
            for (i = L_SUBFR - M, j = 0; i < L_SUBFR; i++, j++)
            {
                st->mem_err[j] = sub (speech[i_subfr + i], synth[i_subfr + i]);
                temp = extract_h (L_shl (L_mult (y1[i], gain_pit), 3));
                k = extract_h (L_shl (L_mult (y2[i], gain_code), 5));
                st->mem_w0[j] = sub (xn[i], add (temp, k));
            }
        }
        else
        {
            /* no target vector was computed: clear both memories */
            Set_zero (st->mem_err, M);
            Set_zero (st->mem_w0, M);
        }

        /* interpolated LPC parameters for next subframe */
        A += MP1;                                               move16 (); 
        Aq += MP1;                                              move16 (); 
    }

    /*--------------------------------------------------*
     * Update signal for next frame.                    *
     * -> shift to the left by L_FRAME:                 *
     *     speech[], wsp[] and  exc[]                   *
     *--------------------------------------------------*/

    /* After the shifts the first L_TOTAL - L_FRAME words of old_speech[], */
    /* the first PIT_MAX words of old_wsp[] and the first                  */
    /* PIT_MAX + L_INTERPOL words of old_exc[] hold this frame's history.  */

    Copy (&st->old_speech[L_FRAME], &st->old_speech[0], L_TOTAL - L_FRAME);

    Copy (&st->old_wsp[L_FRAME], &st->old_wsp[0], PIT_MAX);

    Copy (&st->old_exc[L_FRAME], &st->old_exc[0], PIT_MAX + L_INTERPOL);

    return;
}