diff options
Diffstat (limited to 'libs/libopus/src/mlp.c')
-rw-r--r-- | libs/libopus/src/mlp.c | 155 |
1 files changed, 77 insertions, 78 deletions
diff --git a/libs/libopus/src/mlp.c b/libs/libopus/src/mlp.c index ff9e50df4..964c6a98f 100644 --- a/libs/libopus/src/mlp.c +++ b/libs/libopus/src/mlp.c @@ -1,5 +1,5 @@ /* Copyright (c) 2008-2011 Octasic Inc. - Written by Jean-Marc Valin */ + 2012-2017 Jean-Marc Valin */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -29,42 +29,13 @@ #include "config.h" #endif +#include <math.h> #include "opus_types.h" #include "opus_defines.h" - -#include <math.h> -#include "mlp.h" #include "arch.h" #include "tansig_table.h" -#define MAX_NEURONS 100 +#include "mlp.h" -#if 0 -static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ -{ - int i; - opus_val16 xx; /* Q11 */ - /*double x, y;*/ - opus_val16 dy, yy; /* Q14 */ - /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(8,19)) - return QCONST32(1.,14); - if (_x<=-QCONST32(8,19)) - return -QCONST32(1.,14); - xx = EXTRACT16(SHR32(_x, 8)); - /*i = lrint(25*x);*/ - i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); - /*x -= .04*i;*/ - xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); - /*x = xx*(1./2048);*/ - /*y = tansig_table[250+i];*/ - yy = tansig_table[250+i]; - /*y = yy*(1./16384);*/ - dy = 16384-MULT16_16_Q14(yy,yy); - yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); - return yy; -} -#else -/*extern const float tansig_table[501];*/ static OPUS_INLINE float tansig_approx(float x) { int i; @@ -92,54 +63,82 @@ static OPUS_INLINE float tansig_approx(float x) y = y + x*dy*(1 - y*x); return sign*y; } -#endif -#if 0 -void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) +static OPUS_INLINE float sigmoid_approx(float x) { - int j; - opus_val16 hidden[MAX_NEURONS]; - const opus_val16 *W = m->weights; - /* Copy to tmp_in */ - for (j=0;j<m->topo[1];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),8); - for (k=0;k<m->topo[0];k++) - sum = MAC16_16(sum, in[k],*W++); - hidden[j] = tansig_approx(sum); - } - for (j=0;j<m->topo[2];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),14); - for (k=0;k<m->topo[1];k++) - sum = MAC16_16(sum, hidden[k], *W++); - out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); - } + return .5f + .5f*tansig_approx(.5f*x); +} + +static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x) +{ + int i, j; + for (i=0;i<rows;i++) + { + for (j=0;j<cols;j++) + out[i] += weights[j*col_stride + i]*x[j]; + } } -#else -void mlp_process(const MLP *m, const float *in, float *out) + +void compute_dense(const DenseLayer *layer, float *output, const float *input) { - int j; - float hidden[MAX_NEURONS]; - const float *W = m->weights; - /* Copy to tmp_in */ - for (j=0;j<m->topo[1];j++) - { - int k; - float sum = *W++; - for (k=0;k<m->topo[0];k++) - sum = sum + in[k]**W++; - hidden[j] = tansig_approx(sum); - } - for (j=0;j<m->topo[2];j++) - { - int k; - float sum = *W++; - for (k=0;k<m->topo[1];k++) - sum = sum + hidden[k]**W++; - out[j] = tansig_approx(sum); - } + int i; + int N, M; + int stride; + M = layer->nb_inputs; + N = layer->nb_neurons; + stride = N; + for (i=0;i<N;i++) + output[i] = layer->bias[i]; + gemm_accum(output, layer->input_weights, N, M, stride, input); + for (i=0;i<N;i++) + output[i] *= WEIGHTS_SCALE; + if (layer->sigmoid) { + for (i=0;i<N;i++) + output[i] = sigmoid_approx(output[i]); + } else { + for (i=0;i<N;i++) + output[i] = tansig_approx(output[i]); + } } -#endif + +void compute_gru(const GRULayer *gru, float *state, const float *input) +{ + int i; + int N, M; + int stride; + float tmp[MAX_NEURONS]; + float z[MAX_NEURONS]; + float r[MAX_NEURONS]; + float h[MAX_NEURONS]; + M = gru->nb_inputs; + N = gru->nb_neurons; + stride = 3*N; + /* Compute update gate. */ + for (i=0;i<N;i++) + z[i] = gru->bias[i]; + gemm_accum(z, gru->input_weights, N, M, stride, input); + gemm_accum(z, gru->recurrent_weights, N, N, stride, state); + for (i=0;i<N;i++) + z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]); + + /* Compute reset gate. */ + for (i=0;i<N;i++) + r[i] = gru->bias[N + i]; + gemm_accum(r, &gru->input_weights[N], N, M, stride, input); + gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state); + for (i=0;i<N;i++) + r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]); + + /* Compute output. */ + for (i=0;i<N;i++) + h[i] = gru->bias[2*N + i]; + for (i=0;i<N;i++) + tmp[i] = state[i] * r[i]; + gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input); + gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp); + for (i=0;i<N;i++) + h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]); + for (i=0;i<N;i++) + state[i] = h[i]; +} + |