media/libaom/src/av1/encoder/ml.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_ENCODER_ML_H_
#define AOM_AV1_ENCODER_ML_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "config/av1_rtcd.h"

// Upper bound on the number of hidden layers in a network. It sizes the
// per-layer arrays of the configuration structs declared in this header.
#define NN_MAX_HIDDEN_LAYERS 10
// Upper bound on the number of nodes that a single hidden layer is assumed to
// have (see the note on av1_nn_predict_v2).
#define NN_MAX_NODES_PER_LAYER 128

// Neural network configuration: the layer sizes together with read-only
// pointers to the weight and bias parameters of each layer.
// This header does not specify the memory layout of the individual weight and
// bias arrays, nor who owns them.
struct NN_CONFIG {
  int num_inputs;         // Number of input nodes, i.e. features.
  int num_outputs;        // Number of output nodes.
  int num_hidden_layers;  // Number of hidden layers, maximum 10
                          // (NN_MAX_HIDDEN_LAYERS).
  // Number of nodes for each hidden layer.
  int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
  // Weight parameters, indexed by layer. The array has one slot more than the
  // maximum number of hidden layers.
  // NOTE(review): the extra slot is presumably for the output layer - confirm
  // against the prediction code.
  const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
  // Bias parameters, indexed by layer. Same number of slots as weights.
  const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
};
// Typedef from struct NN_CONFIG to NN_CONFIG is in rtcd_defs

#if CONFIG_NN_V2
// Fully-connected layer configuration.
// The sizes and the activation are const members, so they can only be set when
// the struct is initialized. The parameter, output and gradient buffers are
// mutable pointers; their sizes and ownership are not specified in this header.
// The FC_LAYER and ACTIVATION type names are not declared in this header and
// must come from elsewhere.
struct FC_LAYER {
  const int num_inputs;   // Number of input nodes, i.e. features.
  const int num_outputs;  // Number of output nodes.

  float *weights;               // Weight parameters.
  float *bias;                  // Bias parameters.
  const ACTIVATION activation;  // Activation function.

  float *output;  // The output array.
  float *dY;      // Gradient of outputs.
  float *dW;      // Gradient of weights.
  float *db;      // Gradient of bias.
};

// Neural network configuration structure, version 2 (only available when
// CONFIG_NN_V2 is enabled). Unlike struct NN_CONFIG, each layer is described by
// its own FC_LAYER entry, which carries mutable parameter and output buffers.
// The LOSS type name is not declared in this header and must come from
// elsewhere.
struct NN_CONFIG_V2 {
  const int num_hidden_layers;  // Number of hidden layers, max = 10.
  // The layer array. It has one slot more than the maximum number of hidden
  // layers.
  // NOTE(review): the extra slot is presumably the output layer - confirm.
  FC_LAYER layer[NN_MAX_HIDDEN_LAYERS + 1];  // The layer array
  const int num_logits;                      // Number of output nodes.
  float *logits;    // Raw prediction (same as output of final layer)
  const LOSS loss;  // Loss function
};

// Calculate prediction based on the given input features and neural net config.
// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
// layer.
//
// features:    input feature values.
//              NOTE(review): presumably one value per input node of the first
//              layer - confirm against the implementation.
// nn_config:   network configuration. It is passed as a non-const pointer.
//              NOTE(review): presumably the per-layer output/logits buffers
//              are written during prediction - confirm.
// reduce_prec: NOTE(review): presumably a flag requesting the precision
//              reduction described for av1_nn_output_prec_reduce - confirm.
// output:      array that receives the prediction.
void av1_nn_predict_v2(const float *features, NN_CONFIG_V2 *nn_config,
                       int reduce_prec, float *output);
#endif  // CONFIG_NN_V2

// Applies the softmax normalization function to the input
// to get a valid probability distribution in the output:
// output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k]))
//
// input:  array of n raw values (read-only).
// output: array of n values that receives the normalized result.
// n:      number of elements in both input and output.
void av1_nn_softmax(const float *input, float *output, int n);

// Applies a precision reduction to output of av1_nn_predict to prevent
// mismatches between C and SIMD implementations.
//
// output:     array of prediction values; passed as a non-const pointer.
//             NOTE(review): presumably modified in place - confirm.
// num_output: number of elements in output.
void av1_nn_output_prec_reduce(float *const output, int num_output);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // AOM_AV1_ENCODER_ML_H_