HPM SDK
HPMicro Software Development Kit
NN Convolution Functions

The convolution functions transform the input matrix into a column vector with im2col, and then use matrix-matrix multiplication to get the convolution result. More...

Functions

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 8-bit integer convolution for RGB images with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 8-bit integer convolution with shift-based quantization on the outputs. More...
 
static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs fast signed 8-bit integer convolution with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias (const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 16-bit integer convolution with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast (const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs fast signed 16-bit integer convolution with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any (const q15_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 8-bit integer depthwise convolution with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
 This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shift-based quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
 This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym (const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym (const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any (const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *tmp_buf)
 This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size (const uint16_t in_tensor_ch)
 This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any. More...
 
static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t pad_x, const uint16_t stride_x, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, q15_t *in_tmp_buf)
 This function performs 1xn kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size (const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
 This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
 This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size (const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
 This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any. More...
 
static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any (const int8_t *in_tensor, const int32_t in_tensor_dim_x, const int32_t in_tensor_dim_y, const int32_t in_tensor_ch, const int8_t *ker_weight, const int32_t out_tensor_ch, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_tensor_dim_x, const int32_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const int32_t dilation_x, const int32_t dilation_y, int16_t *tmp_buf)
 This function performs depthwise 3x3 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ch_mult, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *tmp_buf)
 This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any (const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *in_tmp_buf)
 This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size (const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
 This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any. More...
 
static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any (const uint8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint8_t *ker_weight, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t in_offset, const int32_t ker_offset, const int32_t out_offset, uint8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t act_min, const int32_t act_max, const int32_t out_shift, const int32_t out_scale)
 This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs. More...
 

Detailed Description

The convolution functions transform the input matrix into a column vector with im2col, and then use matrix-matrix multiplication to get the convolution result.

Function Documentation

◆ hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before scaling
[in] out_scale: value of scaling for the output
[in] post_rshift: right shift amount applied to the output after scaling
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] pre_rshift: right shift amount applied to the output before scaling
[in] out_scale: value of scaling for the output
[in] post_rshift: right shift amount applied to the output after scaling
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const uint16_t  in_tensor_group,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] in_tensor_group: number of input tensor groups
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] bias: pointer of the bias vector
[out] out_tensor: pointer of the output tensor
[in] out_shift: pointer of the shift vector for output tensor
[in] out_scale: pointer of the scaling vector for output tensor
[in] out_offset: value of offset for the output tensor. It should be in the range of -128 to 127.
[in] in_offset: value of offset for the input tensor. It should be in the range of -127 to 128.
[in] act_min: minimum value to clip out the output tensor. It should be in the range of -128 to 127.
[in] act_max: maximum value to clip out the output tensor. It should be in the range of -128 to 127.
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] tmp_buf: dummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1

◆ hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size()

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size ( const uint16_t  in_tensor_ch)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any.

Parameters
[in] in_tensor_ch: number of input tensor channels
Returns
This function returns the needed size by the temporary buffer.

◆ hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1

Example:

//Convolve a 160x120x20 input tensor with a 1x1 kernel and generate a
//160x120x8 output tensor. Let both dimensions padding be 0 and their
//stride be 1.
#define IN_X 160
#define IN_Y 120
#define IN_CH 20
#define OUT_CH 8
#define KER_DIM_X 1
#define KER_DIM_Y 1
#define PAD_X 0
#define PAD_Y 0
#define STRIDE_X 1
#define STRIDE_Y 1
#define BIAS_LSHIFT 6 //Scale up the bias by 2^6
#define OUT_RSHIFT 9 //Scale down the output tensor by 1/2^9
#define OUT_X 160
#define OUT_Y 120
q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
q7_t out_data[OUT_CH * OUT_X * OUT_Y];
riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(in_data, IN_X, IN_Y ,
IN_CH, weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X,
STRIDE_Y, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y,
in_tmp_buf, NULL);
static int32_t in_tmp_buf
Definition: hpm_math.h:14080
static int32_t bias
Definition: hpm_math.h:14079

◆ hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any()

static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints (see the Note below for details).
Note
  • The input constraints of this function are:
    • in_tensor_ch is a multiple of 4
    • out_tensor_ch is a multiple of 2
    • ker_dim_x is 1
    • ker_dim_y is 1
    • pad_x is 0
    • pad_y is 0
    • stride_x is 1
    • stride_y is 1
  • The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any()

static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_ch,
const uint16_t  in_tensor_group,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  pad_x,
const uint16_t  stride_x,
const int32_t *  bias,
q7_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const uint16_t  out_tensor_dim_x,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs 1xn kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]in_tensor_groupdummy
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]stride_xconvolution stride in the x dimension
[in]biaspointer of the bias vector
[out]out_tensorpointer of the output tensor
[in]out_shiftpointer of the shift vector for output tensor
[in]out_scalepointer of the scaling vector for output tensor
[in]out_offsetvalue of offset for the output tensor. It should be in the range of -128 to 127.
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -127 to 128.
[in]act_minminimum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]act_maxmaximum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]out_tensor_dim_xx dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its required size can be obtained by calling riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraint that out_tensor_dim_x is a multiple of 4.

◆ hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size()

static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size ( const uint16_t  in_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any.

Parameters
[in]in_tensor_chnumber of input tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel. It is always 1 here.
Returns
This function returns the size, in bytes, needed by the temporary buffer.

◆ hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any ( const int8_t *  in_tensor,
const int32_t  in_tensor_dim_x,
const int32_t  in_tensor_dim_y,
const int32_t  in_tensor_ch,
const int8_t *  ker_weight,
const int32_t  out_tensor_ch,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
int8_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const int32_t  out_tensor_dim_x,
const int32_t  out_tensor_dim_y,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const int32_t  dilation_x,
const int32_t  dilation_y,
int16_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise 3x3 kernels convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[out]out_tensorpointer of the output tensor
[in]out_shiftpointer of the shift vector for output tensor
[in]out_scalepointer of the scaling vector for output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]out_offsetvalue of offset for the output tensor. It should be in the range of -128 to 127.
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -127 to 128.
[in]act_minminimum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]act_maxmaximum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]dilation_xdummy
[in]dilation_ydummy
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch and pad_x is less than 1.

◆ hpm_nn_conv_dw_HWC_s8_s16_s8_sym()

static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraint that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraint that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias()

static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ch_mult,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const uint16_t  dilation_x,
const uint16_t  dilation_y,
q15_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels. out_tensor_ch is equal to ch_mult * in_tensor_ch.
[in]ch_multmultiplier of input tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[out]out_tensorpointer of the output tensor
[in]out_shiftpointer of the shift vector for output tensor
[in]out_scalepointer of the scaling vector for output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]out_offsetvalue of offset for the output tensor. It should be in the range of -128 to 127.
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -127 to 128.
[in]act_minminimum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]act_maxmaximum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]dilation_xdummy
[in]dilation_ydummy
[in]tmp_bufdummy
Returns
This function only returns 0.

Example:

to be modified...

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const uint16_t  dilation_x,
const uint16_t  dilation_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[out]out_tensorpointer of the output tensor
[in]out_shiftpointer of the shift vector for output tensor
[in]out_scalepointer of the scaling vector for output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]out_offsetvalue of offset for the output tensor. It should be in the range of -128 to 127.
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -127 to 128.
[in]act_minminimum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]act_maxmaximum value to clip out the output tensor. It should be in the range of -128 to 127.
[in]dilation_xdummy
[in]dilation_ydummy
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its required size can be obtained by calling hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size.
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraint that in_tensor_ch has to be equal to out_tensor_ch.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size ( const uint16_t  in_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function is used to get the size, in bytes, needed by the input temporary buffer of hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any.

Parameters
[in]in_tensor_chnumber of input tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
Returns
This function returns the size needed by the temporary buffer.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 8-bit integer depthwise convolution with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.

Example:

//Convolve an 11x11x28 input tensor with a 3x3 kernel and generate a 9x9x48
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 11
#define IN_CH 28
#define OUT_CH 48
#define KER_DIM 3
#define PAD 0
#define STRIDE 1
#define OUT_RSHIFT 7
#define OUT_DIM 9
q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
q7_t weight[IN_CH * KER_DIM * KER_DIM * IN_CH] = {...};
q7_t bias[IN_CH] = {...};
q15_t in_tmp_buf[2 * OUT_CH * KER_DIM * KER_DIM] = {0};
q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(in_data, IN_DIM, IN_CH, weight,
OUT_CH, KER_DIM, PAD, STRIDE, bias, 0, OUT_RSHIFT, out_data, OUT_DIM,
in_tmp_buf, NULL);

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.

Example:

//Perform a depth-wise convolution for a 79x59x12 input tensor with a 3x3
//kernel and generate a 77x57x12 output tensor. Let both dimensions padding
//be 0 and their stride be 1.
#define IN_DIM_X 79
#define IN_DIM_Y 59
#define IN_CH 12
#define OUT_CH 12
#define KER_DIM 3
#define PAD 0
#define STRIDE 1
#define BIAS_SHIFT 0
#define OUT_RSHIFT 7
#define OUT_DIM_X 77
#define OUT_DIM_Y 57
q7_t in_data[IN_CH * IN_DIM_X * IN_DIM_Y] = {...};
q7_t weight[IN_CH * KER_DIM * KER_DIM * IN_CH] = {...};
q7_t bias[IN_CH] = {...};
q15_t in_tmp_buf[2 * OUT_CH * KER_DIM * KER_DIM] = {0};
q7_t out_data[OUT_CH * OUT_DIM_X * OUT_DIM_Y];
riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(in_data, IN_DIM_X, IN_DIM_Y,
IN_CH, weight, OUT_CH, KER_DIM, KER_DIM, PAD, PAD, STRIDE, STRIDE, bias,
BIAS_SHIFT, OUT_RSHIFT, out_data, OUT_DIM_X, OUT_DIM_Y, in_tmp_buf,
NULL);

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sym()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s16_s8_sym()

static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any()

static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias()

static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s8_s8_sym()

static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any()

static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias()

static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_u8_s8_sym()

static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any()

static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias()

static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim * ker_dim + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch has to be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "(in_tensor_ch * ker_dim_x * ker_dim_y + 1) / 2".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch must be equal to out_tensor_ch.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any()

static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any ( const uint8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const uint8_t *  ker_weight,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const int16_t  ch_mult,
const int16_t  pad_x,
const int16_t  pad_y,
const int16_t  stride_x,
const int16_t  stride_y,
const int16_t  dilation_x,
const int16_t  dilation_y,
const int32_t *  bias,
const int32_t  in_offset,
const int32_t  ker_offset,
const int32_t  out_offset,
uint8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
const int32_t  act_min,
const int32_t  act_max,
const int32_t  out_shift,
const int32_t  out_scale 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]ch_multmultiplier of input tensor channels
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]dilation_xdummy
[in]dilation_ydummy
[in]biaspointer of the bias vector
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -255 to 0.
[in]ker_offsetvalue of offset for the filter kernel. It should be in the range of -255 to 0.
[in]out_offsetvalue of offset for the output tensor. It should be in the range of 0 to 255.
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]act_minminimum value used to clip the output tensor. It should be in the range of 0 to 255.
[in]act_maxmaximum value used to clip the output tensor. It should be in the range of 0 to 255.
[in]out_shiftshift amount for the output tensor
[in]out_scalevalue of scaling for the output tensor
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraint that both ch_mult and ker_dim_x are multiples of 2.

◆ hpm_nn_conv_HWC_s16_s16_s16_sft_bias()

static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias ( const q15_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q15_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q15_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 16-bit integer convolution with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "in_tensor_ch * ker_dim * ker_dim".
[in]tmp_bufdummy
Returns
This function only returns 0.

Example:

//Convolve a 28x28x1 input tensor with a 5x5 kernel and generate a 24x24x20
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 28
#define IN_CH 1
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 20
#define OUT_DIM 24
q15_t input_data[IN_CH * IN_DIM * IN_DIM] = {...};
q15_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
q15_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[IN_CH * KER_DIM * KER_DIM] = {0};
q15_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
hpm_nn_conv_HWC_s16_s16_s16_sft_bias(input_data, IN_DIM, IN_CH, weight,
OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
OUT_DIM, in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast()

static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast ( const q15_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q15_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q15_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast signed 16-bit integer convolution with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that both in_tensor_ch and out_tensor_ch are multiples of 2.

Example:

//Convolve a 28x28x4 input tensor with a 5x5 kernel and generate a 24x24x8
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 28
#define IN_CH 4
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 8
#define OUT_DIM 24
q15_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
q15_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
q15_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
q15_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(in_data, IN_DIM, IN_CH, weight,
OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
OUT_DIM, in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any()

static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any ( const q15_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q15_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q15_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that both in_tensor_ch and out_tensor_ch are multiples of 2.

Example:

//Convolve a 160x120x20 input tensor with a 3x5 kernel and generate a
//80x59x8 output tensor. Let both dimensions padding be 1 and their stride
//be 2.
#define IN_X 160
#define IN_Y 120
#define IN_CH 20
#define OUT_CH 8
#define KER_DIM_X 3
#define KER_DIM_Y 5
#define PAD_X 1
#define PAD_Y 1
#define STRIDE_X 2
#define STRIDE_Y 2
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 9
#define OUT_X 80
#define OUT_Y 59
q15_t in_data[IN_CH * IN_X * IN_Y] = {...};
q15_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
q15_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
q15_t out_data[OUT_CH * OUT_X * OUT_Y];
hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(in_data, IN_X, IN_Y, IN_CH,
weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y,
bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf,
NULL);

◆ hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_diminput tensor dimension
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in]wt_tmp_buftemporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_diminput tensor dimension
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in]wt_tmp_buftemporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s16_s8_sym_fast()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any()

static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const uint16_t  in_tensor_group,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  out_tensor,
const int32_t *  out_shift,
const int32_t *  out_scale,
const int32_t  out_offset,
const int32_t  in_offset,
const int32_t  act_min,
const int32_t  act_max,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with asymmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]in_tensor_groupnumber of input tensor groups
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[out]out_tensorpointer of the output tensor
[in]out_shiftpointer of the shift vector for output tensor
[in]out_scalepointer of the scaling vector for output tensor
[in]out_offsetvalue of offset for the output tensor. It should be in the range of -128 to 127.
[in]in_offsetvalue of offset for the input tensor. It should be in the range of -127 to 128.
[in]act_minminimum value used to clip the output tensor. It should be in the range of -128 to 127.
[in]act_maxmaximum value used to clip the output tensor. It should be in the range of -128 to 127.
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its required size can be obtained by calling riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size.
Returns
This function only returns 0.

◆ hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size ( const uint16_t  in_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function returns the size, in bytes, required for the input temporary buffer of riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any.

Parameters
[in]in_tensor_chnumber of input tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
Returns
This function returns the required size, in bytes, of the temporary buffer.

◆ hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 8-bit integer convolution for RGB images with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_diminput tensor dimension
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * (3 * ker_dim * ker_dim + 1)".
[in]tmp_buftemporary buffer for kernel weights. It is required when -mext-vector enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.

Example:

//Convolve a 28x28x3 input tensor with a 5x5 kernel and generate a 24x24x20
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 28
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 20
#define OUT_DIM 24
q7_t in_data[3 * IN_DIM * IN_DIM] = {...};
q7_t weight[3 * KER_DIM * KER_DIM * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * (3 * KER_DIM * KER_DIM + 1)] = {0};
q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(in_data, IN_DIM, weight, OUT_CH,
KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_DIM,
in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in]wt_tmp_buftemporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.

Example:

//Convolve a 28x28x3 input tensor with a 5x5 kernel and generate a 24x24x20
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 28
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 20
#define OUT_DIM 24
q7_t in_data[3 * IN_DIM * IN_DIM] = {...};
q7_t weight[3 * KER_DIM * KER_DIM * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * (3 * KER_DIM * KER_DIM + 1)] = {0};
q15_t wt_tmp_buf[OUT_CH * (3 * KER_DIM * KER_DIM + 1)];
q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(in_data, IN_DIM, weight,
OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
OUT_DIM, in_tmp_buf, wt_tmp_buf);

◆ hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_diminput tensor dimension
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in]wt_tmp_buftemporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_diminput tensor dimension
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in]wt_tmp_buftemporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector enabled and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_sft_bias()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 8-bit integer convolution with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
[in]tmp_bufdummy
Returns
This function only returns 0.

Example:

//Convolve a 28x28x1 input tensor with a 5x5 kernel and generate a 24x24x20
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 28
#define IN_CH 1
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 20
#define OUT_DIM 24
q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
q7_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
riscv_nn_conv_HWC_s8_s8_s8_sft_bias(in_data, IN_DIM, IN_CH, weight, OUT_CH,
KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_DIM,
in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any()

static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
[in]tmp_bufdummy

Example:

//Convolve a 160x120x3 input tensor with a 3x5 kernel and generate an 80x59x5
//output tensor. Let both dimensions padding be 1 and their stride be 2.
#define IN_X 160
#define IN_Y 120
#define IN_CH 3
#define OUT_CH 5
#define KER_DIM_X 3
#define KER_DIM_Y 5
#define PAD_X 1
#define PAD_Y 1
#define STRIDE_X 2
#define STRIDE_Y 2
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 9
#define OUT_X 80
#define OUT_Y 59
q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
q7_t out_data[OUT_CH * OUT_X * OUT_Y];
riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(in_data, IN_X, IN_Y , IN_CH, weight,
OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y, bias,
BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast signed 8-bit integer convolution with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.

Example:

//Convolve a 12x12x20 input tensor with a 5x5 kernel and generate a 8x8x50
//output tensor. Let both dimensions padding be 0 and their stride be 1.
#define IN_DIM 12
#define IN_CH 20
#define KER_DIM 5
#define PAD 0
#define STRIDE 1
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 10
#define OUT_CH 50
#define OUT_DIM 8
q7_t in_data[IN_CH * IN_DIM * IN_DIM] = {...};
q7_t weight[IN_CH * KER_DIM * KER_DIM * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM * KER_DIM] = {0};
q7_t out_data[OUT_CH * OUT_DIM * OUT_DIM];
riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(in_data, IN_DIM, IN_CH, weight,
OUT_CH, KER_DIM, PAD, STRIDE, bias, BIAS_LSHIFT, OUT_RSHIFT, out_data,
OUT_DIM, in_tmp_buf, NULL);

◆ hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_lshift,
const uint16_t  out_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf,
q7_t *  tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-based quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]bias_lshiftleft shift amount for the bias
[in]out_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
[in]tmp_bufdummy
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.

Example:

//Convolve a 160x120x20 input tensor with a 3x5 kernel and generate an
//80x59x8 output tensor. Let both dimensions padding be 1 and their stride
//be 2.
#define IN_X 160
#define IN_Y 120
#define IN_CH 20
#define OUT_CH 8
#define KER_DIM_X 3
#define KER_DIM_Y 5
#define PAD_X 1
#define PAD_Y 1
#define STRIDE_X 2
#define STRIDE_Y 2
#define BIAS_LSHIFT 6
#define OUT_RSHIFT 9
#define OUT_X 80
#define OUT_Y 59
q7_t in_data[IN_CH * IN_X * IN_Y] = {...};
q7_t weight[IN_CH * KER_DIM_X * KER_DIM_Y * OUT_CH] = {...};
q7_t bias[OUT_CH] = {...};
q15_t in_tmp_buf[2 * IN_CH * KER_DIM_X * KER_DIM_Y] = {0};
q7_t out_data[OUT_CH * OUT_Y * OUT_X];
riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(in_data, IN_X, IN_Y, IN_CH,
weight, OUT_CH, KER_DIM_X, KER_DIM_Y, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y,
bias, BIAS_LSHIFT, OUT_RSHIFT, out_data, OUT_X, OUT_Y, in_tmp_buf,
NULL);

◆ hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]biaspointer of the bias vector
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_sym_fast()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dimdimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dimdimension of the filter kernel
[in]padpadding size
[in]strideconvolution stride
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dimdimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any()

static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any ( const q7_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inlinestatic

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in]in_tensorpointer of the input tensor
[in]in_tensor_dim_xx dimension of the input tensor
[in]in_tensor_dim_yy dimension of the input tensor
[in]in_tensor_chnumber of input tensor channels
[in]ker_weightpointer of kernel weights
[in]out_tensor_chnumber of output tensor channels
[in]ker_dim_xx dimension of the filter kernel
[in]ker_dim_yy dimension of the filter kernel
[in]pad_xpadding size in the x dimension
[in]pad_ypadding size in the y dimension
[in]stride_xconvolution stride in the x dimension
[in]stride_yconvolution stride in the y dimension
[in]pre_rshiftright shift amount for the output
[in]out_scalevalue of scaling for the output
[in]post_rshiftright shift amount for the output
[out]out_tensorpointer of the output tensor
[in]out_tensor_dim_xx dimension of the output tensor
[in]out_tensor_dim_yy dimension of the output tensor
[in]in_tmp_buftemporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector enabled and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any()

static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q15_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any()

static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
q7_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf,
q15_t *  wt_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: input tensor dimension
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "2 * (3 * ker_dim * ker_dim + 1)".
[in] wt_tmp_buf: temporary buffer for kernel weights. It is required when -mext-dsp or -mext-vector is enabled, and its size must be "out_tensor_ch * (3 * ker_dim * ker_dim + 1)".
Returns
This function only returns 0.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q31_t *  bias,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with bias inputs and symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] bias: pointer of the bias vector
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_sym_fast()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim,
const uint16_t  pad,
const uint16_t  stride,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer of the input tensor
[in] in_tensor_dim: dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer of kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim: dimension of the filter kernel
[in] pad: padding size
[in] stride: convolution stride
[in] pre_rshift: right shift amount applied to the output before it is multiplied by out_scale
[in] out_scale: scaling value multiplied with the output after the pre_rshift shift
[in] post_rshift: right shift amount applied to the output after it is multiplied by out_scale
[out] out_tensor: pointer of the output tensor
[in] out_tensor_dim: dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim * ker_dim".
Returns
This function returns 0 on success; otherwise, it returns -1 if its inputs do not meet the constraints that in_tensor_ch is a multiple of 4 and out_tensor_ch is a multiple of 2.
Note
The outputs will be 2-stage shifted before being stored, i.e., out = ((out >> pre_rshift) *out_scale) >> post_rshift.

◆ hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any()

static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any ( const u8_t *  in_tensor,
const uint16_t  in_tensor_dim_x,
const uint16_t  in_tensor_dim_y,
const uint16_t  in_tensor_ch,
const q7_t *  ker_weight,
const uint16_t  out_tensor_ch,
const uint16_t  ker_dim_x,
const uint16_t  ker_dim_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const uint16_t  pre_rshift,
const uint16_t  out_scale,
const uint16_t  post_rshift,
u8_t *  out_tensor,
const uint16_t  out_tensor_dim_x,
const uint16_t  out_tensor_dim_y,
q15_t *  in_tmp_buf 
)
inline static

#include <middleware/hpm_math/hpm_math.h>

This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dimensions with symmetric quantization on the outputs.

Parameters
[in] in_tensor: pointer to the input tensor
[in] in_tensor_dim_x: x dimension of the input tensor
[in] in_tensor_dim_y: y dimension of the input tensor
[in] in_tensor_ch: number of input tensor channels
[in] ker_weight: pointer to the kernel weights
[in] out_tensor_ch: number of output tensor channels
[in] ker_dim_x: x dimension of the filter kernel
[in] ker_dim_y: y dimension of the filter kernel
[in] pad_x: padding size in the x dimension
[in] pad_y: padding size in the y dimension
[in] stride_x: convolution stride in the x dimension
[in] stride_y: convolution stride in the y dimension
[in] pre_rshift: right shift amount applied to the output before scaling
[in] out_scale: scaling value for the output
[in] post_rshift: right shift amount applied to the output after scaling
[out] out_tensor: pointer to the output tensor
[in] out_tensor_dim_x: x dimension of the output tensor
[in] out_tensor_dim_y: y dimension of the output tensor
[in] in_tmp_buf: temporary buffer for the input tensor. It is required when -mext-dsp or -mext-vector is enabled, and its size must be equal to "2 * in_tensor_ch * ker_dim_x * ker_dim_y".
Returns
This function returns 0 on success. It returns -1 if the inputs do not meet the constraints: in_tensor_ch must be a multiple of 4 and out_tensor_ch must be a multiple of 2.
Note
The outputs are shifted in two stages before being stored, i.e., out = ((out >> pre_rshift) * out_scale) >> post_rshift.