17 #define HPM_DSP_HW_NDS32 1
19 #ifdef CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
20 #include CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
28 #define HPM_MATH_DSP_STATISTICS 1
29 #define HPM_MATH_DSP_BASIC 1
30 #define HPM_MATH_DSP_COMPLEX 1
31 #define HPM_MATH_DSP_CONTROLLER 1
32 #define HPM_MATH_DSP_DISTANCE 1
33 #define HPM_MATH_DSP_FILTERING 1
34 #define HPM_MATH_DSP_MATRIX 1
35 #define HPM_MATH_DSP_SVM 1
36 #define HPM_MATH_DSP_TRANSFORM 1
37 #define HPM_MATH_DSP_UTILS 1
38 #define HPM_MATH_DSP_SORT 1
40 #define HPM_MATH_NN_ACTIVATION 1
41 #define HPM_MATH_NN_TINYENGINE 1
42 #define HPM_MATH_NN_BASIC 1
43 #define HPM_MATH_NN_CONCATENATION 1
44 #define HPM_MATH_NN_CONVOLUTION 1
45 #define HPM_MATH_NN_CONNECTED 1
46 #define HPM_MATH_NN_POOLING 1
47 #define HPM_MATH_NN_SOFTMAX 1
48 #define HPM_MATH_NN_UTIL 1
50 #define HPM_DSP_CORE HPM_DSP_HW_NDS32
52 #define HPM_MATH_PI (3.14159265358979323846)
62 #define HPM_MATH_SW_FFT_CHECKLIST
71 #ifdef HPM_MATH_DSP_STATISTICS
79 #ifdef HPM_EN_MATH_DSP_LIB
85 #include "riscv_dsp_statistics_math.h"
97 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
100 tpt_max_f32(&res, index, src,
size);
103 return riscv_dsp_max_f32(src,
size, index);
109 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
110 return riscv_dsp_max_val_f32(src,
size);
122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
125 tpt_max_q15(&res, index, src,
size);
128 return riscv_dsp_max_q15(src,
size, index);
142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
145 tpt_max_q31(&res, index, src,
size);
148 return riscv_dsp_max_q31(src,
size, index);
162 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
165 tpt_max_q7(&res, index, src,
size);
168 return riscv_dsp_max_q7(src,
size, index);
182 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
183 return riscv_dsp_max_u8(src,
size, index);
197 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
200 tpt_min_f32(&res, index, src,
size);
203 return riscv_dsp_min_f32(src,
size, index);
217 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
220 tpt_min_q15(&res, index, src,
size);
223 return riscv_dsp_min_q15(src,
size, index);
237 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
240 tpt_min_q31(&res, index, src,
size);
243 return riscv_dsp_min_q31(src,
size, index);
257 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
260 tpt_min_q7(&res, index, src,
size);
263 return riscv_dsp_min_q7(src,
size, index);
277 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
278 return riscv_dsp_min_u8(src,
size, index);
291 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
294 tpt_mean_f32(&res, src,
size);
297 return riscv_dsp_mean_f32(src,
size);
315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
318 tpt_mean_q15(&res, src,
size);
321 return riscv_dsp_mean_q15(src,
size);
339 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
342 tpt_mean_q31(&res, src,
size);
345 return riscv_dsp_mean_q31(src,
size);
363 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
366 tpt_mean_q7(&res, src,
size);
369 return riscv_dsp_mean_q7(src,
size);
385 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
386 return riscv_dsp_mean_u8(src,
size);
399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
402 tpt_power_f32(&res, src,
size);
405 return riscv_dsp_pwr_f32(src,
size);
424 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
427 tpt_power_q15(&res, src,
size);
430 return riscv_dsp_pwr_q15(src,
size);
450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
453 tpt_power_q31(&res, src,
size);
456 return riscv_dsp_pwr_q31(src,
size);
475 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
478 tpt_power_q7(&res, src,
size);
481 return riscv_dsp_pwr_q7(src,
size);
495 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
498 tpt_rms_f32(&res, src,
size);
501 return riscv_dsp_rms_f32(src,
size);
521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
524 tpt_rms_q15(&res, src,
size);
527 return riscv_dsp_rms_q15(src,
size);
547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
550 tpt_rms_q31(&res, src,
size);
553 return riscv_dsp_rms_q31(src,
size);
567 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
570 tpt_std_f32(&res, src,
size);
573 return riscv_dsp_std_f32(src,
size);
593 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
596 tpt_std_q15(&res, src,
size);
599 return riscv_dsp_std_q15(src,
size);
619 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
622 tpt_std_q31(&res, src,
size);
625 return riscv_dsp_std_q31(src,
size);
644 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
645 return riscv_dsp_std_u8(src,
size);
658 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
661 tpt_var_f32(&res, src,
size);
664 return riscv_dsp_var_f32(src,
size);
684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
687 tpt_var_q15(&res, src,
size);
690 return riscv_dsp_var_q15(src,
size);
710 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
713 tpt_var_q31(&res, src,
size);
716 return riscv_dsp_var_q31(src,
size);
731 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
733 return tpt_entropy_f32(src,
size);
735 return riscv_dsp_entropy_f32(src,
size);
754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
756 return tpt_relative_entropy_f32(src1, src2,
size);
758 return riscv_dsp_relative_entropy_f32(src1, src2,
size);
772 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
/*
 * NOTE(review): the tpt_lse_f32() call below discards its result, whereas the
 * riscv_dsp_lse_f32() path returns the backend's value. If the enclosing
 * function is non-void, the tpt_ path reaches the end of the function without
 * returning a value - confirm whether a `return` is missing here.
 */
774 tpt_lse_f32(src,
size);
776 return riscv_dsp_lse_f32(src,
size);
792 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
794 return tpt_lse_dprod_f32(src1, src2,
size, buffer);
796 return riscv_dsp_lse_dprod_f32(src1, src2,
size, buffer);
812 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
813 return riscv_dsp_gaussian_naive_bayes_est_f32(instance, src, buf);
826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
827 return riscv_dsp_absmax_f32(src,
size, index);
840 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
841 return riscv_dsp_absmax_q15(src,
size, index);
854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
855 return riscv_dsp_absmax_q31(src,
size, index);
868 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
869 return riscv_dsp_absmax_q7(src,
size, index);
882 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
883 return riscv_dsp_absmin_f32(src,
size, index);
896 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
897 return riscv_dsp_absmin_q31(src,
size, index);
910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
911 return riscv_dsp_absmin_q15(src,
size, index);
924 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
925 return riscv_dsp_absmin_q7(src,
size, index);
937 #ifdef HPM_MATH_DSP_BASIC
944 #ifdef HPM_EN_MATH_DSP_LIB
947 #include "tpt_math.h"
950 #include "riscv_dsp_basic_math.h"
961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
963 tpt_abs_f32(dst, src,
size);
965 riscv_dsp_abs_f32(src, dst,
size);
981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
983 tpt_abs_q31(dst, src,
size);
985 riscv_dsp_abs_q31(src, dst,
size);
1002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1004 tpt_abs_q15(dst, src,
size);
1006 riscv_dsp_abs_q15(src, dst,
size);
1022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1024 tpt_abs_q7(dst, src,
size);
1026 riscv_dsp_abs_q7(src, dst,
size);
1041 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1043 tpt_add_f32(dst, src1, src2,
size);
1045 riscv_dsp_add_f32(src1, src2, dst,
size);
1061 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1063 tpt_add_q31(dst, src1, src2,
size);
1065 riscv_dsp_add_q31(src1, src2, dst,
size);
1081 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1083 tpt_add_q15(dst, src1, src2,
size);
1085 riscv_dsp_add_q15(src1, src2, dst,
size);
1101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1103 tpt_add_q7(dst, src1, src2,
size);
1105 riscv_dsp_add_q7(src1, src2, dst,
size);
1121 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1123 tpt_add_u8_u16(dst, src1, src2,
size);
1125 riscv_dsp_add_u8_u16(src1, src2, dst,
size);
1140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1142 tpt_sub_f32(dst, src1, src2,
size);
1144 riscv_dsp_sub_f32(src1, src2, dst,
size);
1160 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1162 tpt_sub_q31(dst, src1, src2,
size);
1164 riscv_dsp_sub_q31(src1, src2, dst,
size);
1180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1182 tpt_sub_q15(dst, src1, src2,
size);
1184 riscv_dsp_sub_q15(src1, src2, dst,
size);
1200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1202 tpt_sub_q7(dst, src1, src2,
size);
1204 riscv_dsp_sub_q7(src1, src2, dst,
size);
1220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1221 riscv_dsp_sub_u8_q7(src1, src2, dst,
size);
1235 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1237 tpt_mult_f32(dst, src1, src2,
size);
1239 riscv_dsp_mul_f32(src1, src2, dst,
size);
1255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1257 tpt_mult_q31(dst, src1, src2,
size);
1259 riscv_dsp_mul_q31(src1, src2, dst,
size);
1275 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1277 tpt_mult_q15(dst, src1, src2,
size);
1279 riscv_dsp_mul_q15(src1, src2, dst,
size);
1295 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1297 tpt_mult_q7(dst, src1, src2,
size);
1299 riscv_dsp_mul_q7(src1, src2, dst,
size);
1315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1316 riscv_dsp_mul_u8_u16(src1, src2, dst,
size);
1330 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1332 tpt_div_f32(dst, src1, src2,
size);
1334 riscv_dsp_div_f32(src1, src2, dst,
size);
1347 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1349 return tpt_div_q31(src1, src2);
1351 return riscv_dsp_div_q31(src1, src2);
1364 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1366 return tpt_div_s64_u32(src1, src2);
1368 return riscv_dsp_div_s64_u32(src1, src2);
1381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1383 return tpt_div_u64_u32(src1, src2);
1385 return riscv_dsp_div_u64_u32(src1, src2);
1399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1401 tpt_negate_f32(dst, src,
size);
1403 riscv_dsp_neg_f32(src, dst,
size);
1419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1421 tpt_negate_q31(dst, src,
size);
1423 riscv_dsp_neg_q31(src, dst,
size);
1439 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1441 tpt_negate_q15(dst, src,
size);
1443 riscv_dsp_neg_q15(src, dst,
size);
1459 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1461 tpt_negate_q7(dst, src,
size);
1463 riscv_dsp_neg_q7(src, dst,
size);
1478 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1481 tpt_dot_prod_f32(&res, src1, src2,
size);
1484 return riscv_dsp_dprod_f32(src1, src2,
size);
1503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1506 tpt_dot_prod_q31(&res, src1, src2,
size);
1509 return riscv_dsp_dprod_q31(src1, src2,
size);
1526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1529 tpt_dot_prod_q15(&res, src1, src2,
size);
1532 return riscv_dsp_dprod_q15(src1, src2,
size);
1550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1551 return riscv_dsp_dprod_u8xq15(src1, src2,
size);
1568 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1571 tpt_dot_prod_q7(&res, src1, src2,
size);
1574 return riscv_dsp_dprod_q7(src1, src2,
size);
1591 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1592 return riscv_dsp_dprod_q7xq15(src1, src2,
size);
1608 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1609 return riscv_dsp_dprod_u8(src1, src2,
size);
1623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1625 tpt_offset_f32(dst, src, offset,
size);
1627 riscv_dsp_offset_f32(src, offset, dst,
size);
1643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1645 tpt_offset_q31(dst, src, offset,
size);
1647 riscv_dsp_offset_q31(src, offset, dst,
size);
1663 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1665 tpt_offset_q15(dst, src, offset,
size);
1667 riscv_dsp_offset_q15(src, offset, dst,
size);
1683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1685 tpt_offset_q7(dst, src, offset,
size);
1687 riscv_dsp_offset_q7(src, offset, dst,
size);
1703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1704 riscv_dsp_offset_u8(src, offset, dst,
size);
1718 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1720 tpt_scale_f32(dst, src, scale,
size);
1722 riscv_dsp_scale_f32(src, scale, dst,
size);
1741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1743 tpt_scale_q31(dst, src, scalefract, shift,
size);
1745 riscv_dsp_scale_q31(src, scalefract, shift, dst,
size);
1764 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1766 tpt_scale_q15(dst, src, scalefract, shift,
size);
1768 riscv_dsp_scale_q15(src, scalefract, shift, dst,
size);
1787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1789 tpt_scale_q7(dst, src, scalefract, shift,
size);
1791 riscv_dsp_scale_q7(src, scalefract, shift, dst,
size);
1809 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1810 riscv_dsp_scale_u8(src, scalefract, shift, dst,
size);
1827 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1829 tpt_shift_q15(dst, src, shift,
size);
1831 riscv_dsp_shift_q15(src, shift, dst,
size);
1848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1850 tpt_shift_q31(dst, src, shift,
size);
1852 riscv_dsp_shift_q31(src, shift, dst,
size);
1869 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1871 tpt_shift_q7(dst, src, shift,
size);
1873 riscv_dsp_shift_q7(src, shift, dst,
size);
1890 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1891 riscv_dsp_shift_u8(src, shift, dst,
size);
1910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1912 tpt_clip_f32(dst, src, low, high,
size);
1914 riscv_dsp_clip_f32(src, dst, low, high,
size);
1929 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1931 tpt_clip_q31(dst, src, low, high,
size);
1933 riscv_dsp_clip_q31(src, dst, low, high,
size);
1948 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1950 tpt_clip_q15(dst, src, low, high,
size);
1952 riscv_dsp_clip_q15(src, dst, low, high,
size);
1967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1969 tpt_clip_q7(dst, src, low, high,
size);
1971 riscv_dsp_clip_q7(src, dst, low, high,
size);
2000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2002 tpt_and_32bit(dst, src1, src2,
size);
2004 riscv_dsp_and_u32(src1, src2, dst,
size);
2018 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2020 tpt_and_8bit(dst, src1, src2,
size);
2022 riscv_dsp_and_u8(src1, src2, dst,
size);
2051 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2053 tpt_or_32bit(dst, src1, src2,
size);
2055 riscv_dsp_or_u32(src1, src2, dst,
size);
2069 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2071 tpt_or_16bit(dst, src1, src2,
size);
2073 riscv_dsp_or_u16(src1, src2, dst,
size);
2087 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2089 tpt_or_8bit(dst, src1, src2,
size);
2091 riscv_dsp_or_u8(src1, src2, dst,
size);
2120 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2122 tpt_xor_32bit(dst, src1, src2,
size);
2124 riscv_dsp_xor_u32(src1, src2, dst,
size);
2138 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2140 tpt_xor_16bit(dst, src1, src2,
size);
2142 riscv_dsp_xor_u16(src1, src2, dst,
size);
2156 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2158 tpt_xor_8bit(dst, src1, src2,
size);
2160 riscv_dsp_xor_u8(src1, src2, dst,
size);
2188 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2190 tpt_not_32bit(dst, src,
size);
2192 riscv_dsp_not_u32(src, dst,
size);
2205 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2207 tpt_not_16bit(dst, src,
size);
2209 riscv_dsp_not_u16(src, dst,
size);
2222 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2224 tpt_not_8bit(dst, src,
size);
2226 riscv_dsp_not_u8(src, dst,
size);
2272 #ifdef HPM_MATH_DSP_COMPLEX
2283 #ifdef HPM_EN_MATH_DSP_LIB
2286 #include "tpt_math.h"
2289 #include "riscv_dsp_complex_math.h"
2300 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2302 tpt_cmplx_conj_f32(dst, src,
size);
2304 riscv_dsp_cconj_f32(src, dst,
size);
2320 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2322 tpt_cmplx_conj_q15(dst, src,
size);
2324 riscv_dsp_cconj_q15(src, dst,
size);
2340 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2342 tpt_cmplx_conj_q31(dst, src,
size);
2344 riscv_dsp_cconj_q31(src, dst,
size);
2359 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2360 riscv_dsp_cdprod_f32(src1, src2,
size, dst);
2374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2376 tpt_cmplx_dot_prod_f32(rout, iout, src1, src2,
size);
2378 riscv_dsp_cdprod_typ2_f32(src1, src2,
size, rout, iout);
2395 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2396 riscv_dsp_cdprod_q15(src1, src2,
size, dst);
2413 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2414 riscv_dsp_cdprod_typ2_q15(src1, src2,
size, rout, iout);
2430 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2431 riscv_dsp_cdprod_q31(src1, src2,
size, dst);
2449 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2451 tpt_cmplx_dot_prod_q31(rout, iout, src1, src2,
size);
2453 riscv_dsp_cdprod_typ2_q31(src1, src2,
size, rout, iout);
2467 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2469 tpt_cmplx_mag_f32(dst, src,
size);
2471 riscv_dsp_cmag_f32(src, dst,
size);
2487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2489 tpt_cmplx_mag_q15(dst, src,
size);
2491 riscv_dsp_cmag_q15(src, dst,
size);
2507 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2509 tpt_cmplx_mag_q31(dst, src,
size);
2511 riscv_dsp_cmag_q31(src, dst,
size);
2526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2528 tpt_cmplx_mag_squared_f32(dst, src,
size);
2530 riscv_dsp_cmag_sqr_f32(src, dst,
size);
2546 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2548 tpt_cmplx_mag_squared_q15(dst, src,
size);
2550 riscv_dsp_cmag_sqr_q15(src, dst,
size);
2566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2568 tpt_cmplx_mag_squared_q31(dst, src,
size);
2570 riscv_dsp_cmag_sqr_q31(src, dst,
size);
2585 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2587 tpt_cmplx_mult_cmplx_f32(dst, src1, src2,
size);
2589 riscv_dsp_cmul_f32(src1, src2, dst,
size);
2606 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2608 tpt_cmplx_mult_cmplx_q15(dst, src1, src2,
size);
2610 riscv_dsp_cmul_q15(src1, src2, dst,
size);
2627 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2629 tpt_cmplx_mult_cmplx_q31(dst, src1, src2,
size);
2631 riscv_dsp_cmul_q31(src1, src2, dst,
size);
2646 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2648 tpt_cmplx_mult_real_f32(dst, src, real,
size);
2650 riscv_dsp_cmul_real_f32(src, real, dst,
size);
2666 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2668 tpt_cmplx_mult_real_q15(dst, src, real,
size);
2670 riscv_dsp_cmul_real_q15(src, real, dst,
size);
2686 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2688 tpt_cmplx_mult_real_q31(dst, src, real,
size);
2690 riscv_dsp_cmul_real_q31(src, real, dst,
size);
2702 #ifdef HPM_MATH_DSP_CONTROLLER
2709 #ifdef HPM_EN_MATH_DSP_LIB
2711 #include "riscv_dsp_controller_math.h"
2723 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2724 riscv_dsp_clarke_f32(a, b, alpha, beta);
2739 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2740 riscv_dsp_clarke_q31(a, b, alpha, beta);
2754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2755 riscv_dsp_inv_clarke_f32(alpha, beta, a, b);
2770 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2771 riscv_dsp_inv_clarke_q31(alpha, beta, a, b);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards all six
 *        arguments, in the same order, to riscv_dsp_park_f32().
 *
 * @param alpha forwarded by value
 * @param beta  forwarded by value
 * @param a     pointer forwarded to the backend (presumably an output - confirm
 *              against riscv_dsp_controller_math.h)
 * @param b     pointer forwarded to the backend (presumably an output - confirm)
 * @param sin   forwarded by value. NOTE(review): this name hides sin() from
 *              <math.h> inside the function if that header is in scope.
 * @param cos   forwarded by value. NOTE(review): hides cos() likewise.
 */
2785 static inline void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
2787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2788 riscv_dsp_park_f32(alpha, beta, a, b, sin, cos);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards all six
 *        arguments, in the same order, to riscv_dsp_park_q31().
 *
 * @param alpha forwarded by value
 * @param beta  forwarded by value
 * @param a     pointer forwarded to the backend (presumably an output - confirm
 *              against riscv_dsp_controller_math.h)
 * @param b     pointer forwarded to the backend (presumably an output - confirm)
 * @param sin   forwarded by value. NOTE(review): this name hides sin() from
 *              <math.h> inside the function if that header is in scope.
 * @param cos   forwarded by value. NOTE(review): hides cos() likewise.
 */
2804 static inline void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
2806 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2807 riscv_dsp_park_q31(alpha, beta, a, b, sin, cos);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards all six
 *        arguments, in the same order, to riscv_dsp_inv_park_f32().
 *
 * @param a     forwarded by value
 * @param b     forwarded by value
 * @param alpha pointer forwarded to the backend (presumably an output - confirm
 *              against riscv_dsp_controller_math.h)
 * @param beta  pointer forwarded to the backend (presumably an output - confirm)
 * @param sin   forwarded by value. NOTE(review): this name hides sin() from
 *              <math.h> inside the function if that header is in scope.
 * @param cos   forwarded by value. NOTE(review): hides cos() likewise.
 */
2821 static inline void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
2823 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2824 riscv_dsp_inv_park_f32(a, b, alpha, beta, sin, cos);
2841 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2842 riscv_dsp_inv_park_q31(a, b, alpha, beta, sin, cos);
2854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2855 return riscv_dsp_pid_f32(instance, src);
2872 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2873 riscv_dsp_init_pid_f32(instance, set);
2886 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2887 return riscv_dsp_pid_q31(instance, src);
2905 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2906 riscv_dsp_init_pid_q31(instance, set);
2912 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2913 return riscv_dsp_pid_q15(instance, src);
2930 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2931 riscv_dsp_init_pid_q15(instance, set);
2942 #ifdef HPM_MATH_DSP_DISTANCE
2949 #ifdef HPM_EN_MATH_DSP_LIB
2951 #include "tpt_math.h"
2953 #include "riscv_dsp_distance_math.h"
2965 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2967 return tpt_braycurtis_distance_f32(src1, src2,
size);
2969 return riscv_dsp_dist_bray_curtis_f32(src1, src2,
size);
2983 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2985 return tpt_canberra_distance_f32(src1, src2,
size);
2987 return riscv_dsp_dist_canberra_f32(src1, src2,
size);
3001 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3003 return tpt_chebyshev_distance_f32(src1, src2,
size);
3005 return riscv_dsp_dist_chebyshev_f32(src1, src2,
size);
3019 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3021 return tpt_cityblock_distance_f32(src1, src2,
size);
3023 return riscv_dsp_dist_city_block_f32(src1, src2,
size);
3037 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3039 return tpt_correlation_distance_f32(src1, src2,
size);
3041 return riscv_dsp_dist_corr_f32(src1, src2,
size);
3055 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3057 return tpt_cosine_distance_f32(src1, src2,
size);
3059 return riscv_dsp_dist_cos_f32(src1, src2,
size);
3073 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3075 return tpt_euclidean_distance_f32(src1, src2,
size);
3077 return riscv_dsp_dist_euclidean_f32(src1, src2,
size);
3091 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3093 return tpt_jensenshannon_distance_f32(src1, src2,
size);
3095 return riscv_dsp_dist_jensen_shannon_f32(src1, src2,
size);
3110 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3112 return tpt_minkowski_distance_f32(src1, src2, order,
size);
3114 return riscv_dsp_dist_minkowski_f32(src1, src2, order,
size);
3128 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3130 return tpt_dice_distance(src1, src2, numofbool);
3132 return riscv_dsp_bdist_dice_u32_f32(src1, src2, numofbool);
3146 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3148 return tpt_hamming_distance(src1, src2, numofbool);
3150 return riscv_dsp_bdist_hamming_u32_f32(src1, src2, numofbool);
3164 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3166 return tpt_jaccard_distance(src1, src2, numofbool);
3168 return riscv_dsp_bdist_jaccard_u32_f32(src1, src2, numofbool);
3182 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3184 return tpt_kulsinski_distance(src1, src2, numofbool);
3186 return riscv_dsp_bdist_kulsinski_u32_f32(src1, src2, numofbool);
3200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3202 return tpt_sokalmichener_distance(src1, src2, numofbool);
3204 return riscv_dsp_bdist_sokal_michener_u32_f32(src1, src2, numofbool);
3218 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3220 return tpt_sokalsneath_distance(src1, src2, numofbool);
3222 return riscv_dsp_bdist_sokal_sneath_u32_f32(src1, src2, numofbool);
3236 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3238 return tpt_rogerstanimoto_distance(src1, src2, numofbool);
3240 return riscv_dsp_bdist_rogers_tanimoto_u32_f32(src1, src2, numofbool);
3254 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3256 return tpt_yule_distance(src1, src2, numofbool);
3258 return riscv_dsp_bdist_yule_u32_f32(src1, src2, numofbool);
3272 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3274 return tpt_russellrao_distance(src1, src2, numofbool);
3276 return riscv_dsp_bdist_russell_rao_u32_f32(src1, src2, numofbool);
3288 #ifdef HPM_MATH_DSP_FILTERING
3295 #ifdef HPM_EN_MATH_DSP_LIB
3298 #include "tpt_math.h"
3301 #include "riscv_dsp_filtering_math.h"
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_fir_f32().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3310 static inline void hpm_dsp_fir_f32(
const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
3312 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3313 riscv_dsp_fir_f32(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_fir_q31().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3331 static inline void hpm_dsp_fir_q31(
const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t
size)
3333 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3334 riscv_dsp_fir_q31(instance, src, dst,
size);
3354 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3355 riscv_dsp_fir_fast_q31(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_fir_q15().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3372 static inline void hpm_dsp_fir_q15(
const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t
size)
3374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3375 riscv_dsp_fir_q15(instance, src, dst,
size);
3393 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3394 riscv_dsp_fir_fast_q15(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_fir_q7().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3410 static inline void hpm_dsp_fir_q7(
const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t
size)
3412 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3413 riscv_dsp_fir_q7(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_lfir_f32().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3425 static inline void hpm_dsp_lfir_f32(
const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
3427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3428 riscv_dsp_lfir_f32(instance, src, dst,
size);
3442 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3443 riscv_dsp_lfir_q15(instance, src, dst,
size);
3461 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3462 riscv_dsp_lfir_q31(instance, src, dst,
size);
3467 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3468 riscv_dsp_dcmfir_f32(instance, src, dst,
size);
3473 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3474 riscv_dsp_dcmfir_q15(instance, src, dst,
size);
3479 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3480 riscv_dsp_dcmfir_q31(instance, src, dst,
size);
3485 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3486 riscv_dsp_dcmfir_fast_q31(instance, src, dst,
size);
3491 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3492 riscv_dsp_dcmfir_fast_q15(instance, src, dst,
size);
3497 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3498 riscv_dsp_upsplfir_f32(instance, src, dst,
size);
3503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3504 riscv_dsp_upsplfir_q15(instance, src, dst,
size);
3509 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3510 riscv_dsp_upsplfir_q31(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_spafir_f32().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param buf      caller-supplied buffer forwarded to the backend (presumably
 *                 scratch space; required length not visible here - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3513 static inline void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t
size)
3515 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3516 riscv_dsp_spafir_f32(instance, src, dst, buf,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_spafir_q15().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param buf1     caller-supplied q15_t buffer forwarded to the backend
 *                 (presumably scratch space - confirm required length)
 * @param buf2     caller-supplied q31_t buffer forwarded to the backend
 *                 (presumably scratch space - confirm required length)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3519 static inline void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t
size)
3521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3522 riscv_dsp_spafir_q15(instance, src, dst, buf1, buf2,
size);
3527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3528 riscv_dsp_spafir_q31(instance, src, dst, buf,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_spafir_q7().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param buf1     caller-supplied q7_t buffer forwarded to the backend
 *                 (presumably scratch space - confirm required length)
 * @param buf2     caller-supplied q31_t buffer forwarded to the backend
 *                 (presumably scratch space - confirm required length)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3531 static inline void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t
size)
3533 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3534 riscv_dsp_spafir_q7(instance, src, dst, buf1, buf2,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_lms_f32().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3552 static inline void hpm_dsp_lms_f32(
const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t
size)
3554 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3555 riscv_dsp_lms_f32(instance, src, ref, dst, err,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_lms_q31().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3575 static inline void hpm_dsp_lms_q31(
const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t
size)
3577 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3578 riscv_dsp_lms_q31(instance, src, ref, dst, err,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_lms_q15().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3598 static inline void hpm_dsp_lms_q15(
const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t
size)
3600 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3601 riscv_dsp_lms_q15(instance, src, ref, dst, err,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_nlms_f32().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3609 static inline void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t
size)
3611 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3612 riscv_dsp_nlms_f32(instance, src, ref, dst, err,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_nlms_q31().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3621 static inline void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t
size)
3623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3624 riscv_dsp_nlms_q31(instance, src, ref, dst, err,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_nlms_q15().
 *
 * @param instance filter instance (non-const pointer, so the backend may modify it)
 * @param src      forwarded to the backend (presumably the input signal - confirm)
 * @param ref      forwarded to the backend (presumably the reference signal - confirm)
 * @param dst      forwarded to the backend (presumably the filter output - confirm)
 * @param err      forwarded to the backend (presumably the error output - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
3629 static inline void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t
size)
3631 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3632 riscv_dsp_nlms_q15(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_conv_f32() or riscv_dsp_conv_f32().
 *
 * The two backends order their arguments differently: tpt_conv_f32() takes
 * dst first, riscv_dsp_conv_f32() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3647 static inline void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3649 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3651 tpt_conv_f32(dst, src1, len1, src2, len2);
3653 riscv_dsp_conv_f32(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_conv_q15() or riscv_dsp_conv_q15().
 *
 * The two backends order their arguments differently: tpt_conv_q15() takes
 * dst first, riscv_dsp_conv_q15() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3673 static inline void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3675 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3677 tpt_conv_q15(dst, src1, len1, src2, len2);
3679 riscv_dsp_conv_q15(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_conv_q31() or riscv_dsp_conv_q31().
 *
 * The two backends order their arguments differently: tpt_conv_q31() takes
 * dst first, riscv_dsp_conv_q31() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3701 static inline void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3705 tpt_conv_q31(dst, src1, len1, src2, len2);
3707 riscv_dsp_conv_q31(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_conv_q7() or riscv_dsp_conv_q7().
 *
 * The two backends order their arguments differently: tpt_conv_q7() takes
 * dst first, riscv_dsp_conv_q7() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3727 static inline void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3729 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3731 tpt_conv_q7(dst, src1, len1, src2, len2);
3733 riscv_dsp_conv_q7(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, returns the
 *        result of either tpt_conv_partial_f32() or
 *        riscv_dsp_conv_partial_f32().
 *
 * The two backends order their arguments differently: the tpt_ variant takes
 * dst first, the riscv_dsp_ variant takes it after len2.
 *
 * @param src1       first operand buffer, forwarded unchanged
 * @param len1       forwarded with src1 (presumably its element count - confirm)
 * @param src2       second operand buffer, forwarded unchanged
 * @param len2       forwarded with src2 (presumably its element count - confirm)
 * @param dst        destination buffer
 * @param startindex forwarded to the backend (presumably the first output
 *                   index to compute - confirm)
 * @param size       forwarded to the backend (presumably the number of outputs
 *                   to compute - confirm)
 * @return the backend's int32_t return value, unmodified (meaning of the
 *         status codes is defined by the backend header - confirm)
 */
3752 static inline int32_t
hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t
size)
3754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3756 return tpt_conv_partial_f32(dst, src1, len1, src2, len2, startindex,
size);
3758 return riscv_dsp_conv_partial_f32(src1, len1, src2, len2, dst, startindex,
3780 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3782 return tpt_conv_partial_q15(dst, src1, len1, src2, len2, startindex,
size);
3784 return riscv_dsp_conv_partial_q15(src1, len1, src2, len2, dst, startindex,
3806 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3808 return tpt_conv_partial_q31(dst, src1, len1, src2, len2, startindex,
size);
3810 return riscv_dsp_conv_partial_q31(src1, len1, src2, len2, dst, startindex,
3832 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3834 return tpt_conv_partial_q7(dst, src1, len1, src2, len2, startindex,
size);
3836 return riscv_dsp_conv_partial_q7(src1, len1, src2, len2, dst, startindex,
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_correlate_f32() or riscv_dsp_corr_f32().
 *
 * The two backends order their arguments differently: tpt_correlate_f32()
 * takes dst first, riscv_dsp_corr_f32() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3853 static inline void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3855 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3857 tpt_correlate_f32(dst, src1, len1, src2, len2);
3859 riscv_dsp_corr_f32(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_correlate_q15() or riscv_dsp_corr_q15().
 *
 * The two backends order their arguments differently: tpt_correlate_q15()
 * takes dst first, riscv_dsp_corr_q15() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3879 static inline void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3881 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3883 tpt_correlate_q15(dst, src1, len1, src2, len2);
3885 riscv_dsp_corr_q15(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_correlate_q31() or riscv_dsp_corr_q31().
 *
 * The two backends order their arguments differently: tpt_correlate_q31()
 * takes dst first, riscv_dsp_corr_q31() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3909 static inline void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3911 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3913 tpt_correlate_q31(dst, src1, len1, src2, len2);
3915 riscv_dsp_corr_q31(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_correlate_q7() or riscv_dsp_corr_q7().
 *
 * The two backends order their arguments differently: tpt_correlate_q7()
 * takes dst first, riscv_dsp_corr_q7() takes it last.
 *
 * @param src1 first operand buffer, forwarded unchanged
 * @param len1 forwarded with src1 (presumably its element count - confirm)
 * @param src2 second operand buffer, forwarded unchanged
 * @param len2 forwarded with src2 (presumably its element count - confirm)
 * @param dst  destination buffer (required length not visible here - confirm
 *             against the backend header)
 */
3935 static inline void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3937 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3939 tpt_correlate_q7(dst, src1, len1, src2, len2);
3941 riscv_dsp_corr_q7(src1, len1, src2, len2, dst);
3947 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3948 riscv_dsp_bq_df1_f32(instance, src, dst,
size);
3953 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3954 riscv_dsp_bq_df1_q15(instance, src, dst,
size);
3959 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3960 riscv_dsp_bq_df1_fast_q15(instance, src, dst,
size);
3965 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3966 riscv_dsp_bq_df1_q31(instance, src, dst,
size);
3971 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3972 riscv_dsp_bq_df1_fast_q31(instance, src, dst,
size);
3977 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3978 riscv_dsp_bq_df1_32x64_q31(instance, src, dst,
size);
3983 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3984 riscv_dsp_bq_df2T_f32(instance, src, dst,
size);
3989 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3990 riscv_dsp_bq_df2T_f64(instance, src, dst,
size);
3995 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3996 riscv_dsp_bq_stereo_df2T_f32(instance, src, dst,
size);
/**
 * @brief Thin wrapper: when HPM_DSP_CORE == HPM_DSP_HW_NDS32, forwards its
 *        arguments unchanged to riscv_dsp_liir_f32().
 *
 * @param instance filter instance, passed through as a pointer-to-const
 * @param src      forwarded to the backend (presumably the input buffer - confirm)
 * @param dst      forwarded to the backend (presumably the output buffer - confirm)
 * @param size     forwarded to the backend (presumably the sample count - confirm)
 */
4000 static inline void hpm_dsp_liir_f32(
const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
4002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4003 riscv_dsp_liir_f32(instance, src, dst,
size);
4008 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4009 riscv_dsp_liir_q31(instance, src, dst,
size);
4014 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4015 riscv_dsp_liir_fast_q31(instance, src, dst,
size);
4020 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4021 riscv_dsp_liir_q15(instance, src, dst,
size);
4026 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4027 riscv_dsp_liir_fast_q15(instance, src, dst,
size);
4038 #ifdef HPM_MATH_DSP_MATRIX
4063 #ifdef HPM_EN_MATH_DSP_LIB
4065 #include "tpt_math.h"
4067 #include "riscv_dsp_matrix_math.h"
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_mat_add_f32() or riscv_dsp_mat_add_f32().
 *
 * The two backends order their arguments differently: tpt_mat_add_f32() takes
 * dst first, riscv_dsp_mat_add_f32() takes it after src2.
 *
 * @param src1 first operand (read-only), forwarded unchanged
 * @param src2 second operand (read-only), forwarded unchanged
 * @param dst  destination buffer
 * @param row  forwarded to the backend (presumably the row count shared by
 *             all three matrices - confirm)
 * @param col  forwarded to the backend (presumably the column count - confirm)
 */
4078 static inline void hpm_dsp_mat_add_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4080 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4082 tpt_mat_add_f32(dst, src1, src2, row, col);
4084 riscv_dsp_mat_add_f32(src1, src2, dst, row, col);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_mat_add_q15() or riscv_dsp_mat_add_q15().
 *
 * The two backends order their arguments differently: tpt_mat_add_q15() takes
 * dst first, riscv_dsp_mat_add_q15() takes it after src2.
 *
 * @param src1 first operand (read-only), forwarded unchanged
 * @param src2 second operand (read-only), forwarded unchanged
 * @param dst  destination buffer
 * @param row  forwarded to the backend (presumably the row count shared by
 *             all three matrices - confirm)
 * @param col  forwarded to the backend (presumably the column count - confirm)
 */
4099 static inline void hpm_dsp_mat_add_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4103 tpt_mat_add_q15(dst, src1, src2, row, col);
4105 riscv_dsp_mat_add_q15(src1, src2, dst, row, col);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_mat_add_q31() or riscv_dsp_mat_add_q31().
 *
 * The two backends order their arguments differently: tpt_mat_add_q31() takes
 * dst first, riscv_dsp_mat_add_q31() takes it after src2.
 *
 * @param src1 first operand (read-only), forwarded unchanged
 * @param src2 second operand (read-only), forwarded unchanged
 * @param dst  destination buffer
 * @param row  forwarded to the backend (presumably the row count shared by
 *             all three matrices - confirm)
 * @param col  forwarded to the backend (presumably the column count - confirm)
 */
4120 static inline void hpm_dsp_mat_add_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4124 tpt_mat_add_q31(dst, src1, src2, row, col);
4126 riscv_dsp_mat_add_q31(src1, src2, dst, row, col);
4141 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4143 return tpt_mat_inverse_f32(dst, src,
size);
4145 return riscv_dsp_mat_inv_f32(src, dst,
size);
4151 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4153 return tpt_mat_inverse_f64(dst, src,
size);
4155 return riscv_dsp_mat_inv_f64(src, dst,
size);
/**
 * @brief Wrapper that, when HPM_DSP_CORE == HPM_DSP_HW_NDS32, hands its
 *        arguments to either tpt_mat_mult_f32() or riscv_dsp_mat_mul_f32().
 *
 * The two backends order their arguments differently: tpt_mat_mult_f32() takes
 * dst first, riscv_dsp_mat_mul_f32() takes it after src2.
 *
 * This function returns void, so the tpt_ backend is called as a plain
 * statement; `return <expression>;` is not permitted in a void function
 * (C11 6.8.6.4). NOTE(review): this relies on the tpt_ and riscv_dsp_ calls
 * being mutually exclusive preprocessor alternatives, as in the sibling
 * wrappers - confirm.
 *
 * @param src1 first operand (read-only), forwarded unchanged
 * @param src2 second operand (read-only), forwarded unchanged
 * @param dst  destination buffer
 * @param row  forwarded to the backend (presumably rows of src1 - confirm)
 * @param col  forwarded to the backend (presumably columns of src1 - confirm)
 * @param col2 forwarded to the backend (presumably columns of src2 - confirm)
 */
4170 static inline void hpm_dsp_mat_mul_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4172 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4174 tpt_mat_mult_f32(dst, src1, src2, row, col, col2);
4176 riscv_dsp_mat_mul_f32(src1, src2, dst, row, col, col2);
4181 static inline void hpm_dsp_mat_mul_f64(
const float64_t *src1,
const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4183 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4185 return tpt_mat_mult_f64(dst, src1, src2, row, col, col2);
4187 riscv_dsp_mat_mul_f64(src1, src2, dst, row, col, col2);
4201 static inline void hpm_dsp_cmat_mul_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4203 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4205 return tpt_mat_cmplx_mult_f32(dst, src1, src2, row, col, col2);
4207 riscv_dsp_cmat_mul_f32(src1, src2, dst, row, col, col2);
4228 static inline void hpm_dsp_mat_mul_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4230 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4232 return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4234 riscv_dsp_mat_mul_q15(src1, src2, dst, row, col, col2);
4238 static inline void hpm_dsp_mat_mul_fast_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4240 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4242 return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4244 riscv_dsp_mat_mul_fast_q15(src1, src2, dst, row, col, col2);
4265 static inline void hpm_dsp_cmat_mul_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4267 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4269 return tpt_mat_cmplx_mult_q15(dst, src1, src2, row, col, col2);
4271 riscv_dsp_cmat_mul_q15(src1, src2, dst, row, col, col2);
4292 static inline void hpm_dsp_mat_mul_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4294 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4296 return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4298 riscv_dsp_mat_mul_q31(src1, src2, dst, row, col, col2);
4302 static inline void hpm_dsp_mat_mul_fast_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4304 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4306 return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4308 riscv_dsp_mat_mul_fast_q31(src1, src2, dst, row, col, col2);
4329 static inline void hpm_dsp_cmat_mul_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4331 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4333 return tpt_mat_cmplx_mult_q31(dst, src1, src2, row, col, col2);
4335 riscv_dsp_cmat_mul_q31(src1, src2, dst, row, col, col2);
4356 static inline void hpm_dsp_mat_mul_q7(
const q7_t *src1,
const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4359 riscv_dsp_mat_mul_q7(src1, src2, dst, row, col, col2);
4380 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4382 tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2);
4384 riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2);
4394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4395 return riscv_dsp_mat_pwr2_cache_f64(src, dst,
size);
4408 static inline void hpm_dsp_mat_scale_f32(
const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
4410 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4412 tpt_mat_scale_f32(dst, src, row, col, scale);
4414 riscv_dsp_mat_scale_f32(src, scale, dst, row, col);
4433 static inline void hpm_dsp_mat_scale_q15(
const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
4435 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4437 tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift);
4439 riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col);
4458 static inline void hpm_dsp_mat_scale_q31(
const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
4460 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4462 tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift);
4464 riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col);
4481 float64_t *dst, uint32_t row, uint32_t col)
4483 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4485 tpt_mat_sub_f64(dst, src1, src2, row, col);
4487 riscv_dsp_mat_sub_f64(src1, src2, dst, row, col);
4500 static inline void hpm_dsp_mat_sub_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4502 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4504 tpt_mat_sub_f32(dst, src1, src2, row, col);
4506 riscv_dsp_mat_sub_f32(src1, src2, dst, row, col);
4521 static inline void hpm_dsp_mat_sub_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4523 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4525 tpt_mat_sub_q15(dst, src1, src2, row, col);
4527 riscv_dsp_mat_sub_q15(src1, src2, dst, row, col);
4542 static inline void hpm_dsp_mat_sub_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4544 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4546 tpt_mat_sub_q31(dst, src1, src2, row, col);
4548 riscv_dsp_mat_sub_q31(src1, src2, dst, row, col);
4565 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4567 tpt_mat_trans_f64(dst, src, row, col);
4569 riscv_dsp_mat_trans_f64(src, dst, row, col);
4583 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4584 riscv_dsp_mat_trans_f32(src, dst, row, col);
4597 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4599 tpt_mat_trans_q15(dst, src, row, col);
4601 riscv_dsp_mat_trans_q15(src, dst, row, col);
4615 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4617 tpt_mat_trans_q31(dst, src, row, col);
4619 riscv_dsp_mat_trans_q31(src, dst, row, col);
4633 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4634 riscv_dsp_mat_trans_u8(src, dst, row, col);
4648 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4649 riscv_dsp_mat_trans_q7(src, dst, row, col);
4685 q31_t * dst, uint32_t size1, uint32_t size2)
4687 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4689 tpt_mat_oprod_q31(dst, src1, src2, size1, size2);
4691 riscv_dsp_mat_oprod_q31(src1, src2, dst, size1, size2);
4719 float32_t *dst, uint32_t row, uint32_t col)
4721 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4723 tpt_mat_mul_mxv_f32(dst, src1, src2, row, col);
4725 riscv_dsp_mat_mul_mxv_f32(src1, src2, dst, row, col);
4739 q15_t *dst, uint32_t row, uint32_t col)
4741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4743 tpt_mat_mul_mxv_q15(dst, src1, src2, row, col);
4745 riscv_dsp_mat_mul_mxv_q15(src1, src2, dst, row, col);
4759 q31_t *dst, uint32_t row, uint32_t col)
4761 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4763 tpt_mat_mul_mxv_q31(dst, src1, src2, row, col);
4765 riscv_dsp_mat_mul_mxv_q31(src1, src2, dst, row, col);
4779 q7_t *dst, uint32_t row, uint32_t col)
4781 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4783 tpt_mat_mul_mxv_q7(dst, src1, src2, row, col);
4785 riscv_dsp_mat_mul_mxv_q7(src1, src2, dst, row, col);
4798 #ifdef HPM_MATH_DSP_SVM
4806 #ifdef HPM_EN_MATH_DSP_LIB
4808 #include "tpt_math.h"
4810 #include "riscv_dsp_svm_math.h"
4820 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4821 riscv_dsp_svm_linear_est_f32(instance, src, result);
4834 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4835 riscv_dsp_svm_sigmoid_est_f32(instance, src, result);
4848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4849 riscv_dsp_svm_rbf_est_f32(instance, src, result);
4862 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4863 riscv_dsp_svm_poly_est_f32(instance, src, result);
4875 #ifdef HPM_MATH_DSP_TRANSFORM
4882 #ifdef HPM_EN_MATH_DSP_LIB
4884 #include "tpt_math.h"
4886 #include "riscv_dsp_transform_math.h"
4918 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4920 return tpt_cfft_f32(src, m,
false);
4922 return riscv_dsp_cfft_rd2_f32(src, m);
4936 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4938 return tpt_cfft_f32(src, m,
true);
4940 return riscv_dsp_cifft_rd2_f32(src, m);
4961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4963 return tpt_cfft_q15(src, m,
false);
4965 return riscv_dsp_cfft_rd2_q15(src, m);
4985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4987 return tpt_cfft_q15(src, m,
true);
4989 return riscv_dsp_cifft_rd2_q15(src, m);
5009 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5011 return tpt_cfft_q31(src, m,
false);
5013 return riscv_dsp_cfft_rd2_q31(src, m);
5034 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5036 return tpt_cfft_q31(src, m,
true);
5038 return riscv_dsp_cifft_rd2_q31(src, m);
5075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5077 return tpt_cfft_f32(src, m,
false);
5079 return riscv_dsp_cfft_rd4_f32(src, m);
5094 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5096 return tpt_cfft_f32(src, m,
true);
5098 return riscv_dsp_cifft_rd4_f32(src, m);
5118 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5120 return tpt_cfft_q15(src, m,
false);
5122 return riscv_dsp_cfft_rd4_q15(src, m);
5142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5144 return tpt_cfft_q15(src, m,
true);
5146 return riscv_dsp_cifft_rd4_q15(src, m);
5166 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5168 return tpt_cfft_q31(src, m,
false);
5170 return riscv_dsp_cfft_rd4_q31(src, m);
5190 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5192 return tpt_cfft_q31(src, m,
true);
5194 return riscv_dsp_cifft_rd4_q31(src, m);
5221 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5223 tpt_cfft_f32(src, m,
false);
5225 riscv_dsp_cfft_f32(src, m);
5238 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5240 tpt_cfft_f64(src, m,
false);
5242 riscv_dsp_cfft_f64(src, m);
5255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5257 tpt_cfft_f32(src, m,
true);
5259 riscv_dsp_cifft_f32(src, m);
5272 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5274 tpt_cfft_f64(src, m,
true);
5276 riscv_dsp_cifft_f64(src, m);
5296 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5298 tpt_cfft_q15(src, m,
false);
5300 riscv_dsp_cfft_q15(src, m);
5319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5321 tpt_cfft_q15(src, m,
true);
5323 riscv_dsp_cifft_q15(src, m);
5342 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5344 tpt_cfft_q31(src, m,
false);
5346 riscv_dsp_cfft_q31(src, m);
5365 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5367 tpt_cfft_q31(src, m,
true);
5369 riscv_dsp_cifft_q31(src, m);
5405 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5407 return tpt_rfft_f32(src, src, m,
false);
5409 return riscv_dsp_rfft_f32(src, m);
5423 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5424 return riscv_dsp_rfft_f64(src, m);
5437 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5438 return riscv_dsp_rifft_f32(src, m);
5451 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5452 return riscv_dsp_rifft_f64(src, m);
5471 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5472 return riscv_dsp_rfft_q15(src, m);
5491 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5492 return riscv_dsp_rifft_q15(src, m);
5511 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5512 return riscv_dsp_rfft_q31(src, m);
5531 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5532 return riscv_dsp_rifft_q31(src, m);
5556 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5557 riscv_dsp_dct_f32(src, m);
5569 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5570 riscv_dsp_idct_f32(src, m);
5588 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5589 riscv_dsp_dct_q15(src, m);
5607 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5608 riscv_dsp_idct_q15(src, m);
5626 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5627 riscv_dsp_dct_q31(src, m);
5645 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5646 riscv_dsp_idct_q31(src, m);
5670 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5671 riscv_dsp_dct4_f32(src, m);
5683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5684 riscv_dsp_idct4_f32(src, m);
5702 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5703 riscv_dsp_dct4_q15(src, m);
5721 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5722 riscv_dsp_idct4_q15(src, m);
5740 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5741 riscv_dsp_dct4_q31(src, m);
5759 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5760 riscv_dsp_idct4_q31(src, m);
5779 #if defined(HPMSOC_HAS_HPMSDK_FFA) && defined(HPM_EN_MATH_DSP_LIB)
5782 #include "hpm_soc.h"
5795 static inline void hpm_ffa_cfft_q15(q15_t *src, uint32_t m)
5814 static inline void hpm_ffa_cfft_q31(q31_t *src, uint32_t m)
5826 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
5827 static inline void hpm_ffa_cfft_f32(
float *src, uint32_t m)
5837 ffa_set_coef_max_index(
HPM_FFA, 0);
5838 ffa_set_output_max_index(
HPM_FFA, 20);
5839 ffa_set_input_max_index(
HPM_FFA, 20 - m);
5851 static inline void hpm_ffa_cifft_q15(q15_t *src, uint32_t m)
5871 static inline void hpm_ffa_cifft_q31(q31_t *src, uint32_t m)
5883 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
5884 static inline void hpm_ffa_cifft_f32(
float *src, uint32_t m)
5894 ffa_set_coef_max_index(
HPM_FFA, 0x0);
5895 ffa_set_output_max_index(
HPM_FFA, 10);
5896 ffa_set_input_max_index(
HPM_FFA, 20);
5910 #ifdef HPM_MATH_DSP_UTILS
5920 #ifdef HPM_EN_MATH_DSP_LIB
5922 #include <tpt_math.h>
5924 #include "riscv_dsp_utils_math.h"
5928 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5929 return riscv_dsp_cos_f32(src);
5934 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5935 return riscv_dsp_cos_q31(src);
5940 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5941 return riscv_dsp_cos_q15(src);
5947 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5948 return riscv_dsp_sin_f32(src);
5952 #if defined (__riscv_zfh)
5957 static inline float16_t hpm_dsp_sin_f16(float16_t src)
5959 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5960 return riscv_dsp_sin_f16(src);
5967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5968 return riscv_dsp_sin_q31(src);
5973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5974 return riscv_dsp_sin_q15(src);
5981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5982 return riscv_dsp_atan_f32(src);
5987 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5988 return riscv_dsp_atan_q31(src);
5993 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5994 return riscv_dsp_atan_q15(src);
5999 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6000 return riscv_dsp_atan2_f32(srcy, src2);
6005 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6006 return riscv_dsp_atan2_q15(srcy, src2);
6011 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6012 return riscv_dsp_atan2_q31(srcy, src2);
6024 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6025 return riscv_dsp_sqrt_f32(src);
6036 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6037 return riscv_dsp_sqrt_q31(src);
6048 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6049 return riscv_dsp_sqrt_q15(src);
6062 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6063 riscv_dsp_convert_f32_q15(src, dst,
size);
6075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6077 tpt_f32_to_q31(dst, src,
size);
6079 riscv_dsp_convert_f32_q31(src, dst,
size);
6092 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6093 riscv_dsp_convert_f32_q7(src, dst,
size);
6105 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6106 riscv_dsp_convert_q15_f32(src, dst,
size);
6118 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6119 riscv_dsp_convert_q15_q31(src, dst,
size);
6131 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6132 riscv_dsp_convert_q15_q7(src, dst,
size);
6144 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6146 tpt_q31_to_f32(dst, src,
size);
6148 riscv_dsp_convert_q31_f32(src, dst,
size);
6161 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6162 riscv_dsp_convert_q31_q15(src, dst,
size);
6174 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6175 riscv_dsp_convert_q31_q7(src, dst,
size);
6187 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6188 riscv_dsp_convert_q7_f32(src, dst,
size);
6200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6201 riscv_dsp_convert_q7_q15(src, dst,
size);
6213 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6214 riscv_dsp_convert_q7_q31(src, dst,
size);
6227 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6228 riscv_dsp_dup_f32(src, dst,
size);
6240 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6241 riscv_dsp_dup_q15(src, dst,
size);
6253 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6254 riscv_dsp_dup_q31(src, dst,
size);
6266 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6267 riscv_dsp_dup_q7(src, dst,
size);
6280 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6281 riscv_dsp_set_f32(val, dst,
size);
6293 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6294 riscv_dsp_set_q15(val, dst,
size);
6306 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6307 riscv_dsp_set_q31(val, dst,
size);
6319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6320 riscv_dsp_set_q7(val, dst,
size);
6334 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6335 return riscv_dsp_weighted_sum_f32(src, weight,
size);
6348 static inline void hpm_dsp_barycenter_f32(
const float32_t *src,
const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
6350 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6351 riscv_dsp_barycenter_f32(src, weights, out, numofvec, dimofvec);
6362 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6363 return riscv_dsp_exp_f32(src);
6367 #if defined (__riscv_zfh)
6373 static inline float16_t hpm_dsp_exp_f16(float16_t src)
6375 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6376 return riscv_dsp_exp_f16(src);
6388 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6389 return riscv_dsp_sigmoid_f32(src);
6393 #if defined (__riscv_zfh)
6399 static inline float16_t hpm_dsp_sigmoid_f16(float16_t src)
6401 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6402 return riscv_dsp_sigmoid_f16(src);
6414 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6415 return riscv_dsp_log_f32(src);
6419 #if defined (__riscv_zfh)
6425 static inline float16_t hpm_dsp_log_f16(float16_t src)
6427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6428 return riscv_dsp_log_f16(src);
6442 #ifdef HPM_MATH_DSP_SORT
6454 #ifdef HPM_EN_MATH_DSP_LIB
6455 #include "riscv_dsp_sort_math.h"
6478 static inline void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t * instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
6480 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6481 riscv_dsp_sort_init_f32(instance, alg, order);
6525 static inline void hpm_dsp_sort_f32(
const riscv_dsp_sort_f32_t * instance,float32_t * src, float32_t * dst, uint32_t
size)
6527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6528 riscv_dsp_sort_f32(instance, src, dst,
size);
6549 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6550 riscv_dsp_sort_merge_init_f32(instance, order, buf);
6589 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6590 riscv_dsp_sort_merge_f32(instance, src, dst,
size);
6597 #ifdef HPM_MATH_NN_TINYENGINE
6598 #ifdef HPM_EN_MATH_DSP_LIB
6600 #include "riscv_math_types.h"
6602 #include "riscv_simd_convert.h"
/*
 * Split a signed shift amount into its two non-negative halves:
 *   LEFT_SHIFT(s)  yields s when s is positive, otherwise 0.
 *   RIGHT_SHIFT(s) yields -s when s is zero or negative, otherwise 0.
 *
 * Every use of the parameter is parenthesized so that expression arguments
 * (e.g. RIGHT_SHIFT(a - b)) are compared and negated as a whole.
 * The argument is evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
/* Bit patterns 0x7FFFFFFF and 0x80000000 cast to q31_t. */
#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
#define Q31_MIN ((q31_t)(0x80000000L))
6614 (*pQ15)[0] = (val & 0x0FFFF);
6615 (*pQ15)[1] = (val >> 16) & 0x0FFFF;
6629 val = *(q31_t *)(*in_q15);
6646 q63_t mult = 1 << 30;
6648 if ((m1 < 0) ^ (m2 < 0)) {
6651 mult = mult + (q63_t)m1 * m2;
6652 result = mult / (1UL << 31);
6654 if ((m1 == m2) && (m1 == (int32_t)
Q31_MIN)) {
6672 const q31_t remainder_mask = (1l << exponent) - 1;
6673 int32_t remainder = remainder_mask & dividend;
6675 result = dividend >> exponent;
6676 q31_t threshold = remainder_mask >> 1;
6680 if (remainder > threshold) {
6687 __STATIC_FORCEINLINE q31_t
hpm_nn_requantize(
const q31_t val,
const q31_t multiplier,
const q31_t shift)
6702 val = *(q31_t *)(*in_q7);
6716 *out2 = __SXTB16_ROR(inA, 8);
6717 *out1 = __SXTB16(inA);
6726 __STATIC_FORCEINLINE
const q7_t *
read_and_pad(
const q7_t *source, q31_t *out1, q31_t *out2)
6729 q31_t inAbuf1 = __SXTB16_ROR(inA, 8);
6730 q31_t inAbuf2 = __SXTB16(inA);
6732 *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
6733 *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
6747 val = *(int32_t *)(*in_s8);
6761 int32_t out_q15x2_1;
6762 int32_t out_q15x2_2;
6765 block_cnt = block_size >> 2;
6768 const int32_t offset_q15x2 = __PKHBT(offset, offset, 16);
6769 while (block_cnt > 0) {
6774 in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8));
6775 in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
6777 out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16);
6778 out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16);
6786 block_cnt = block_size % 0x4;
6788 while (block_cnt > 0) {
6789 *dst++ = (int16_t)*src++ + offset;
6799 #ifdef HPM_MATH_NN_ACTIVATION
6800 #ifdef HPM_EN_MATH_NN_LIB
6801 #if defined(__zcc__)
6802 #include "tpt_nn_activation.h"
6804 #include "riscv_nn_activation.h"
6840 riscv_nn_activation_fun act_fun)
6842 #if defined(__zcc__)
6843 tpt_nn_activate_s8(in_out,
size, int_bits, act_fun);
6845 riscv_nn_activate_s8(in_out,
size, int_bits, act_fun);
6867 riscv_nn_activation_fun act_fun)
6869 #if defined(__zcc__)
6870 tpt_nn_activate_s16(in_out,
size, int_bits, act_fun);
6872 riscv_nn_activate_s16(in_out,
size, int_bits, act_fun);
6896 #if defined(__zcc__)
6897 tpt_nn_leaky_relu_q7(in_out, in_out,
size,
slope);
6912 #if defined(__zcc__)
6913 tpt_nn_relu_any_q7(data,
size, max_val);
6915 riscv_nn_relu_any_s8(data,
size, max_val);
6937 #if defined(__zcc__)
6938 tpt_nn_relu_q7(in_out,
size);
6940 riscv_nn_relu_s8(in_out,
size);
6952 #if defined(__zcc__)
6953 tpt_nn_relu_q15(in_out,
size);
6955 riscv_nn_relu_s16(in_out,
size);
6969 static inline int32_t hpm_nn_sigmoid_f16(
const float16_t *in_vec,
6973 #if defined(__zcc__)
6976 return riscv_nn_sigmoid_f16(in_vec,
size,
out_vec);
6988 static inline int32_t hpm_nn_tanh_f16(
const float16_t *in_vec,
6992 #if defined(__zcc__)
7006 #ifdef HPM_MATH_NN_BASIC
7007 #ifdef HPM_EN_MATH_NN_LIB
7008 #if defined(__zcc__)
7009 #include "tpt_nn_basic.h"
7011 #include "riscv_nn_basic.h"
7059 const q7_t *in_tensor2,
7060 const int16_t *scale1,
7061 const int16_t *scale2,
7062 const uint32_t
size,
7063 const uint16_t pre_rshift,
7064 const uint16_t out_scale,
7065 const uint16_t post_rshift,
7068 #if defined(__zcc__)
7069 tpt_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2,
size, pre_rshift,
7070 out_scale, post_rshift, out);
7072 riscv_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2,
size, pre_rshift,
7073 out_scale, post_rshift, out);
7097 const q7_t *in_tensor2,
7098 const uint32_t scale1,
7099 const uint32_t scale2,
7100 const uint32_t
size,
7101 const uint16_t pre_rshift,
7102 const uint16_t out_scale,
7103 const uint16_t post_rshift,
7106 #if defined(__zcc__)
7107 tpt_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2,
size,
7108 pre_rshift, out_scale, post_rshift, out);
7110 riscv_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2,
size,
7111 pre_rshift, out_scale, post_rshift, out);
7167 const int8_t *in_tensor2,
7168 const int32_t in_offset1,
7169 const int32_t in_scale1,
7170 const int32_t in_rshift1,
7171 const int32_t in_offset2,
7172 const int32_t in_scale2,
7173 const int32_t in_rshift2,
7174 const int32_t lshift,
7176 const int32_t out_offset,
7177 const int32_t out_scale,
7179 const int32_t act_min,
7180 const int32_t act_max,
7181 const uint32_t
size)
7183 #if defined(__zcc__)
7184 return tpt_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7185 in_rshift1, in_offset2, in_scale2, in_rshift2,
7186 lshift, out, out_offset, out_scale,
out_rshift,
7187 act_min, act_max,
size);
7189 return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7190 in_rshift1, in_offset2, in_scale2, in_rshift2,
7191 lshift, out, out_offset, out_scale,
out_rshift,
7192 act_min, act_max,
size);
7236 const int8_t *in_tensor2,
7237 const int32_t in_offset1,
7238 const int32_t in_offset2,
7240 const int32_t out_offset,
7241 const int32_t out_scale,
7242 const int32_t out_shift,
7243 const int32_t act_min,
7244 const int32_t act_max,
7245 const uint32_t
size)
7247 #if defined(__zcc__)
7248 return tpt_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7249 out, out_offset, out_scale, out_shift, act_min,
7252 return riscv_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7253 out, out_offset, out_scale, out_shift, act_min,
7264 #ifdef HPM_EN_MATH_NN_RVP32_LIB
7265 #if defined(__zcc__)
7266 #include "tpt_nn_basic.h"
7268 #include "riscv_nn_basic.h"
7324 const int8_t *in_tensor2,
7325 const int32_t in_offset1,
7326 const int32_t in_scale1,
7327 const int32_t in_rshift1,
7328 const int32_t in_offset2,
7329 const int32_t in_scale2,
7330 const int32_t in_rshift2,
7331 const int32_t lshift,
7333 const int32_t out_offset,
7334 const int32_t out_scale,
7336 const int32_t act_min,
7337 const int32_t act_max,
7338 const uint32_t
size)
7340 #if defined(__zcc__)
7341 return tpt_elementwise_add_s8(out, out_offset, out_scale, -
out_rshift, act_min,
7342 act_max, in_tensor1, in_tensor2, in_offset1, in_scale1,
7343 in_rshift1, in_offset2, in_scale2, in_rshift2,
7346 return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7347 in_rshift1, in_offset2, in_scale2, in_rshift2,
7348 lshift, out, out_offset, out_scale,
out_rshift,
7349 act_min, act_max,
size);
7357 #ifdef HPM_MATH_NN_CONCATENATION
7358 #ifdef HPM_EN_MATH_NN_LIB
7359 #if defined(__zcc__)
7360 #include "tpt_nn_concatenation.h"
7362 #include "riscv_nn_concatenation.h"
7391 const uint16_t in_tensor_x,
7392 const uint16_t in_tensor_y,
7393 const uint16_t in_tensor_z,
7394 const uint16_t in_tensor_w,
7396 const uint32_t out_offset_w)
7398 #if defined(__zcc__)
7399 tpt_concatenation_s8_w(out_tensor, in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7400 in_tensor_w, out_offset_w);
7402 riscv_nn_concate_s8_w(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7403 in_tensor_w, out_tensor, out_offset_w);
7426 const uint16_t in_tensor_x,
7427 const uint16_t in_tensor_y,
7428 const uint16_t in_tensor_z,
7429 const uint16_t in_tensor_w,
7431 const uint16_t out_tensor_x,
7432 const uint32_t out_offset_x)
7434 #if defined(__zcc__)
7435 tpt_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7436 in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7438 riscv_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7439 in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7461 const uint16_t in_tensor_x,
7462 const uint16_t in_tensor_y,
7463 const uint16_t in_tensor_z,
7464 const uint16_t in_tensor_w,
7466 const uint16_t out_tensor_y,
7467 const uint32_t out_offset_y)
7469 #if defined(__zcc__)
7470 tpt_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7471 in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7473 riscv_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7474 in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7496 const uint16_t in_tensor_x,
7497 const uint16_t in_tensor_y,
7498 const uint16_t in_tensor_z,
7499 const uint16_t in_tensor_w,
7501 const uint16_t out_tensor_z,
7502 const uint32_t out_offset_z)
7504 #if defined(__zcc__)
7505 tpt_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7506 in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7508 riscv_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7509 in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7520 #ifdef HPM_MATH_NN_CONVOLUTION
7521 #ifdef HPM_EN_MATH_NN_LIB
7522 #if defined(__zcc__)
7523 #include "tpt_nn_convolution.h"
7525 #include "riscv_nn_convolution.h"
7616 const uint16_t in_tensor_dim_x,
7617 const uint16_t in_tensor_dim_y,
7618 const uint16_t in_tensor_ch,
7619 const q7_t *ker_weight,
7620 const uint16_t out_tensor_ch,
7621 const uint16_t ker_dim_x,
7622 const uint16_t ker_dim_y,
7623 const uint16_t pad_x,
7624 const uint16_t pad_y,
7625 const uint16_t stride_x,
7626 const uint16_t stride_y,
7631 const uint16_t out_tensor_dim_x,
7632 const uint16_t out_tensor_dim_y,
7636 #if defined(__zcc__)
7637 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7638 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7639 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7643 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7644 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7645 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7703 const uint16_t in_tensor_dim,
7704 const q7_t *ker_weight,
7705 const uint16_t out_tensor_ch,
7706 const uint16_t ker_dim,
7708 const uint16_t stride,
7713 const uint16_t out_tensor_dim,
7717 #if defined(__zcc__)
7718 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7719 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7723 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7724 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7782 const uint16_t in_tensor_dim,
7783 const q7_t *ker_weight,
7784 const uint16_t out_tensor_ch,
7785 const uint16_t ker_dim,
7787 const uint16_t stride,
7792 const uint16_t out_tensor_dim,
7796 #if defined(__zcc__)
7797 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7798 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7802 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7803 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7861 const uint16_t in_tensor_dim,
7862 const uint16_t in_tensor_ch,
7863 const q7_t *ker_weight,
7864 const uint16_t out_tensor_ch,
7865 const uint16_t ker_dim,
7867 const uint16_t stride,
7872 const uint16_t out_tensor_dim,
7876 #if defined(__zcc__)
7877 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias(
7878 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7882 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias(
7883 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7951 const uint16_t in_tensor_dim_x,
7952 const uint16_t in_tensor_dim_y,
7953 const uint16_t in_tensor_ch,
7954 const q7_t *ker_weight,
7955 const uint16_t out_tensor_ch,
7956 const uint16_t ker_dim_x,
7957 const uint16_t ker_dim_y,
7958 const uint16_t pad_x,
7959 const uint16_t pad_y,
7960 const uint16_t stride_x,
7961 const uint16_t stride_y,
7966 const uint16_t out_tensor_dim_x,
7967 const uint16_t out_tensor_dim_y,
7971 #if defined(__zcc__)
7972 tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any(
7973 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7974 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7978 riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(
7979 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7980 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8039 const uint16_t in_tensor_dim,
8040 const uint16_t in_tensor_ch,
8041 const q7_t *ker_weight,
8042 const uint16_t out_tensor_ch,
8043 const uint16_t ker_dim,
8045 const uint16_t stride,
8050 const uint16_t out_tensor_dim,
8054 #if defined(__zcc__)
8055 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8056 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8060 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8061 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8134 const uint16_t in_tensor_dim_x,
8135 const uint16_t in_tensor_dim_y,
8136 const uint16_t in_tensor_ch,
8137 const q7_t *ker_weight,
8138 const uint16_t out_tensor_ch,
8139 const uint16_t ker_dim_x,
8140 const uint16_t ker_dim_y,
8141 const uint16_t pad_x,
8142 const uint16_t pad_y,
8143 const uint16_t stride_x,
8144 const uint16_t stride_y,
8149 const uint16_t out_tensor_dim_x,
8150 const uint16_t out_tensor_dim_y,
8154 #if defined(__zcc__)
8155 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8156 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8157 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8161 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8162 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8163 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8221 const uint16_t in_tensor_dim,
8222 const uint16_t in_tensor_ch,
8223 const q15_t *ker_weight,
8224 const uint16_t out_tensor_ch,
8225 const uint16_t ker_dim,
8227 const uint16_t stride,
8232 const uint16_t out_tensor_dim,
8236 #if defined(__zcc__)
8237 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias(
8238 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8242 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias(
8243 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8302 const uint16_t in_tensor_dim,
8303 const uint16_t in_tensor_ch,
8304 const q15_t *ker_weight,
8305 const uint16_t out_tensor_ch,
8306 const uint16_t ker_dim,
8308 const uint16_t stride,
8313 const uint16_t out_tensor_dim,
8317 #if defined(__zcc__)
8318 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8319 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8323 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8324 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8397 const uint16_t in_tensor_dim_x,
8398 const uint16_t in_tensor_dim_y,
8399 const uint16_t in_tensor_ch,
8400 const q15_t *ker_weight,
8401 const uint16_t out_tensor_ch,
8402 const uint16_t ker_dim_x,
8403 const uint16_t ker_dim_y,
8404 const uint16_t pad_x,
8405 const uint16_t pad_y,
8406 const uint16_t stride_x,
8407 const uint16_t stride_y,
8412 const uint16_t out_tensor_dim_x,
8413 const uint16_t out_tensor_dim_y,
8417 #if defined(__zcc__)
8418 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8419 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8420 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8424 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8425 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8426 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8484 const uint16_t in_tensor_dim,
8485 const uint16_t in_tensor_ch,
8486 const q7_t *ker_weight,
8487 const uint16_t out_tensor_ch,
8488 const uint16_t ker_dim,
8490 const uint16_t stride,
8495 const uint16_t out_tensor_dim,
8499 #if defined(__zcc__)
8500 return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8501 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8505 return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8506 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8575 const uint16_t in_tensor_dim_x,
8576 const uint16_t in_tensor_dim_y,
8577 const uint16_t in_tensor_ch,
8578 const q7_t *ker_weight,
8579 const uint16_t out_tensor_ch,
8580 const uint16_t ker_dim_x,
8581 const uint16_t ker_dim_y,
8582 const uint16_t pad_x,
8583 const uint16_t pad_y,
8584 const uint16_t stride_x,
8585 const uint16_t stride_y,
8590 const uint16_t out_tensor_dim_x,
8591 const uint16_t out_tensor_dim_y,
8595 #if defined(__zcc__)
8596 return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8597 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8598 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8602 return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8603 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8604 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8656 const uint16_t in_tensor_dim_x,
8657 const uint16_t in_tensor_dim_y,
8658 const uint16_t in_tensor_ch,
8659 const q7_t *ker_weight,
8660 const uint16_t out_tensor_ch,
8661 const uint16_t ker_dim_x,
8662 const uint16_t ker_dim_y,
8663 const uint16_t pad_x,
8664 const uint16_t pad_y,
8665 const uint16_t stride_x,
8666 const uint16_t stride_y,
8668 const uint16_t pre_rshift,
8669 const uint16_t out_scale,
8670 const uint16_t post_rshift,
8672 const uint16_t out_tensor_dim_x,
8673 const uint16_t out_tensor_dim_y,
8676 #if defined(__zcc__)
/*
 * ZCC build: the tpt_ variant receives the stride/pad/shift scalars packed
 * in S1 and the tensor/kernel dimensions packed in S2 instead of as
 * individual arguments.
 */
8678 tpt_nn_conv_1x1_sym_params S1 = {stride_x, stride_y, pad_x, pad_y, pre_rshift, out_scale, post_rshift};
8679 tpt_nn_1x1_sym_dims S2 = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x, ker_dim_y,
8680 out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
/*
 * The leading argument is the output buffer. out_tensor_ch is already
 * carried inside S2, and passing it here would leave the call without any
 * destination for the result; the riscv_ call in the other branch of this
 * wrapper passes the same out_tensor buffer.
 * NOTE(review): the tpt_ prototype is not visible from this file - confirm
 * the argument order against the ZCC NN library header.
 */
8681 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8682 out_tensor, in_tensor, ker_weight,
bias, &S1, &S2,
in_tmp_buf);
8685 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8686 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8687 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8688 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8740 const uint16_t in_tensor_dim_x,
8741 const uint16_t in_tensor_dim_y,
8742 const uint16_t in_tensor_ch,
8743 const q7_t *ker_weight,
8744 const uint16_t out_tensor_ch,
8745 const uint16_t ker_dim_x,
8746 const uint16_t ker_dim_y,
8747 const uint16_t pad_x,
8748 const uint16_t pad_y,
8749 const uint16_t stride_x,
8750 const uint16_t stride_y,
8752 const uint16_t pre_rshift,
8753 const uint16_t out_scale,
8754 const uint16_t post_rshift,
8756 const uint16_t out_tensor_dim_x,
8757 const uint16_t out_tensor_dim_y,
8760 #if defined(__zcc__)
8761 return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8762 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8763 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8764 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8767 return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8768 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8769 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8770 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8821 const uint16_t in_tensor_dim_x,
8822 const uint16_t in_tensor_dim_y,
8823 const uint16_t in_tensor_ch,
8824 const q7_t *ker_weight,
8825 const uint16_t out_tensor_ch,
8826 const uint16_t ker_dim_x,
8827 const uint16_t ker_dim_y,
8828 const uint16_t pad_x,
8829 const uint16_t pad_y,
8830 const uint16_t stride_x,
8831 const uint16_t stride_y,
8833 const uint16_t pre_rshift,
8834 const uint16_t out_scale,
8835 const uint16_t post_rshift,
8837 const uint16_t out_tensor_dim_x,
8838 const uint16_t out_tensor_dim_y,
8841 #if defined(__zcc__)
8842 return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8843 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8844 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8845 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8848 return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8849 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8850 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8851 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8903 const uint16_t in_tensor_dim_x,
8904 const uint16_t in_tensor_dim_y,
8905 const uint16_t in_tensor_ch,
8906 const q7_t *ker_weight,
8907 const uint16_t out_tensor_ch,
8908 const uint16_t ker_dim_x,
8909 const uint16_t ker_dim_y,
8910 const uint16_t pad_x,
8911 const uint16_t pad_y,
8912 const uint16_t stride_x,
8913 const uint16_t stride_y,
8915 const uint16_t pre_rshift,
8916 const uint16_t out_scale,
8917 const uint16_t post_rshift,
8919 const uint16_t out_tensor_dim_x,
8920 const uint16_t out_tensor_dim_y,
8923 #if defined(__zcc__)
8924 return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8925 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8926 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8927 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8930 return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8931 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8932 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8933 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8985 const uint16_t in_tensor_dim_x,
8986 const uint16_t in_tensor_dim_y,
8987 const uint16_t in_tensor_ch,
8988 const q7_t *ker_weight,
8989 const uint16_t out_tensor_ch,
8990 const uint16_t ker_dim_x,
8991 const uint16_t ker_dim_y,
8992 const uint16_t pad_x,
8993 const uint16_t pad_y,
8994 const uint16_t stride_x,
8995 const uint16_t stride_y,
8997 const uint16_t pre_rshift,
8998 const uint16_t out_scale,
8999 const uint16_t post_rshift,
9001 const uint16_t out_tensor_dim_x,
9002 const uint16_t out_tensor_dim_y,
9005 #if defined(__zcc__)
9006 return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9007 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9008 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9009 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9012 return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9013 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9014 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9015 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9065 const uint16_t in_tensor_dim_x,
9066 const uint16_t in_tensor_dim_y,
9067 const uint16_t in_tensor_ch,
9068 const q7_t *ker_weight,
9069 const uint16_t out_tensor_ch,
9070 const uint16_t ker_dim_x,
9071 const uint16_t ker_dim_y,
9072 const uint16_t pad_x,
9073 const uint16_t pad_y,
9074 const uint16_t stride_x,
9075 const uint16_t stride_y,
9076 const uint16_t pre_rshift,
9077 const uint16_t out_scale,
9078 const uint16_t post_rshift,
9080 const uint16_t out_tensor_dim_x,
9081 const uint16_t out_tensor_dim_y,
9084 #if defined(__zcc__)
9085 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9086 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9087 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9088 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9091 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9092 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9093 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9094 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9145 const uint16_t in_tensor_dim_x,
9146 const uint16_t in_tensor_dim_y,
9147 const uint16_t in_tensor_ch,
9148 const q7_t *ker_weight,
9149 const uint16_t out_tensor_ch,
9150 const uint16_t ker_dim_x,
9151 const uint16_t ker_dim_y,
9152 const uint16_t pad_x,
9153 const uint16_t pad_y,
9154 const uint16_t stride_x,
9155 const uint16_t stride_y,
9156 const uint16_t pre_rshift,
9157 const uint16_t out_scale,
9158 const uint16_t post_rshift,
9160 const uint16_t out_tensor_dim_x,
9161 const uint16_t out_tensor_dim_y,
9164 #if defined(__zcc__)
9165 return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9166 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9167 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9168 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9171 return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9172 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9173 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9174 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9224 const uint16_t in_tensor_dim_x,
9225 const uint16_t in_tensor_dim_y,
9226 const uint16_t in_tensor_ch,
9227 const q7_t *ker_weight,
9228 const uint16_t out_tensor_ch,
9229 const uint16_t ker_dim_x,
9230 const uint16_t ker_dim_y,
9231 const uint16_t pad_x,
9232 const uint16_t pad_y,
9233 const uint16_t stride_x,
9234 const uint16_t stride_y,
9235 const uint16_t pre_rshift,
9236 const uint16_t out_scale,
9237 const uint16_t post_rshift,
9239 const uint16_t out_tensor_dim_x,
9240 const uint16_t out_tensor_dim_y,
9243 #if defined(__zcc__)
9244 return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9245 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9246 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9247 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9250 return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9251 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9252 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9253 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9304 const uint16_t in_tensor_dim_x,
9305 const uint16_t in_tensor_dim_y,
9306 const uint16_t in_tensor_ch,
9307 const q7_t *ker_weight,
9308 const uint16_t out_tensor_ch,
9309 const uint16_t ker_dim_x,
9310 const uint16_t ker_dim_y,
9311 const uint16_t pad_x,
9312 const uint16_t pad_y,
9313 const uint16_t stride_x,
9314 const uint16_t stride_y,
9315 const uint16_t pre_rshift,
9316 const uint16_t out_scale,
9317 const uint16_t post_rshift,
9319 const uint16_t out_tensor_dim_x,
9320 const uint16_t out_tensor_dim_y,
9323 #if defined(__zcc__)
9324 return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9325 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9326 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9327 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9330 return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9331 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9332 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9333 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9384 const uint16_t in_tensor_dim_x,
9385 const uint16_t in_tensor_dim_y,
9386 const uint16_t in_tensor_ch,
9387 const q7_t *ker_weight,
9388 const uint16_t out_tensor_ch,
9389 const uint16_t ker_dim_x,
9390 const uint16_t ker_dim_y,
9391 const uint16_t pad_x,
9392 const uint16_t pad_y,
9393 const uint16_t stride_x,
9394 const uint16_t stride_y,
9395 const uint16_t pre_rshift,
9396 const uint16_t out_scale,
9397 const uint16_t post_rshift,
9399 const uint16_t out_tensor_dim_x,
9400 const uint16_t out_tensor_dim_y,
9403 #if defined(__zcc__)
9404 return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9405 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9406 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9407 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9410 return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9411 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9412 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9413 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9450 const uint16_t in_tensor_dim,
9451 const q7_t *ker_weight,
9452 const uint16_t out_tensor_ch,
9453 const uint16_t ker_dim,
9455 const uint16_t stride,
9457 const uint16_t pre_rshift,
9458 const uint16_t out_scale,
9459 const uint16_t post_rshift,
9461 const uint16_t out_tensor_dim,
9465 #if defined(__zcc__)
9466 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9467 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9468 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9471 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9472 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9473 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9511 const uint16_t in_tensor_dim,
9512 const q7_t *ker_weight,
9513 const uint16_t out_tensor_ch,
9514 const uint16_t ker_dim,
9516 const uint16_t stride,
9518 const uint16_t pre_rshift,
9519 const uint16_t out_scale,
9520 const uint16_t post_rshift,
9522 const uint16_t out_tensor_dim,
9526 #if defined(__zcc__)
9527 return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9528 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9529 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9532 return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9533 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9534 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9571 const uint16_t in_tensor_dim,
9572 const q7_t *ker_weight,
9573 const uint16_t out_tensor_ch,
9574 const uint16_t ker_dim,
9576 const uint16_t stride,
9578 const uint16_t pre_rshift,
9579 const uint16_t out_scale,
9580 const uint16_t post_rshift,
9582 const uint16_t out_tensor_dim,
9586 #if defined(__zcc__)
9587 return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9588 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9589 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9592 return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9593 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9594 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9631 const uint16_t in_tensor_dim,
9632 const q7_t *ker_weight,
9633 const uint16_t out_tensor_ch,
9634 const uint16_t ker_dim,
9636 const uint16_t stride,
9638 const uint16_t pre_rshift,
9639 const uint16_t out_scale,
9640 const uint16_t post_rshift,
9642 const uint16_t out_tensor_dim,
9646 #if defined(__zcc__)
9647 return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9648 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9649 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9652 return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9653 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9654 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9692 const uint16_t in_tensor_dim,
9693 const q7_t *ker_weight,
9694 const uint16_t out_tensor_ch,
9695 const uint16_t ker_dim,
9697 const uint16_t stride,
9699 const uint16_t pre_rshift,
9700 const uint16_t out_scale,
9701 const uint16_t post_rshift,
9703 const uint16_t out_tensor_dim,
9707 #if defined(__zcc__)
9708 return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9709 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9710 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9713 return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9714 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9715 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9751 const uint16_t in_tensor_dim,
9752 const q7_t *ker_weight,
9753 const uint16_t out_tensor_ch,
9754 const uint16_t ker_dim,
9756 const uint16_t stride,
9757 const uint16_t pre_rshift,
9758 const uint16_t out_scale,
9759 const uint16_t post_rshift,
9761 const uint16_t out_tensor_dim,
9765 #if defined(__zcc__)
9766 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9767 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9768 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9771 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9772 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9773 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9809 const uint16_t in_tensor_dim,
9810 const q7_t *ker_weight,
9811 const uint16_t out_tensor_ch,
9812 const uint16_t ker_dim,
9814 const uint16_t stride,
9815 const uint16_t pre_rshift,
9816 const uint16_t out_scale,
9817 const uint16_t post_rshift,
9819 const uint16_t out_tensor_dim,
9823 #if defined(__zcc__)
9824 return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9825 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9826 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9829 return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9830 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9831 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9867 const uint16_t in_tensor_dim,
9868 const q7_t *ker_weight,
9869 const uint16_t out_tensor_ch,
9870 const uint16_t ker_dim,
9872 const uint16_t stride,
9873 const uint16_t pre_rshift,
9874 const uint16_t out_scale,
9875 const uint16_t post_rshift,
9877 const uint16_t out_tensor_dim,
9881 #if defined(__zcc__)
9882 return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9883 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9884 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9887 return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9888 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9889 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9925 const uint16_t in_tensor_dim,
9926 const q7_t *ker_weight,
9927 const uint16_t out_tensor_ch,
9928 const uint16_t ker_dim,
9930 const uint16_t stride,
9931 const uint16_t pre_rshift,
9932 const uint16_t out_scale,
9933 const uint16_t post_rshift,
9935 const uint16_t out_tensor_dim,
9939 #if defined(__zcc__)
9940 return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9941 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9942 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9945 return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9946 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9947 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9983 const uint16_t in_tensor_dim,
9984 const q7_t *ker_weight,
9985 const uint16_t out_tensor_ch,
9986 const uint16_t ker_dim,
9988 const uint16_t stride,
9989 const uint16_t pre_rshift,
9990 const uint16_t out_scale,
9991 const uint16_t post_rshift,
9993 const uint16_t out_tensor_dim,
9997 #if defined(__zcc__)
9998 return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
9999 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10000 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10003 return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10004 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10005 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10041 const uint16_t in_tensor_dim,
10042 const uint16_t in_tensor_ch,
10043 const q7_t *ker_weight,
10044 const uint16_t out_tensor_ch,
10045 const uint16_t ker_dim,
10046 const uint16_t pad,
10047 const uint16_t stride,
10049 const uint16_t pre_rshift,
10050 const uint16_t out_scale,
10051 const uint16_t post_rshift,
10053 const uint16_t out_tensor_dim,
10056 #if defined(__zcc__)
10057 return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10058 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10059 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10062 return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10063 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10064 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10100 const uint16_t in_tensor_dim,
10101 const uint16_t in_tensor_ch,
10102 const q7_t *ker_weight,
10103 const uint16_t out_tensor_ch,
10104 const uint16_t ker_dim,
10105 const uint16_t pad,
10106 const uint16_t stride,
10108 const uint16_t pre_rshift,
10109 const uint16_t out_scale,
10110 const uint16_t post_rshift,
10112 const uint16_t out_tensor_dim,
10115 #if defined(__zcc__)
10116 return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10117 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10118 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10121 return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10122 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10123 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10159 const uint16_t in_tensor_dim,
10160 const uint16_t in_tensor_ch,
10161 const q7_t *ker_weight,
10162 const uint16_t out_tensor_ch,
10163 const uint16_t ker_dim,
10164 const uint16_t pad,
10165 const uint16_t stride,
10167 const uint16_t pre_rshift,
10168 const uint16_t out_scale,
10169 const uint16_t post_rshift,
10171 const uint16_t out_tensor_dim,
10174 #if defined(__zcc__)
10175 return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10176 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10177 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10180 return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10181 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10182 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10218 const uint16_t in_tensor_dim,
10219 const uint16_t in_tensor_ch,
10220 const q7_t *ker_weight,
10221 const uint16_t out_tensor_ch,
10222 const uint16_t ker_dim,
10223 const uint16_t pad,
10224 const uint16_t stride,
10226 const uint16_t pre_rshift,
10227 const uint16_t out_scale,
10228 const uint16_t post_rshift,
10230 const uint16_t out_tensor_dim,
10233 #if defined(__zcc__)
10234 return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10235 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10236 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10239 return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10240 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10241 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10277 const uint16_t in_tensor_dim,
10278 const uint16_t in_tensor_ch,
10279 const q7_t *ker_weight,
10280 const uint16_t out_tensor_ch,
10281 const uint16_t ker_dim,
10282 const uint16_t pad,
10283 const uint16_t stride,
10285 const uint16_t pre_rshift,
10286 const uint16_t out_scale,
10287 const uint16_t post_rshift,
10289 const uint16_t out_tensor_dim,
10292 #if defined(__zcc__)
10293 return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10294 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10295 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10298 return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10299 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10300 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10335 const uint16_t in_tensor_dim,
10336 const uint16_t in_tensor_ch,
10337 const q7_t *ker_weight,
10338 const uint16_t out_tensor_ch,
10339 const uint16_t ker_dim,
10340 const uint16_t pad,
10341 const uint16_t stride,
10342 const uint16_t pre_rshift,
10343 const uint16_t out_scale,
10344 const uint16_t post_rshift,
10346 const uint16_t out_tensor_dim,
10349 #if defined(__zcc__)
10350 return tpt_nn_conv_HWC_s8_s8_s8_sym_fast(
10351 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10352 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10355 return riscv_nn_conv_HWC_s8_s8_s8_sym_fast(
10356 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10357 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10392 const uint16_t in_tensor_dim,
10393 const uint16_t in_tensor_ch,
10394 const q7_t *ker_weight,
10395 const uint16_t out_tensor_ch,
10396 const uint16_t ker_dim,
10397 const uint16_t pad,
10398 const uint16_t stride,
10399 const uint16_t pre_rshift,
10400 const uint16_t out_scale,
10401 const uint16_t post_rshift,
10403 const uint16_t out_tensor_dim,
10406 #if defined(__zcc__)
10407 return tpt_nn_conv_HWC_s8_s16_s8_sym_fast(
10408 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10409 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10412 return riscv_nn_conv_HWC_s8_s16_s8_sym_fast(
10413 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10414 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10449 const uint16_t in_tensor_dim,
10450 const uint16_t in_tensor_ch,
10451 const q7_t *ker_weight,
10452 const uint16_t out_tensor_ch,
10453 const uint16_t ker_dim,
10454 const uint16_t pad,
10455 const uint16_t stride,
10456 const uint16_t pre_rshift,
10457 const uint16_t out_scale,
10458 const uint16_t post_rshift,
10460 const uint16_t out_tensor_dim,
10463 #if defined(__zcc__)
10464 return tpt_nn_conv_HWC_u8_u8_s8_sym_fast(
10465 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10466 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10469 return riscv_nn_conv_HWC_u8_u8_s8_sym_fast(
10470 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10471 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10506 const uint16_t in_tensor_dim,
10507 const uint16_t in_tensor_ch,
10508 const q7_t *ker_weight,
10509 const uint16_t out_tensor_ch,
10510 const uint16_t ker_dim,
10511 const uint16_t pad,
10512 const uint16_t stride,
10513 const uint16_t pre_rshift,
10514 const uint16_t out_scale,
10515 const uint16_t post_rshift,
10517 const uint16_t out_tensor_dim,
10520 #if defined(__zcc__)
10521 return tpt_nn_conv_HWC_u8_s8_s8_sym_fast(
10522 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10523 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10526 return riscv_nn_conv_HWC_u8_s8_s8_sym_fast(
10527 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10528 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10563 const uint16_t in_tensor_dim,
10564 const uint16_t in_tensor_ch,
10565 const q7_t *ker_weight,
10566 const uint16_t out_tensor_ch,
10567 const uint16_t ker_dim,
10568 const uint16_t pad,
10569 const uint16_t stride,
10570 const uint16_t pre_rshift,
10571 const uint16_t out_scale,
10572 const uint16_t post_rshift,
10574 const uint16_t out_tensor_dim,
10577 #if defined(__zcc__)
10578 return tpt_nn_conv_HWC_u8_s16_s8_sym_fast(
10579 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10580 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10583 return riscv_nn_conv_HWC_u8_s16_s8_sym_fast(
10584 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10585 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10627 const uint16_t in_tensor_dim_x,
10628 const uint16_t in_tensor_dim_y,
10629 const uint16_t in_tensor_ch,
10630 const q7_t *ker_weight,
10631 const uint16_t out_tensor_ch,
10632 const uint16_t ker_dim_x,
10633 const uint16_t ker_dim_y,
10634 const uint16_t pad_x,
10635 const uint16_t pad_y,
10636 const uint16_t stride_x,
10637 const uint16_t stride_y,
10639 const uint16_t pre_rshift,
10640 const uint16_t out_scale,
10641 const uint16_t post_rshift,
10643 const uint16_t out_tensor_dim_x,
10644 const uint16_t out_tensor_dim_y,
10647 #if defined(__zcc__)
10648 return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10649 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10650 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10651 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10654 return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10655 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10656 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10657 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10700 const uint16_t in_tensor_dim_x,
10701 const uint16_t in_tensor_dim_y,
10702 const uint16_t in_tensor_ch,
10703 const q7_t *ker_weight,
10704 const uint16_t out_tensor_ch,
10705 const uint16_t ker_dim_x,
10706 const uint16_t ker_dim_y,
10707 const uint16_t pad_x,
10708 const uint16_t pad_y,
10709 const uint16_t stride_x,
10710 const uint16_t stride_y,
10712 const uint16_t pre_rshift,
10713 const uint16_t out_scale,
10714 const uint16_t post_rshift,
10716 const uint16_t out_tensor_dim_x,
10717 const uint16_t out_tensor_dim_y,
10720 #if defined(__zcc__)
10721 return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10722 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10723 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10724 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10727 return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10728 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10729 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10730 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10772 const uint16_t in_tensor_dim_x,
10773 const uint16_t in_tensor_dim_y,
10774 const uint16_t in_tensor_ch,
10775 const q7_t *ker_weight,
10776 const uint16_t out_tensor_ch,
10777 const uint16_t ker_dim_x,
10778 const uint16_t ker_dim_y,
10779 const uint16_t pad_x,
10780 const uint16_t pad_y,
10781 const uint16_t stride_x,
10782 const uint16_t stride_y,
10784 const uint16_t pre_rshift,
10785 const uint16_t out_scale,
10786 const uint16_t post_rshift,
10788 const uint16_t out_tensor_dim_x,
10789 const uint16_t out_tensor_dim_y,
10792 #if defined(__zcc__)
10793 return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10794 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10795 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10796 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10799 return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10800 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10801 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10802 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10845 const uint16_t in_tensor_dim_x,
10846 const uint16_t in_tensor_dim_y,
10847 const uint16_t in_tensor_ch,
10848 const q7_t *ker_weight,
10849 const uint16_t out_tensor_ch,
10850 const uint16_t ker_dim_x,
10851 const uint16_t ker_dim_y,
10852 const uint16_t pad_x,
10853 const uint16_t pad_y,
10854 const uint16_t stride_x,
10855 const uint16_t stride_y,
10857 const uint16_t pre_rshift,
10858 const uint16_t out_scale,
10859 const uint16_t post_rshift,
10861 const uint16_t out_tensor_dim_x,
10862 const uint16_t out_tensor_dim_y,
10865 #if defined(__zcc__)
10866 return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10867 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10868 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10869 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10872 return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10873 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10874 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10875 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10918 const uint16_t in_tensor_dim_x,
10919 const uint16_t in_tensor_dim_y,
10920 const uint16_t in_tensor_ch,
10921 const q7_t *ker_weight,
10922 const uint16_t out_tensor_ch,
10923 const uint16_t ker_dim_x,
10924 const uint16_t ker_dim_y,
10925 const uint16_t pad_x,
10926 const uint16_t pad_y,
10927 const uint16_t stride_x,
10928 const uint16_t stride_y,
10930 const uint16_t pre_rshift,
10931 const uint16_t out_scale,
10932 const uint16_t post_rshift,
10934 const uint16_t out_tensor_dim_x,
10935 const uint16_t out_tensor_dim_y,
10938 #if defined(__zcc__)
10939 return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10940 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10941 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10942 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10945 return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10946 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10947 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10948 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10989 const uint16_t in_tensor_dim_x,
10990 const uint16_t in_tensor_dim_y,
10991 const uint16_t in_tensor_ch,
10992 const q7_t *ker_weight,
10993 const uint16_t out_tensor_ch,
10994 const uint16_t ker_dim_x,
10995 const uint16_t ker_dim_y,
10996 const uint16_t pad_x,
10997 const uint16_t pad_y,
10998 const uint16_t stride_x,
10999 const uint16_t stride_y,
11000 const uint16_t pre_rshift,
11001 const uint16_t out_scale,
11002 const uint16_t post_rshift,
11004 const uint16_t out_tensor_dim_x,
11005 const uint16_t out_tensor_dim_y,
11008 #if defined(__zcc__)
11009 return tpt_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11010 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11011 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11012 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11015 return riscv_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11016 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11017 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11018 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11059 const uint16_t in_tensor_dim_x,
11060 const uint16_t in_tensor_dim_y,
11061 const uint16_t in_tensor_ch,
11062 const q7_t *ker_weight,
11063 const uint16_t out_tensor_ch,
11064 const uint16_t ker_dim_x,
11065 const uint16_t ker_dim_y,
11066 const uint16_t pad_x,
11067 const uint16_t pad_y,
11068 const uint16_t stride_x,
11069 const uint16_t stride_y,
11070 const uint16_t pre_rshift,
11071 const uint16_t out_scale,
11072 const uint16_t post_rshift,
11074 const uint16_t out_tensor_dim_x,
11075 const uint16_t out_tensor_dim_y,
11078 #if defined(__zcc__)
11079 return tpt_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11080 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11081 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11082 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11085 return riscv_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11086 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11087 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11088 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11129 const uint16_t in_tensor_dim_x,
11130 const uint16_t in_tensor_dim_y,
11131 const uint16_t in_tensor_ch,
11132 const q7_t *ker_weight,
11133 const uint16_t out_tensor_ch,
11134 const uint16_t ker_dim_x,
11135 const uint16_t ker_dim_y,
11136 const uint16_t pad_x,
11137 const uint16_t pad_y,
11138 const uint16_t stride_x,
11139 const uint16_t stride_y,
11140 const uint16_t pre_rshift,
11141 const uint16_t out_scale,
11142 const uint16_t post_rshift,
11144 const uint16_t out_tensor_dim_x,
11145 const uint16_t out_tensor_dim_y,
11148 #if defined(__zcc__)
11149 return tpt_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11150 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11151 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11152 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11155 return riscv_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11156 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11157 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11158 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11199 const uint16_t in_tensor_dim_x,
11200 const uint16_t in_tensor_dim_y,
11201 const uint16_t in_tensor_ch,
11202 const q7_t *ker_weight,
11203 const uint16_t out_tensor_ch,
11204 const uint16_t ker_dim_x,
11205 const uint16_t ker_dim_y,
11206 const uint16_t pad_x,
11207 const uint16_t pad_y,
11208 const uint16_t stride_x,
11209 const uint16_t stride_y,
11210 const uint16_t pre_rshift,
11211 const uint16_t out_scale,
11212 const uint16_t post_rshift,
11214 const uint16_t out_tensor_dim_x,
11215 const uint16_t out_tensor_dim_y,
11218 #if defined(__zcc__)
11219 return tpt_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11220 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11221 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11222 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11225 return riscv_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11226 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11227 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11228 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11269 const uint16_t in_tensor_dim_x,
11270 const uint16_t in_tensor_dim_y,
11271 const uint16_t in_tensor_ch,
11272 const q7_t *ker_weight,
11273 const uint16_t out_tensor_ch,
11274 const uint16_t ker_dim_x,
11275 const uint16_t ker_dim_y,
11276 const uint16_t pad_x,
11277 const uint16_t pad_y,
11278 const uint16_t stride_x,
11279 const uint16_t stride_y,
11280 const uint16_t pre_rshift,
11281 const uint16_t out_scale,
11282 const uint16_t post_rshift,
11284 const uint16_t out_tensor_dim_x,
11285 const uint16_t out_tensor_dim_y,
11288 #if defined(__zcc__)
11289 return tpt_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11290 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11291 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11292 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11295 return riscv_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11296 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11297 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11298 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11335 const uint16_t in_tensor_dim,
11336 const uint16_t in_tensor_ch,
11337 const q7_t *ker_weight,
11338 const uint16_t out_tensor_ch,
11339 const uint16_t ker_dim,
11340 const uint16_t pad,
11341 const uint16_t stride,
11343 const uint16_t pre_rshift,
11344 const uint16_t out_scale,
11345 const uint16_t post_rshift,
11347 const uint16_t out_tensor_dim,
11350 #if defined(__zcc__)
11351 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11352 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11353 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11356 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11357 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11358 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11394 const uint16_t in_tensor_dim,
11395 const uint16_t in_tensor_ch,
11396 const q7_t *ker_weight,
11397 const uint16_t out_tensor_ch,
11398 const uint16_t ker_dim,
11399 const uint16_t pad,
11400 const uint16_t stride,
11402 const uint16_t pre_rshift,
11403 const uint16_t out_scale,
11404 const uint16_t post_rshift,
11406 const uint16_t out_tensor_dim,
11409 #if defined(__zcc__)
11410 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11411 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11412 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11415 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11416 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11417 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11453 const uint16_t in_tensor_dim,
11454 const uint16_t in_tensor_ch,
11455 const q7_t *ker_weight,
11456 const uint16_t out_tensor_ch,
11457 const uint16_t ker_dim,
11458 const uint16_t pad,
11459 const uint16_t stride,
11461 const uint16_t pre_rshift,
11462 const uint16_t out_scale,
11463 const uint16_t post_rshift,
11465 const uint16_t out_tensor_dim,
11468 #if defined(__zcc__)
11469 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11470 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11471 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11474 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11475 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11476 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11512 const uint16_t in_tensor_dim,
11513 const uint16_t in_tensor_ch,
11514 const q7_t *ker_weight,
11515 const uint16_t out_tensor_ch,
11516 const uint16_t ker_dim,
11517 const uint16_t pad,
11518 const uint16_t stride,
11520 const uint16_t pre_rshift,
11521 const uint16_t out_scale,
11522 const uint16_t post_rshift,
11524 const uint16_t out_tensor_dim,
11527 #if defined(__zcc__)
11528 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11529 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11530 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11533 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11534 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11535 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11571 const uint16_t in_tensor_dim,
11572 const uint16_t in_tensor_ch,
11573 const q7_t *ker_weight,
11574 const uint16_t out_tensor_ch,
11575 const uint16_t ker_dim,
11576 const uint16_t pad,
11577 const uint16_t stride,
11579 const uint16_t pre_rshift,
11580 const uint16_t out_scale,
11581 const uint16_t post_rshift,
11583 const uint16_t out_tensor_dim,
11586 #if defined(__zcc__)
11587 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11588 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11589 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11592 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11593 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11594 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11629 const uint16_t in_tensor_dim,
11630 const uint16_t in_tensor_ch,
11631 const q7_t *ker_weight,
11632 const uint16_t out_tensor_ch,
11633 const uint16_t ker_dim,
11634 const uint16_t pad,
11635 const uint16_t stride,
11636 const uint16_t pre_rshift,
11637 const uint16_t out_scale,
11638 const uint16_t post_rshift,
11640 const uint16_t out_tensor_dim,
11643 #if defined(__zcc__)
11644 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym(
11645 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11646 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11649 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym(
11650 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11651 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11686 const uint16_t in_tensor_dim,
11687 const uint16_t in_tensor_ch,
11688 const q7_t *ker_weight,
11689 const uint16_t out_tensor_ch,
11690 const uint16_t ker_dim,
11691 const uint16_t pad,
11692 const uint16_t stride,
11693 const uint16_t pre_rshift,
11694 const uint16_t out_scale,
11695 const uint16_t post_rshift,
11697 const uint16_t out_tensor_dim,
11700 #if defined(__zcc__)
11701 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym(
11702 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11703 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11706 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym(
11707 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11708 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11743 const uint16_t in_tensor_dim,
11744 const uint16_t in_tensor_ch,
11745 const q7_t *ker_weight,
11746 const uint16_t out_tensor_ch,
11747 const uint16_t ker_dim,
11748 const uint16_t pad,
11749 const uint16_t stride,
11750 const uint16_t pre_rshift,
11751 const uint16_t out_scale,
11752 const uint16_t post_rshift,
11754 const uint16_t out_tensor_dim,
11757 #if defined(__zcc__)
11758 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym(
11759 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11760 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11763 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym(
11764 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11765 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11800 const uint16_t in_tensor_dim,
11801 const uint16_t in_tensor_ch,
11802 const q7_t *ker_weight,
11803 const uint16_t out_tensor_ch,
11804 const uint16_t ker_dim,
11805 const uint16_t pad,
11806 const uint16_t stride,
11807 const uint16_t pre_rshift,
11808 const uint16_t out_scale,
11809 const uint16_t post_rshift,
11811 const uint16_t out_tensor_dim,
11814 #if defined(__zcc__)
11815 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym(
11816 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11817 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11820 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym(
11821 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11822 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11857 const uint16_t in_tensor_dim,
11858 const uint16_t in_tensor_ch,
11859 const q7_t *ker_weight,
11860 const uint16_t out_tensor_ch,
11861 const uint16_t ker_dim,
11862 const uint16_t pad,
11863 const uint16_t stride,
11864 const uint16_t pre_rshift,
11865 const uint16_t out_scale,
11866 const uint16_t post_rshift,
11868 const uint16_t out_tensor_dim,
11871 #if defined(__zcc__)
11872 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym(
11873 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11874 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11877 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym(
11878 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11879 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11921 const uint16_t in_tensor_dim_x,
11922 const uint16_t in_tensor_dim_y,
11923 const uint16_t in_tensor_ch,
11924 const q7_t *ker_weight,
11925 const uint16_t out_tensor_ch,
11926 const uint16_t ker_dim_x,
11927 const uint16_t ker_dim_y,
11928 const uint16_t pad_x,
11929 const uint16_t pad_y,
11930 const uint16_t stride_x,
11931 const uint16_t stride_y,
11933 const uint16_t pre_rshift,
11934 const uint16_t out_scale,
11935 const uint16_t post_rshift,
11937 const uint16_t out_tensor_dim_x,
11938 const uint16_t out_tensor_dim_y,
11941 #if defined(__zcc__)
11942 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11943 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11944 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11945 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11948 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11949 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11950 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11951 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11994 const uint16_t in_tensor_dim_x,
11995 const uint16_t in_tensor_dim_y,
11996 const uint16_t in_tensor_ch,
11997 const q7_t *ker_weight,
11998 const uint16_t out_tensor_ch,
11999 const uint16_t ker_dim_x,
12000 const uint16_t ker_dim_y,
12001 const uint16_t pad_x,
12002 const uint16_t pad_y,
12003 const uint16_t stride_x,
12004 const uint16_t stride_y,
12006 const uint16_t pre_rshift,
12007 const uint16_t out_scale,
12008 const uint16_t post_rshift,
12010 const uint16_t out_tensor_dim_x,
12011 const uint16_t out_tensor_dim_y,
12014 #if defined(__zcc__)
12015 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12016 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12017 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12018 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12021 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12022 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12023 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12024 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12066 const uint16_t in_tensor_dim_x,
12067 const uint16_t in_tensor_dim_y,
12068 const uint16_t in_tensor_ch,
12069 const q7_t *ker_weight,
12070 const uint16_t out_tensor_ch,
12071 const uint16_t ker_dim_x,
12072 const uint16_t ker_dim_y,
12073 const uint16_t pad_x,
12074 const uint16_t pad_y,
12075 const uint16_t stride_x,
12076 const uint16_t stride_y,
12078 const uint16_t pre_rshift,
12079 const uint16_t out_scale,
12080 const uint16_t post_rshift,
12082 const uint16_t out_tensor_dim_x,
12083 const uint16_t out_tensor_dim_y,
12086 #if defined(__zcc__)
12087 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12088 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12089 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12090 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12093 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12094 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12095 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12096 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12139 const uint16_t in_tensor_dim_x,
12140 const uint16_t in_tensor_dim_y,
12141 const uint16_t in_tensor_ch,
12142 const q7_t *ker_weight,
12143 const uint16_t out_tensor_ch,
12144 const uint16_t ker_dim_x,
12145 const uint16_t ker_dim_y,
12146 const uint16_t pad_x,
12147 const uint16_t pad_y,
12148 const uint16_t stride_x,
12149 const uint16_t stride_y,
12151 const uint16_t pre_rshift,
12152 const uint16_t out_scale,
12153 const uint16_t post_rshift,
12155 const uint16_t out_tensor_dim_x,
12156 const uint16_t out_tensor_dim_y,
12159 #if defined(__zcc__)
12160 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12161 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12162 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12163 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12166 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12167 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12168 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12169 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12212 const uint16_t in_tensor_dim_x,
12213 const uint16_t in_tensor_dim_y,
12214 const uint16_t in_tensor_ch,
12215 const q7_t *ker_weight,
12216 const uint16_t out_tensor_ch,
12217 const uint16_t ker_dim_x,
12218 const uint16_t ker_dim_y,
12219 const uint16_t pad_x,
12220 const uint16_t pad_y,
12221 const uint16_t stride_x,
12222 const uint16_t stride_y,
12224 const uint16_t pre_rshift,
12225 const uint16_t out_scale,
12226 const uint16_t post_rshift,
12228 const uint16_t out_tensor_dim_x,
12229 const uint16_t out_tensor_dim_y,
12232 #if defined(__zcc__)
12233 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12234 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12235 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12236 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12239 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12240 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12241 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12242 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12283 const uint16_t in_tensor_dim_x,
12284 const uint16_t in_tensor_dim_y,
12285 const uint16_t in_tensor_ch,
12286 const q7_t *ker_weight,
12287 const uint16_t out_tensor_ch,
12288 const uint16_t ker_dim_x,
12289 const uint16_t ker_dim_y,
12290 const uint16_t pad_x,
12291 const uint16_t pad_y,
12292 const uint16_t stride_x,
12293 const uint16_t stride_y,
12294 const uint16_t pre_rshift,
12295 const uint16_t out_scale,
12296 const uint16_t post_rshift,
12298 const uint16_t out_tensor_dim_x,
12299 const uint16_t out_tensor_dim_y,
12302 #if defined(__zcc__)
12303 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12304 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12305 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12306 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12309 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12310 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12311 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12312 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12354 const uint16_t in_tensor_dim_x,
12355 const uint16_t in_tensor_dim_y,
12356 const uint16_t in_tensor_ch,
12357 const q7_t *ker_weight,
12358 const uint16_t out_tensor_ch,
12359 const uint16_t ker_dim_x,
12360 const uint16_t ker_dim_y,
12361 const uint16_t pad_x,
12362 const uint16_t pad_y,
12363 const uint16_t stride_x,
12364 const uint16_t stride_y,
12365 const uint16_t pre_rshift,
12366 const uint16_t out_scale,
12367 const uint16_t post_rshift,
12369 const uint16_t out_tensor_dim_x,
12370 const uint16_t out_tensor_dim_y,
12373 #if defined(__zcc__)
12374 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12375 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12376 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12377 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12380 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12381 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12382 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12383 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12424 const uint16_t in_tensor_dim_x,
12425 const uint16_t in_tensor_dim_y,
12426 const uint16_t in_tensor_ch,
12427 const q7_t *ker_weight,
12428 const uint16_t out_tensor_ch,
12429 const uint16_t ker_dim_x,
12430 const uint16_t ker_dim_y,
12431 const uint16_t pad_x,
12432 const uint16_t pad_y,
12433 const uint16_t stride_x,
12434 const uint16_t stride_y,
12435 const uint16_t pre_rshift,
12436 const uint16_t out_scale,
12437 const uint16_t post_rshift,
12439 const uint16_t out_tensor_dim_x,
12440 const uint16_t out_tensor_dim_y,
12443 #if defined(__zcc__)
12444 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12445 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12446 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12447 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12450 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12451 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12452 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12453 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12495 const uint16_t in_tensor_dim_x,
12496 const uint16_t in_tensor_dim_y,
12497 const uint16_t in_tensor_ch,
12498 const q7_t *ker_weight,
12499 const uint16_t out_tensor_ch,
12500 const uint16_t ker_dim_x,
12501 const uint16_t ker_dim_y,
12502 const uint16_t pad_x,
12503 const uint16_t pad_y,
12504 const uint16_t stride_x,
12505 const uint16_t stride_y,
12506 const uint16_t pre_rshift,
12507 const uint16_t out_scale,
12508 const uint16_t post_rshift,
12510 const uint16_t out_tensor_dim_x,
12511 const uint16_t out_tensor_dim_y,
12514 #if defined(__zcc__)
12515 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12516 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12517 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12518 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12521 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12522 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12523 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12524 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12566 const uint16_t in_tensor_dim_x,
12567 const uint16_t in_tensor_dim_y,
12568 const uint16_t in_tensor_ch,
12569 const q7_t *ker_weight,
12570 const uint16_t out_tensor_ch,
12571 const uint16_t ker_dim_x,
12572 const uint16_t ker_dim_y,
12573 const uint16_t pad_x,
12574 const uint16_t pad_y,
12575 const uint16_t stride_x,
12576 const uint16_t stride_y,
12577 const uint16_t pre_rshift,
12578 const uint16_t out_scale,
12579 const uint16_t post_rshift,
12581 const uint16_t out_tensor_dim_x,
12582 const uint16_t out_tensor_dim_y,
12585 #if defined(__zcc__)
12586 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12587 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12588 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12589 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12592 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12593 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12594 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12595 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12647 const uint16_t in_tensor_dim_x,
12648 const uint16_t in_tensor_dim_y,
12649 const uint16_t in_tensor_ch,
12650 const uint16_t in_tensor_group,
12651 const q7_t *ker_weight,
12652 const uint16_t out_tensor_ch,
12653 const uint16_t pad_x,
12654 const uint16_t pad_y,
12655 const uint16_t stride_x,
12656 const uint16_t stride_y,
12657 const int32_t *
bias,
12659 const int32_t *out_shift,
12660 const int32_t *out_scale,
12661 const int32_t out_offset,
12662 const int32_t in_offset,
12663 const int32_t act_min,
12664 const int32_t act_max,
12665 const uint16_t out_tensor_dim_x,
12666 const uint16_t out_tensor_dim_y,
12669 #if defined(__zcc__)
12671 tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
12672 stride_y, pad_x, pad_y, act_min, act_max};
12674 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12676 tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12677 in_tensor_group, out_tensor_ch};
12679 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12680 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
12683 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
12684 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12685 in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
12686 stride_y,
bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
12687 act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
12697 static inline int32_t
12699 const uint16_t in_tensor_ch) {
/*
 * Toolchain dispatch for the 1x1 convolution scratch-buffer size query:
 * with __zcc__ defined the tpt_* query is returned, and the riscv_nn_* query
 * is the other visible return path (presumably the #else branch).
 * The stray "convol" token that followed the condition was removed: extra
 * tokens after the controlling expression make the #if ill-formed.
 * NOTE(review): the tpt_* name here ends in "_get_buf_size" while the
 * neighbouring 1xn wrapper calls "..._get_buffer_size" - confirm the spelling
 * against the tpt library header.
 */
12700 #if defined(__zcc__)
12701 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
12704 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
12748 const uint16_t in_tensor_dim_x,
12749 const uint16_t in_tensor_ch,
12750 const uint16_t in_tensor_group,
12751 const q7_t *ker_weight,
12752 const uint16_t out_tensor_ch,
12753 const uint16_t ker_dim_x,
12754 const uint16_t pad_x,
12755 const uint16_t stride_x,
12756 const int32_t *
bias,
12758 const int32_t *out_shift,
12759 const int32_t *out_scale,
12760 const int32_t out_offset,
12761 const int32_t in_offset,
12762 const int32_t act_min,
12763 const int32_t act_max,
12764 const uint16_t out_tensor_dim_x,
12767 #if defined(__zcc__)
12769 tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
12772 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12774 tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
12775 ker_dim_x, out_tensor_dim_x, out_tensor_ch};
12777 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12781 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
12782 in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
12783 out_tensor_ch, ker_dim_x, pad_x, stride_x,
bias, out_tensor, out_shift,
12784 out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12799 const uint16_t ker_dim_x,
12800 const uint16_t ker_dim_y)
12802 #if defined(__zcc__)
12803 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
12804 in_tensor_ch, ker_dim_x, ker_dim_y);
12806 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12807 in_tensor_ch, ker_dim_x, ker_dim_y);
12853 const uint16_t in_tensor_dim_x,
12854 const uint16_t in_tensor_dim_y,
12855 const uint16_t in_tensor_ch,
12856 const uint16_t in_tensor_group,
12857 const q7_t *ker_weight,
12858 const uint16_t out_tensor_ch,
12859 const uint16_t ker_dim_x,
12860 const uint16_t ker_dim_y,
12861 const uint16_t pad_x,
12862 const uint16_t pad_y,
12863 const uint16_t stride_x,
12864 const uint16_t stride_y,
12865 const int32_t *
bias,
12867 const int32_t *out_shift,
12868 const int32_t *out_scale,
12869 const int32_t out_offset,
12870 const int32_t in_offset,
12871 const int32_t act_min,
12872 const int32_t act_max,
12873 const uint16_t out_tensor_dim_x,
12874 const uint16_t out_tensor_dim_y,
12877 #if defined(__zcc__)
12879 tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
12880 in_offset, out_offset, act_min, act_max};
12882 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12884 tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12885 in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
12888 return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12892 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
12893 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12894 in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
12895 pad_y, stride_x, stride_y,
bias, out_tensor, out_shift, out_scale,
12896 out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12910 const uint16_t ker_dim_x,
12911 const uint16_t ker_dim_y)
12913 #if defined(__zcc__)
12914 return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12915 in_tensor_ch, ker_dim_x, ker_dim_y);
12917 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12918 in_tensor_ch, ker_dim_x, ker_dim_y);
12962 const int32_t in_tensor_dim_x,
12963 const int32_t in_tensor_dim_y,
12964 const int32_t in_tensor_ch,
12965 const int8_t *ker_weight,
12966 const int32_t out_tensor_ch,
12967 const int32_t pad_x,
12968 const int32_t pad_y,
12969 const int32_t stride_x,
12970 const int32_t stride_y,
12971 const int32_t *
bias,
12972 int8_t *out_tensor,
12973 const int32_t *out_shift,
12974 const int32_t *out_scale,
12975 const int32_t out_tensor_dim_x,
12976 const int32_t out_tensor_dim_y,
12977 const int32_t out_offset,
12978 const int32_t in_offset,
12979 const int32_t act_min,
12980 const int32_t act_max,
12981 const int32_t dilation_x,
12982 const int32_t dilation_y,
12985 #if defined(__zcc__)
12986 return tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
12987 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12988 out_tensor_ch, pad_x, pad_y, stride_x, stride_y,
bias, out_tensor,
12989 out_shift, out_scale, out_tensor_dim_x, out_tensor_dim_y, out_offset,
12990 in_offset, act_min, act_max, dilation_x, dilation_y, tmp_buf);
12992 return riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
12993 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12994 out_tensor_ch, pad_x, pad_y, stride_x, stride_y,
bias, out_tensor,
12995 out_shift, out_scale, out_tensor_dim_x, out_tensor_dim_y, out_offset,
12996 in_offset, act_min, act_max, dilation_x, dilation_y, tmp_buf);
13048 const uint16_t in_tensor_dim_x,
13049 const uint16_t in_tensor_dim_y,
13050 const uint16_t in_tensor_ch,
13051 const q7_t *ker_weight,
13052 const uint16_t out_tensor_ch,
13053 const uint16_t ch_mult,
13054 const uint16_t ker_dim_x,
13055 const uint16_t ker_dim_y,
13056 const uint16_t pad_x,
13057 const uint16_t pad_y,
13058 const uint16_t stride_x,
13059 const uint16_t stride_y,
13060 const int32_t *
bias,
13062 const int32_t *out_shift,
13063 const int32_t *out_scale,
13064 const uint16_t out_tensor_dim_x,
13065 const uint16_t out_tensor_dim_y,
13066 const int32_t out_offset,
13067 const int32_t in_offset,
13068 const int32_t act_min,
13069 const int32_t act_max,
13070 const uint16_t dilation_x,
13071 const uint16_t dilation_y,
13074 #if defined(__zcc__)
13076 tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13077 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13079 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13081 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13082 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13084 return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13085 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13089 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13090 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13091 out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13092 stride_y,
bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13093 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13094 dilation_y, tmp_buf);
13143 const uint16_t in_tensor_dim_x,
13144 const uint16_t in_tensor_dim_y,
13145 const uint16_t in_tensor_ch,
13146 const q7_t *ker_weight,
13147 const uint16_t out_tensor_ch,
13148 const uint16_t ker_dim_x,
13149 const uint16_t ker_dim_y,
13150 const uint16_t pad_x,
13151 const uint16_t pad_y,
13152 const uint16_t stride_x,
13153 const uint16_t stride_y,
13154 const int32_t *
bias,
13156 const int32_t *out_shift,
13157 const int32_t *out_scale,
13158 const uint16_t out_tensor_dim_x,
13159 const uint16_t out_tensor_dim_y,
13160 const int32_t out_offset,
13161 const int32_t in_offset,
13162 const int32_t act_min,
13163 const int32_t act_max,
13164 const uint16_t dilation_x,
13165 const uint16_t dilation_y,
13168 #if defined(__zcc__)
13170 tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13171 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13173 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13175 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13176 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13178 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13182 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13183 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13184 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13185 bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13186 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13200 const uint16_t ker_dim_x,
13201 const uint16_t ker_dim_y)
13203 #if defined(__zcc__)
13204 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13205 in_tensor_ch, ker_dim_x, ker_dim_y);
13207 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13208 in_tensor_ch, ker_dim_x, ker_dim_y);
13253 const uint16_t in_tensor_dim_x,
13254 const uint16_t in_tensor_dim_y,
13255 const uint16_t in_tensor_ch,
13256 const uint8_t *ker_weight,
13257 const uint16_t ker_dim_x,
13258 const uint16_t ker_dim_y,
13259 const int16_t ch_mult,
13260 const int16_t pad_x,
13261 const int16_t pad_y,
13262 const int16_t stride_x,
13263 const int16_t stride_y,
13264 const int16_t dilation_x,
13265 const int16_t dilation_y,
13266 const int32_t *
bias,
13267 const int32_t in_offset,
13268 const int32_t ker_offset,
13269 const int32_t out_offset,
13270 uint8_t *out_tensor,
13271 const uint16_t out_tensor_dim_x,
13272 const uint16_t out_tensor_dim_y,
13273 const int32_t act_min,
13274 const int32_t act_max,
13275 const int32_t out_shift,
13276 const int32_t out_scale)
13278 #if defined(__zcc__)
13279 return tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
13280 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13281 ker_dim_x, ker_dim_y, ch_mult, pad_x, pad_y, stride_x, stride_y,
13282 dilation_x, dilation_y,
bias, in_offset, ker_offset, out_offset,
13283 out_tensor, out_tensor_dim_x, out_tensor_dim_y, act_min, act_max,
13284 out_shift, out_scale);
13286 return riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
13287 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13288 ker_dim_x, ker_dim_y, ch_mult, pad_x, pad_y, stride_x, stride_y,
13289 dilation_x, dilation_y,
bias, in_offset, ker_offset, out_offset,
13290 out_tensor, out_tensor_dim_x, out_tensor_dim_y, act_min, act_max,
13291 out_shift, out_scale);
13331 static inline int32_t hpm_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
const float16_t *in_tensor,
13332 const uint16_t in_tensor_dim_x,
13333 const uint16_t in_tensor_dim_y,
13334 const uint16_t in_tensor_ch,
13335 const float16_t *ker_weight,
13336 const uint16_t out_tensor_ch,
13337 const uint16_t ker_dim_x,
13338 const uint16_t ker_dim_y,
13339 const uint16_t pad_x,
13340 const uint16_t pad_y,
13341 const uint16_t stride_x,
13342 const uint16_t stride_y,
13343 const float16_t *
bias,
13344 float16_t *out_tensor,
13345 const uint16_t out_tensor_dim_x,
13346 const uint16_t out_tensor_dim_y,
13348 float16_t *tmp_buf)
13350 #if defined(__zcc__)
13351 return tpt_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13352 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13353 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13354 bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y,
in_tmp_buf,
13357 return riscv_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13358 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13359 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13360 bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y,
in_tmp_buf,
13386 static inline int32_t hpm_nn_conv_HWC_f16_f16_f16_bias(
const float16_t *in_tensor,
13387 const uint16_t in_tensor_dim,
13388 const uint16_t in_tensor_ch,
13389 const float16_t *ker_weight,
13390 const uint16_t out_tensor_ch,
13391 const uint16_t ker_dim,
13392 const uint16_t pad,
13393 const uint16_t stride,
13394 const float16_t *
bias,
13395 float16_t *out_tensor,
13396 const uint16_t out_tensor_dim,
13398 float16_t *tmp_buf)
13400 #if defined(__zcc__)
13401 return tpt_nn_conv_HWC_f16_f16_f16_bias(
13402 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13403 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13406 return riscv_nn_conv_HWC_f16_f16_f16_bias(
13407 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13408 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13434 static inline int32_t hpm_nn_conv_dw_HWC_f16_f16_f16_bias(
const float16_t *in_tensor,
13435 const uint16_t in_tensor_dim,
13436 const uint16_t in_tensor_ch,
13437 const float16_t *ker_weight,
13438 const uint16_t out_tensor_ch,
13439 const uint16_t ker_dim,
13440 const uint16_t pad,
13441 const uint16_t stride,
13442 const float16_t *
bias,
13443 float16_t *out_tensor,
13444 const uint16_t out_tensor_dim,
13446 float16_t *tmp_buf)
13448 #if defined(__zcc__)
13449 return tpt_nn_conv_dw_HWC_f16_f16_f16_bias(
13450 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13451 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13454 return riscv_nn_conv_dw_HWC_f16_f16_f16_bias(
13455 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13456 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13468 #ifdef HPM_EN_MATH_NN_RVP32_LIB
13469 #if defined(__zcc__)
13470 #include "tpt_nn_convolution.h"
13472 #include "riscv_nn_convolution.h"
13517 const uint16_t in_tensor_dim_x,
13518 const uint16_t in_tensor_dim_y,
13519 const uint16_t in_tensor_ch,
13520 const uint16_t in_tensor_group,
13521 const q7_t *ker_weight,
13522 const uint16_t out_tensor_ch,
13523 const uint16_t ker_dim_x,
13524 const uint16_t ker_dim_y,
13525 const uint16_t pad_x,
13526 const uint16_t pad_y,
13527 const uint16_t stride_x,
13528 const uint16_t stride_y,
13529 const int32_t *
bias,
13531 const int32_t *out_shift,
13532 const int32_t *out_scale,
13533 const int32_t out_offset,
13534 const int32_t in_offset,
13535 const int32_t act_min,
13536 const int32_t act_max,
13537 const uint16_t out_tensor_dim_x,
13538 const uint16_t out_tensor_dim_y,
13541 #if defined(__zcc__)
13543 tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
13544 in_offset, out_offset, act_min, act_max};
13546 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13548 tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13549 in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
13552 return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13556 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
13557 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13558 in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
13559 pad_y, stride_x, stride_y,
bias, out_tensor, out_shift, out_scale,
13560 out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13612 const uint16_t in_tensor_dim_x,
13613 const uint16_t in_tensor_dim_y,
13614 const uint16_t in_tensor_ch,
13615 const uint16_t in_tensor_group,
13616 const q7_t *ker_weight,
13617 const uint16_t out_tensor_ch,
13618 const uint16_t pad_x,
13619 const uint16_t pad_y,
13620 const uint16_t stride_x,
13621 const uint16_t stride_y,
13622 const int32_t *
bias,
13624 const int32_t *out_shift,
13625 const int32_t *out_scale,
13626 const int32_t out_offset,
13627 const int32_t in_offset,
13628 const int32_t act_min,
13629 const int32_t act_max,
13630 const uint16_t out_tensor_dim_x,
13631 const uint16_t out_tensor_dim_y,
13634 #if defined(__zcc__)
13636 tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
13637 stride_y, pad_x, pad_y, act_min, act_max};
13639 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13641 tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13642 in_tensor_group, out_tensor_ch};
13644 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13645 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13648 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
13649 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13650 in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
13651 stride_y,
bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
13652 act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
13704 const uint16_t in_tensor_dim_x,
13705 const uint16_t in_tensor_dim_y,
13706 const uint16_t in_tensor_ch,
13707 const q7_t *ker_weight,
13708 const uint16_t out_tensor_ch,
13709 const uint16_t ch_mult,
13710 const uint16_t ker_dim_x,
13711 const uint16_t ker_dim_y,
13712 const uint16_t pad_x,
13713 const uint16_t pad_y,
13714 const uint16_t stride_x,
13715 const uint16_t stride_y,
13716 const int32_t *
bias,
13718 const int32_t *out_shift,
13719 const int32_t *out_scale,
13720 const uint16_t out_tensor_dim_x,
13721 const uint16_t out_tensor_dim_y,
13722 const int32_t out_offset,
13723 const int32_t in_offset,
13724 const int32_t act_min,
13725 const int32_t act_max,
13726 const uint16_t dilation_x,
13727 const uint16_t dilation_y,
13730 #if defined(__zcc__)
13732 tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13733 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13735 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13737 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13738 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13740 return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13741 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13744 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13745 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13746 out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13747 stride_y,
bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13748 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13749 dilation_y, tmp_buf);
13793 const uint16_t in_tensor_dim_x,
13794 const uint16_t in_tensor_ch,
13795 const uint16_t in_tensor_group,
13796 const q7_t *ker_weight,
13797 const uint16_t out_tensor_ch,
13798 const uint16_t ker_dim_x,
13799 const uint16_t pad_x,
13800 const uint16_t stride_x,
13801 const int32_t *
bias,
13803 const int32_t *out_shift,
13804 const int32_t *out_scale,
13805 const int32_t out_offset,
13806 const int32_t in_offset,
13807 const int32_t act_min,
13808 const int32_t act_max,
13809 const uint16_t out_tensor_dim_x,
13812 #if defined(__zcc__)
13814 tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
13817 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13819 tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
13820 ker_dim_x, out_tensor_dim_x, out_tensor_ch};
13822 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13826 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
13827 in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
13828 out_tensor_ch, ker_dim_x, pad_x, stride_x,
bias, out_tensor, out_shift,
13829 out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13879 const uint16_t in_tensor_dim_x,
13880 const uint16_t in_tensor_dim_y,
13881 const uint16_t in_tensor_ch,
13882 const q7_t *ker_weight,
13883 const uint16_t out_tensor_ch,
13884 const uint16_t ker_dim_x,
13885 const uint16_t ker_dim_y,
13886 const uint16_t pad_x,
13887 const uint16_t pad_y,
13888 const uint16_t stride_x,
13889 const uint16_t stride_y,
13890 const int32_t *
bias,
13892 const int32_t *out_shift,
13893 const int32_t *out_scale,
13894 const uint16_t out_tensor_dim_x,
13895 const uint16_t out_tensor_dim_y,
13896 const int32_t out_offset,
13897 const int32_t in_offset,
13898 const int32_t act_min,
13899 const int32_t act_max,
13900 const uint16_t dilation_x,
13901 const uint16_t dilation_y,
13904 #if defined(__zcc__)
13906 tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13907 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13909 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13911 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13912 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13914 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13918 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13919 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13920 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13921 bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13922 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13935 #if defined(__zcc__)
13936 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
13939 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13953 const uint16_t ker_dim_x,
13954 const uint16_t ker_dim_y)
13956 #if defined(__zcc__)
13957 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13958 in_tensor_ch, ker_dim_x, ker_dim_y);
13960 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13961 in_tensor_ch, ker_dim_x, ker_dim_y);
13975 const uint16_t ker_dim_x,
13976 const uint16_t ker_dim_y)
13978 #if defined(__zcc__)
13979 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
13980 in_tensor_ch, ker_dim_x, ker_dim_y);
13982 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
13983 in_tensor_ch, ker_dim_x, ker_dim_y);
13996 const uint16_t ker_dim_x,
13997 const uint16_t ker_dim_y)
14000 #if defined(__zcc__)
14001 return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
14002 in_tensor_ch, ker_dim_x, ker_dim_y);
14004 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
14005 in_tensor_ch, ker_dim_x, ker_dim_y);
14013 #ifdef HPM_MATH_NN_CONNECTED
14014 #ifdef HPM_EN_MATH_NN_LIB
14015 #if defined(__zcc__)
14016 #include "tpt_nn_fully_connected.h"
14018 #include "riscv_nn_fully_connected.h"
14066 const uint16_t
size,
14073 #if defined(__zcc__)
14107 const uint16_t
size,
14115 #if defined(__zcc__)
14142 const uint16_t
size,
14150 #if defined(__zcc__)
14185 const uint16_t
size,
14193 #if defined(__zcc__)
14221 const uint16_t
size,
14229 #if defined(__zcc__)
14234 return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias(
14263 const uint16_t
size,
14271 #if defined(__zcc__)
14272 return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14276 return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14308 const uint16_t
size,
14310 const uint16_t pre_rshift,
14311 const uint16_t out_scale,
14312 const uint16_t post_rshift,
14317 #if defined(__zcc__)
14319 pre_rshift, out_scale, post_rshift,
bias,
14323 pre_rshift, out_scale, post_rshift,
bias,
14354 const uint16_t
size,
14356 const uint16_t pre_rshift,
14357 const uint16_t out_scale,
14358 const uint16_t post_rshift,
14363 #if defined(__zcc__)
14365 pre_rshift, out_scale, post_rshift,
bias,
14369 pre_rshift, out_scale, post_rshift,
14400 const uint16_t
size,
14402 const uint16_t pre_rshift,
14403 const uint16_t out_scale,
14404 const uint16_t post_rshift,
14409 #if defined(__zcc__)
14411 pre_rshift, out_scale, post_rshift,
bias,
14415 pre_rshift, out_scale, post_rshift,
bias,
14446 const uint16_t
size,
14448 const uint16_t pre_rshift,
14449 const uint16_t out_scale,
14450 const uint16_t post_rshift,
14455 #if defined(__zcc__)
14457 pre_rshift, out_scale, post_rshift,
bias,
14461 pre_rshift, out_scale, post_rshift,
bias,
14492 const uint16_t
size,
14494 const uint16_t pre_rshift,
14495 const uint16_t out_scale,
14496 const uint16_t post_rshift,
14501 #if defined(__zcc__)
14503 pre_rshift, out_scale, post_rshift,
bias,
14507 pre_rshift, out_scale, post_rshift,
14537 const uint16_t
size,
14539 const uint16_t pre_rshift,
14540 const uint16_t out_scale,
14541 const uint16_t post_rshift,
14545 #if defined(__zcc__)
14579 const uint16_t
size,
14581 const uint16_t pre_rshift,
14582 const uint16_t out_scale,
14583 const uint16_t post_rshift,
14587 #if defined(__zcc__)
14621 const uint16_t
size,
14623 const uint16_t pre_rshift,
14624 const uint16_t out_scale,
14625 const uint16_t post_rshift,
14629 #if defined(__zcc__)
14663 const uint16_t
size,
14665 const uint16_t pre_rshift,
14666 const uint16_t out_scale,
14667 const uint16_t post_rshift,
14671 #if defined(__zcc__)
14705 const uint16_t
size,
14707 const uint16_t pre_rshift,
14708 const uint16_t out_scale,
14709 const uint16_t post_rshift,
14713 #if defined(__zcc__)
14748 const uint16_t
size,
14750 const uint16_t pre_rshift,
14751 const uint16_t out_scale,
14752 const uint16_t post_rshift,
14757 #if defined(__zcc__)
14759 pre_rshift, out_scale, post_rshift,
14763 pre_rshift, out_scale, post_rshift,
14795 const uint16_t
size,
14797 const uint16_t pre_rshift,
14798 const uint16_t out_scale,
14799 const uint16_t post_rshift,
14804 #if defined(__zcc__)
14806 pre_rshift, out_scale, post_rshift,
14810 pre_rshift, out_scale, post_rshift,
14841 const uint16_t
size,
14843 const uint16_t pre_rshift,
14844 const uint16_t out_scale,
14845 const uint16_t post_rshift,
14850 #if defined(__zcc__)
14852 pre_rshift, out_scale, post_rshift,
14856 pre_rshift, out_scale, post_rshift,
14888 const uint16_t
size,
14890 const uint16_t pre_rshift,
14891 const uint16_t out_scale,
14892 const uint16_t post_rshift,
14897 #if defined(__zcc__)
14899 pre_rshift, out_scale, post_rshift,
14903 pre_rshift, out_scale, post_rshift,
14935 const uint16_t
size,
14937 const uint16_t pre_rshift,
14938 const uint16_t out_scale,
14939 const uint16_t post_rshift,
14944 #if defined(__zcc__)
14946 pre_rshift, out_scale, post_rshift,
14950 pre_rshift, out_scale, post_rshift,
14980 const uint16_t
size,
14982 const uint16_t pre_rshift,
14983 const uint16_t out_scale,
14984 const uint16_t post_rshift,
14988 #if defined(__zcc__)
14990 pre_rshift, out_scale, post_rshift,
14994 pre_rshift, out_scale, post_rshift,
15025 const uint16_t
size,
15027 const uint16_t pre_rshift,
15028 const uint16_t out_scale,
15029 const uint16_t post_rshift,
15033 #if defined(__zcc__)
15035 pre_rshift, out_scale, post_rshift,
15039 pre_rshift, out_scale, post_rshift,
15069 const uint16_t
size,
15071 const uint16_t pre_rshift,
15072 const uint16_t out_scale,
15073 const uint16_t post_rshift,
15077 #if defined(__zcc__)
15079 pre_rshift, out_scale, post_rshift,
15083 pre_rshift, out_scale, post_rshift,
15114 const uint16_t
size,
15116 const uint16_t pre_rshift,
15117 const uint16_t out_scale,
15118 const uint16_t post_rshift,
15122 #if defined(__zcc__)
15124 pre_rshift, out_scale, post_rshift,
15128 pre_rshift, out_scale, post_rshift,
15159 const uint16_t
size,
15161 const uint16_t pre_rshift,
15162 const uint16_t out_scale,
15163 const uint16_t post_rshift,
15167 #if defined(__zcc__)
15169 pre_rshift, out_scale, post_rshift,
15173 pre_rshift, out_scale, post_rshift,
15189 const uint32_t
size,
15193 #if defined(__zcc__)
15211 const uint32_t
size,
15215 #if defined(__zcc__)
15232 const uint32_t
size,
15236 #if defined(__zcc__)
15275 const uint16_t in_vec_col,
15276 const uint16_t wt_mat_row,
15277 const uint16_t in_vec_group,
15278 const int32_t in_offset,
15279 const int32_t wt_offset,
15280 const int32_t out_scale,
15281 const int32_t out_shift,
15282 const int32_t out_offset,
15283 const int32_t *
bias,
15285 const int32_t act_min,
15286 const int32_t act_max,
15289 #if defined(__zcc__)
15291 tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15292 out_shift, act_min, act_max};
15293 tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15296 &aFC_dims, tmp_buf);
15298 return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec,
wt_mat, in_vec_col, wt_mat_row,
15299 in_vec_group, in_offset, wt_offset,
15300 out_scale, out_shift, out_offset,
bias,
15301 out_vec, act_min, act_max, tmp_buf);
15314 #if defined(__zcc__)
15315 return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15317 return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15327 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15328 #if defined(__zcc__)
15329 #include "tpt_nn_fully_connected.h"
15331 #include "riscv_nn_fully_connected.h"
15366 const uint16_t in_vec_col,
15367 const uint16_t wt_mat_row,
15368 const uint16_t in_vec_group,
15369 const int32_t in_offset,
15370 const int32_t wt_offset,
15371 const int32_t out_scale,
15372 const int32_t out_shift,
15373 const int32_t out_offset,
15374 const int32_t *
bias,
15376 const int32_t act_min,
15377 const int32_t act_max,
15380 #if defined(__zcc__)
15382 tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15383 out_shift, act_min, act_max};
15384 tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15387 &aFC_dims, tmp_buf);
15389 return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec,
wt_mat, in_vec_col, wt_mat_row,
15390 in_vec_group, in_offset, wt_offset,
15391 out_scale, out_shift, out_offset,
bias,
15392 out_vec, act_min, act_max, tmp_buf);
15405 #if defined(__zcc__)
15406 return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15408 return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15416 #ifdef HPM_MATH_NN_POOLING
15417 #ifdef HPM_EN_MATH_NN_LIB
15418 #if defined(__zcc__)
15419 #include "tpt_nn_pooling.h"
15421 #include "riscv_nn_pooling.h"
15467 const uint16_t in_tensor_dim,
15468 const uint16_t in_tensor_ch,
15469 const uint16_t ker_dim,
15470 const uint16_t pad,
15471 const uint16_t stride,
15472 const uint16_t out_tensor_dim,
15476 #if defined(__zcc__)
15477 tpt_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15478 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15480 riscv_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15481 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15532 const uint16_t in_tensor_dim_x,
15533 const uint16_t in_tensor_dim_y,
15534 const uint16_t in_tensor_ch,
15535 const uint16_t ker_dim_x,
15536 const uint16_t ker_dim_y,
15537 const uint16_t pad_x,
15538 const uint16_t pad_y,
15539 const uint16_t stride_x,
15540 const uint16_t stride_y,
15541 const uint16_t out_tensor_dim_x,
15542 const uint16_t out_tensor_dim_y,
15545 const uint16_t out_lshift)
15547 #if defined(__zcc__)
15548 tpt_nn_avepool_HWC_s8_any(
15549 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15550 ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15551 out_tensor_dim_y,
in_tmp_buf, out_tensor, out_lshift);
15553 riscv_nn_avepool_HWC_s8_any(
15554 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15555 ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15556 out_tensor_dim_y,
in_tmp_buf, out_tensor, out_lshift);
15589 const int in_tensor_dim_x,
15590 const int out_tensor_dim_y,
15591 const int out_tensor_dim_x,
15592 const int stride_y,
15593 const int stride_x,
15594 const int ker_dim_y,
15595 const int ker_dim_x,
15600 const int in_tensor_ch,
15603 int8_t *out_tensor)
15605 #if defined(__zcc__)
15607 tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15609 tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15610 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15612 return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims,
in_tmp_buf);
15615 return riscv_nn_avepool_HWC_s8_any_act(
15616 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15617 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15618 in_tensor_ch, in_tensor,
in_tmp_buf, out_tensor);
15632 #if defined(__zcc__)
15633 return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15636 return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15671 const uint16_t in_tensor_dim,
15672 const uint16_t in_tensor_ch,
15673 const uint16_t ker_dim,
15674 const uint16_t pad,
15675 const uint16_t stride,
15676 const uint16_t out_tensor_dim,
15680 #if defined(__zcc__)
15681 tpt_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15682 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15684 riscv_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15685 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15716 const uint16_t in_tensor_dim_x,
15717 const uint16_t out_tensor_dim_y,
15718 const uint16_t out_tensor_dim_x,
15719 const uint16_t stride_y,
15720 const uint16_t stride_x,
15721 const uint16_t ker_dim_y,
15722 const uint16_t ker_dim_x,
15723 const uint16_t pad_y,
15724 const uint16_t pad_x,
15725 const int8_t act_min,
15726 const int8_t act_max,
15727 const uint16_t in_tensor_ch,
15729 int16_t *tmp_buffer,
15730 int8_t *out_tensor)
15732 #if defined(__zcc__)
15733 return tpt_nn_maxpool_HWC_s8_any_act(
15734 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15735 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15736 in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15738 return riscv_nn_maxpool_HWC_s8_any_act(
15739 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15740 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15741 in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15751 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15752 #if defined(__zcc__)
15753 #include "tpt_nn_pooling.h"
15755 #include "riscv_nn_pooling.h"
15787 const int in_tensor_dim_x,
15788 const int out_tensor_dim_y,
15789 const int out_tensor_dim_x,
15790 const int stride_y,
15791 const int stride_x,
15792 const int ker_dim_y,
15793 const int ker_dim_x,
15798 const int in_tensor_ch,
15801 int8_t *out_tensor)
15803 #if defined(__zcc__)
15805 tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15807 tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15808 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15810 return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims,
in_tmp_buf);
15813 return riscv_nn_avepool_HWC_s8_any_act(
15814 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15815 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15816 in_tensor_ch, in_tensor,
in_tmp_buf, out_tensor);
15830 #if defined(__zcc__)
15831 return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15834 return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15842 #ifdef HPM_MATH_NN_SOFTMAX
15843 #ifdef HPM_EN_MATH_NN_LIB
15844 #if defined(__zcc__)
15845 #include "tpt_nn_softmax.h"
15847 #include "riscv_nn_softmax.h"
15875 const uint16_t
size,
15878 #if defined(__zcc__)
15893 const uint16_t
size,
15896 #if defined(__zcc__)
15899 riscv_nn_softmax_s16_fast(in_vec,
size,
out_vec);
15918 const int32_t in_tensor_row,
15919 const int32_t in_tensor_col,
15920 const int32_t scale,
15921 const int32_t lshift,
15922 const int32_t diff_min,
15923 int8_t *out_tensor)
15925 #if defined(__zcc__)
15926 tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15929 riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15930 diff_min, out_tensor);
15949 const int32_t in_tensor_row,
15950 const int32_t in_tensor_col,
15951 const int32_t scale,
15952 const int32_t lshift,
15953 const int32_t diff_min,
15954 uint8_t *out_tensor)
15956 #if defined(__zcc__)
15957 tpt_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15958 diff_min, out_tensor);
15960 riscv_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15961 diff_min, out_tensor);
15971 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15972 #if defined(__zcc__)
15973 #include "tpt_nn_softmax.h"
15975 #include "riscv_nn_softmax.h"
15993 const int32_t in_tensor_row,
15994 const int32_t in_tensor_col,
15995 const int32_t scale,
15996 const int32_t lshift,
15997 const int32_t diff_min,
15998 int8_t *out_tensor)
16000 #if defined(__zcc__)
16001 tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16004 riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16005 diff_min, out_tensor);
16012 #ifdef HPM_MATH_NN_UTIL
16013 #ifdef HPM_EN_MATH_NN_LIB
16014 #if defined(__zcc__)
16015 #include "tpt_nn_util.h"
16017 #include "riscv_nn_util.h"
16037 static inline int32_t hpm_nn_exp_f16(
const float16_t *in_vec,
16038 const uint32_t
size,
16041 #if defined(__zcc__)
16066 int8_t *out_tensor,
16067 const uint32_t
size)
16069 #if defined(__zcc__)
16070 tpt_reshape_s8(out_tensor, in_tensor,
size);
16072 riscv_nn_reshape_s8(in_tensor, out_tensor,
size);
16101 #if defined(__zcc__)
16102 return tpt_nn_top_k_s8(in_vec,
size, k, val, idx);
16104 return riscv_nn_top_k_s8(in_vec,
size, k, val, idx);
16128 static inline int32_t hpm_nn_top_k_f16(float16_t *in_vec,
16134 #if defined(__zcc__)
16135 return tpt_nn_top_k_f16(in_vec,
size, k, val, idx);
16137 return riscv_nn_top_k_f16(in_vec,
size, k, val, idx);
16148 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16149 #if defined(__zcc__)
16150 #include "tpt_nn_util.h"
16152 #include "riscv_nn_util.h"
16172 int8_t *out_tensor,
16173 const uint32_t
size)
16175 #if defined(__zcc__)
16176 tpt_reshape_s8(out_tensor, in_tensor,
size);
16178 riscv_nn_reshape_s8(in_tensor, out_tensor,
size);
#define HPM_FFA
Definition: hpm_soc.h:396
static void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise AND of two u32 vectors.
Definition: hpm_math.h:1998
static void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise AND of two u8 vectors.
Definition: hpm_math.h:2016
static void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
Elementwise clipping of q31 function.
Definition: hpm_math.h:1927
static void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
Elementwise clipping of f32 function.
Definition: hpm_math.h:1908
static void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
Elementwise clipping of q7 function.
Definition: hpm_math.h:1965
static void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
Elementwise clipping of q15 function.
Definition: hpm_math.h:1946
static void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
Compute the logical bitwise NOT of u16 vector.
Definition: hpm_math.h:2203
static void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
Compute the logical bitwise NOT of u32 vector.
Definition: hpm_math.h:2186
static void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
Compute the logical bitwise NOT of u8 vector.
Definition: hpm_math.h:2220
static void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise OR of two u8 vectors.
Definition: hpm_math.h:2085
static void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise OR of two u16 vectors.
Definition: hpm_math.h:2067
static void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise OR of two u32 vectors.
Definition: hpm_math.h:2049
static void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u32 vectors.
Definition: hpm_math.h:2118
static void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u16 vectors.
Definition: hpm_math.h:2136
static void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u8 vectors.
Definition: hpm_math.h:2154
static void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Addition of U8 vectors.
Definition: hpm_math.h:1119
static void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
Subtraction of u8 vectors.
Definition: hpm_math.h:1218
static q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
Division of q31 inputs.
Definition: hpm_math.h:1345
static void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Multiplication of q15 vectors.
Definition: hpm_math.h:1273
static uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
Dot product of u8 vectors.
Definition: hpm_math.h:1606
static void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
Multiply a q31 vector by a q31 scale.
Definition: hpm_math.h:1739
static void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Addition of q15 vectors.
Definition: hpm_math.h:1079
static void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
The offset of q7 vectors.
Definition: hpm_math.h:1681
static void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Subtraction of q15 vectors.
Definition: hpm_math.h:1178
static q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
Division of a q63 input by a positive 32-bit value.
Definition: hpm_math.h:1362
static void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Multiplication of q31 vectors.
Definition: hpm_math.h:1253
static q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
Division of a positive 64-bit input by a positive 32-bit value.
Definition: hpm_math.h:1379
static void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
Absolute value of q7 vectors.
Definition: hpm_math.h:1020
static void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
Negation of q15 vectors.
Definition: hpm_math.h:1437
static q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
Dot product of q31 vectors.
Definition: hpm_math.h:1501
static void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
The offset of floating-point vectors.
Definition: hpm_math.h:1621
static void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
Multiply a q7 vector by a q7 scale.
Definition: hpm_math.h:1785
static void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Addition of q7 vectors.
Definition: hpm_math.h:1099
static void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
The offset of U8 vectors.
Definition: hpm_math.h:1701
static void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Subtraction of q7 vectors.
Definition: hpm_math.h:1198
static void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
Shifts a q31 vector with a specified shift number.
Definition: hpm_math.h:1846
static void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
Multiply a q15 vector by a q15 scale.
Definition: hpm_math.h:1762
static void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Addition of q31 vectors.
Definition: hpm_math.h:1059
static void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
The offset of q15 vectors.
Definition: hpm_math.h:1661
static void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
Multiply a floating-point vector by a floating-point scale.
Definition: hpm_math.h:1716
static void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
The offset of q31 vectors.
Definition: hpm_math.h:1641
static void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Addition of floating-point vectors.
Definition: hpm_math.h:1039
static void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Subtraction of floating-point vectors.
Definition: hpm_math.h:1138
static void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
Negation of floating-point vectors.
Definition: hpm_math.h:1397
static void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
Negation of q31 vectors.
Definition: hpm_math.h:1417
static void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
Negation of q7 vectors.
Definition: hpm_math.h:1457
static void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
Shifts a q7 vector with a specified shift number.
Definition: hpm_math.h:1867
static q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
Dot product of q7 vectors.
Definition: hpm_math.h:1566
static q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
Dot product of q15 vectors.
Definition: hpm_math.h:1524
static q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
Dot product of q7 * q15 vectors.
Definition: hpm_math.h:1589
static float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
Dot product of floating-point vectors.
Definition: hpm_math.h:1476
static void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
Multiply a u8 vector by a q7 scale.
Definition: hpm_math.h:1807
static void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Subtraction of q31 vectors.
Definition: hpm_math.h:1158
static q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
Dot product of u8 * q15 vectors.
Definition: hpm_math.h:1548
static void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
Absolute value of q31 vectors.
Definition: hpm_math.h:979
static void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Multiplication of u8 vectors.
Definition: hpm_math.h:1313
static void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
Shifts a q15 vector with a specified shift number.
Definition: hpm_math.h:1825
static void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Division of floating-point vectors.
Definition: hpm_math.h:1328
static void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
Absolute value of floating-point vectors.
Definition: hpm_math.h:959
static void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Multiplication of floating-point vectors.
Definition: hpm_math.h:1233
static void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
Absolute value of q15 vectors.
Definition: hpm_math.h:1000
static void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
Shifts a u8 vector for a specified shift number.
Definition: hpm_math.h:1888
static void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Multiplication of q7 vectors.
Definition: hpm_math.h:1293
static void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
Multiply two floating-point complex vectors.
Definition: hpm_math.h:2583
static void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
Conjugate the q15 complex vector.
Definition: hpm_math.h:2318
static void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
Multiply the floating-point complex vector by a real vector.
Definition: hpm_math.h:2644
static void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
Compute the dot product of the q31 complex vector.
Definition: hpm_math.h:2428
static void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
Conjugate the floating-point complex vector.
Definition: hpm_math.h:2298
static void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
Multiply two q15 complex vector.
Definition: hpm_math.h:2604
static void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude of the floating-point complex vector.
Definition: hpm_math.h:2465
static void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude of the q15 complex vector.
Definition: hpm_math.h:2485
static void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude squared of the floating-point complex vector.
Definition: hpm_math.h:2524
static void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude of the q31 complex vector.
Definition: hpm_math.h:2505
static void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
Multiply the q31 complex vector by a real vector.
Definition: hpm_math.h:2684
static void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
Conjugate the q31 complex vector.
Definition: hpm_math.h:2338
static void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
Compute the dot product of the floating-point complex vector.
Definition: hpm_math.h:2357
static void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
Compute the dot product of the q15 complex vector.
Definition: hpm_math.h:2393
static void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
Compute the dot product type2 of the q31 complex vector.
Definition: hpm_math.h:2447
static void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude squared of the q15 complex vector.
Definition: hpm_math.h:2544
static void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
Multiply two q31 complex vector.
Definition: hpm_math.h:2625
static void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude squared of the q31 complex vector.
Definition: hpm_math.h:2564
static void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
Compute the dot product type2 of the floating-point complex vector.
Definition: hpm_math.h:2372
static void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
Compute the dot product type2 of the q15 complex vector.
Definition: hpm_math.h:2411
static void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
Multiply the q15 complex vector by a real vector.
Definition: hpm_math.h:2664
static q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
Definition: hpm_math.h:2910
static void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
PID initialization control function of Q15 formats.
Definition: hpm_math.h:2928
static void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
Inverse Park transform of q31 input.
Definition: hpm_math.h:2839
static void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
Park transform of q31 input.
Definition: hpm_math.h:2804
static void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
PID initialization control function of floating-point formats.
Definition: hpm_math.h:2870
static void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
Park transform of floating-point input.
Definition: hpm_math.h:2785
static void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
Inverse Clarke transform of q31 input.
Definition: hpm_math.h:2768
static void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
PID initialization control function of Q31 formats.
Definition: hpm_math.h:2903
static void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
Inverse Clarke transform of floating-point input.
Definition: hpm_math.h:2752
static q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
PID control of Q31 input.
Definition: hpm_math.h:2884
static void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
Clarke transform of floating-point input.
Definition: hpm_math.h:2721
static void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
Inverse Park transform of floating-point input.
Definition: hpm_math.h:2821
static float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
PID control of floating-point input.
Definition: hpm_math.h:2852
static void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
Clarke transform of q31 input.
Definition: hpm_math.h:2737
static float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Euclidean distance between two vectors.
Definition: hpm_math.h:3071
static float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cityblock (Manhattan) distance between two vectors.
Definition: hpm_math.h:3017
static float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Sneath distance between two vectors.
Definition: hpm_math.h:3216
static float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Bray-Curtis distance between two vectors.
Definition: hpm_math.h:2963
static float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Kulsinski distance between two vectors.
Definition: hpm_math.h:3180
static float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Jaccard distance between two vectors.
Definition: hpm_math.h:3162
static float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Canberra distance between two vectors.
Definition: hpm_math.h:2981
static float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Hamming distance between two vectors.
Definition: hpm_math.h:3144
static float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cosine distance between two vectors.
Definition: hpm_math.h:3053
static float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Correlation distance between two vectors.
Definition: hpm_math.h:3035
static float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Chebyshev distance between two vectors.
Definition: hpm_math.h:2999
static float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Rogers-Tanimoto distance between two vectors.
Definition: hpm_math.h:3234
static float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Yule distance between two vectors.
Definition: hpm_math.h:3252
static float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Russell-Rao distance between two vectors.
Definition: hpm_math.h:3270
static float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Dice distance between two vectors.
Definition: hpm_math.h:3126
static float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Michener distance between two vectors.
Definition: hpm_math.h:3198
static float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
Minkowski distance between two vectors.
Definition: hpm_math.h:3108
static float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Jensen-Shannon distance between two vectors.
Definition: hpm_math.h:3089
#define FFA_DATA_TYPE_COMPLEX_Q31
Definition: hpm_ffa_drv.h:39
hpm_stat_t ffa_calculate_fft_blocking(FFA_Type *ptr, fft_xfer_t *fft_xfer)
Perform FFT transformation in blocking mode.
Definition: hpm_ffa_drv.c:118
#define FFA_DATA_TYPE_COMPLEX_Q15
Definition: hpm_ffa_drv.h:40
static void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Correlation of the q31 vectors.
Definition: hpm_math.h:3909
static void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3975
static void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point lattice FIR filter.
Definition: hpm_math.h:3425
static void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3519
static void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Correlation of the q7 vectors.
Definition: hpm_math.h:3935
static void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3477
static void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Definition: hpm_math.h:3629
static void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4018
static void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3945
static void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
Definition: hpm_math.h:3513
static void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Convolution of the floating-point vectors.
Definition: hpm_math.h:3647
static void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3531
static void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3391
static void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:3987
static void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3957
static void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3489
static void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4012
static void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 LMS filter.
Definition: hpm_math.h:3575
static void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point FIR filter.
Definition: hpm_math.h:3310
static void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Convolution of the q31 vectors.
Definition: hpm_math.h:3701
static void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3981
static void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the floating-point standard LMS filter.
Definition: hpm_math.h:3552
static void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Correlation of the floating-point vectors.
Definition: hpm_math.h:3853
static void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3372
static void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 lattice FIR filter.
Definition: hpm_math.h:3440
static int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q15 vectors.
Definition: hpm_math.h:3778
static void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3465
static void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the f32 normalized LMS filter.
Definition: hpm_math.h:3609
static void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3501
static void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Convolution of the q15 vectors.
Definition: hpm_math.h:3673
static void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3507
static void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3471
static void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3483
static void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
Definition: hpm_math.h:3525
static void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3331
static void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 lattice FIR filter.
Definition: hpm_math.h:3459
static void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3495
static int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q31 vectors.
Definition: hpm_math.h:3804
static void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4006
static int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the floating-point vectors.
Definition: hpm_math.h:3752
static void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3352
static void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4000
static void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Convolution of the q7 vectors.
Definition: hpm_math.h:3727
static void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3969
static void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3951
static void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Correlation of the q15 vectors.
Definition: hpm_math.h:3879
static void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4024
static int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q7 vectors.
Definition: hpm_math.h:3830
static void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3963
static void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Function for the q15 LMS filter.
Definition: hpm_math.h:3598
static void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3993
static void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 normalized LMS filter.
Definition: hpm_math.h:3621
static void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
Function for the q7 FIR filter.
Definition: hpm_math.h:3410
static void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Subtraction of two floating-point matrices.
Definition: hpm_math.h:4500
static void hpm_dsp_mat_oprod_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size1, uint32_t size2)
Outer product of two q31 matrices.
Definition: hpm_math.h:4684
static void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
Transpose the q15 matrix.
Definition: hpm_math.h:4595
static void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for f32 formats.
Definition: hpm_math.h:4718
static void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point matrices.
Definition: hpm_math.h:4170
static void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point complex matrices.
Definition: hpm_math.h:4201
static int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
Compute the inverse matrix of the floating-point matrix.
Definition: hpm_math.h:4139
static void hpm_dsp_mat_mul_vxm_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t col, uint32_t col2)
Multiplication of a q7 vector by a matrix.
Definition: hpm_math.h:4378
static void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Addition of two q31 matrices.
Definition: hpm_math.h:4120
static void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4181
static void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
Transpose the u8 matrix.
Definition: hpm_math.h:4631
static void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
Multiply a q15 matrix by a scale value.
Definition: hpm_math.h:4433
static void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4302
static void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q15 formats.
Definition: hpm_math.h:4738
static void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 matrices.
Definition: hpm_math.h:4292
static void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
Transpose the double-precision floating-point matrix.
Definition: hpm_math.h:4563
static void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 complex matrices.
Definition: hpm_math.h:4265
static void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 complex matrices.
Definition: hpm_math.h:4329
static void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
Transpose the floating-point matrix.
Definition: hpm_math.h:4581
static void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q31 formats.
Definition: hpm_math.h:4758
static void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4238
static void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
Transpose the q31 matrices.
Definition: hpm_math.h:4613
static void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Subtraction of two double-precision floating-point matrices.
Definition: hpm_math.h:4480
static void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Addition of two floating-point matrices.
Definition: hpm_math.h:4078
static void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Addition of two q15 matrices.
Definition: hpm_math.h:4099
static void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
Multiply a floating-point matrix by a scale value.
Definition: hpm_math.h:4408
static void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q7 formats.
Definition: hpm_math.h:4778
static void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q7 matrices.
Definition: hpm_math.h:4356
static int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4392
static void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 matrices.
Definition: hpm_math.h:4228
static void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
Multiply a q31 matrix by a scale value.
Definition: hpm_math.h:4458
static void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
Transpose the q7 matrices.
Definition: hpm_math.h:4646
static void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Subtraction of two q31 matrices.
Definition: hpm_math.h:4542
static int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4149
static void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Subtraction of two q15 matrices.
Definition: hpm_math.h:4521
static void hpm_nn_activate_s16(q15_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses sigmoid or tanh function to perform activation for signed 16-bit integer input vec...
Definition: hpm_math.h:6864
static void size
Definition: hpm_math.h:6899
static void hpm_nn_leaky_relu_s8(q7_t *in_out, uint32_t size, q15_t slope)
This function uses the leaky ReLU function to perform activation for signed 8-bit integer input vecto...
static void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 16-bit integer input vectors.
Definition: hpm_math.h:6950
static void hpm_nn_activate_s8(q7_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses the sigmoid or tanh function to perform activation for signed 8-bit integer input ...
Definition: hpm_math.h:6837
static void slope
Definition: hpm_math.h:6899
static void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6910
static void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6935
static void hpm_nn_add_s8_sym(const q7_t *in_tensor1, const q7_t *in_tensor2, const int16_t *scale1, const int16_t *scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7058
static int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_scale1, const int32_t in_rshift1, const int32_t in_offset2, const int32_t in_scale2, const int32_t in_rshift2, const int32_t lshift, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_rshift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise addition for signed 8-bit integer input vectors.
Definition: hpm_math.h:7166
static int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_offset2, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_shift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise multiplication for signed 8-bit integer input vectors.
Definition: hpm_math.h:7235
static void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1, const q7_t *in_tensor2, const uint32_t scale1, const uint32_t scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7096
static void hpm_nn_concate_s8_z(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_z, const uint32_t out_offset_z)
This function concatenates the int8_t/uint8_t input tensor along the z-axis with the output tensor.
Definition: hpm_math.h:7495
static void hpm_nn_concate_s8_x(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_x, const uint32_t out_offset_x)
This function concatenates the int8_t/uint8_t input tensor along the x-axis with the output tensor.
Definition: hpm_math.h:7425
static void hpm_nn_concate_s8_y(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_y, const uint32_t out_offset_y)
This function concatenates the int8_t/uint8_t input tensor along the y-axis with the output tensor.
Definition: hpm_math.h:7460
static void hpm_nn_concate_s8_w(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint32_t out_offset_w)
This function concatenates the int8_t/uint8_t input tensor along the w-axis with the output tensor.
Definition: hpm_math.h:7390
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9630
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12065
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11993
static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(const uint8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint8_t *ker_weight, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t in_offset, const int32_t ker_offset, const int32_t out_offset, uint8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t act_min, const int32_t act_max, const int32_t out_shift, const int32_t out_scale)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:13252
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and ...
Definition: hpm_math.h:10040
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric...
Definition: hpm_math.h:11742
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:9064
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:9303
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10276
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution with shift-based quantization on th...
Definition: hpm_math.h:8483
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11856
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:9223
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10917
static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12798
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:8902
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and ...
Definition: hpm_math.h:12646
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12211
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:8655
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution with shift-based quantization on the ou...
Definition: hpm_math.h:8301
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9383
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-...
Definition: hpm_math.h:9924
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9982
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11511
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11393
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9691
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:9144
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:11920
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution with shift-based quantization on the outputs.
Definition: hpm_math.h:7860
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quan...
Definition: hpm_math.h:10448
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ch_mult, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y ...
Definition: hpm_math.h:13047
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution for RGB images with shift-based quantization ...
Definition: hpm_math.h:7702
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9866
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10505
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12353
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10562
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-bas...
Definition: hpm_math.h:8133
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9808
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:11268
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10217
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11799
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inpu...
Definition: hpm_math.h:11452
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12909
static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(const int8_t *in_tensor, const int32_t in_tensor_dim_x, const int32_t in_tensor_dim_y, const int32_t in_tensor_ch, const int8_t *ker_weight, const int32_t out_tensor_ch, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_tensor_dim_x, const int32_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const int32_t dilation_x, const int32_t dilation_y, int16_t *tmp_buf)
This function performs depthwise 3x3 kernels convolution for signed 8-bit integer inputs/outputs in a...
Definition: hpm_math.h:12961
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs...
Definition: hpm_math.h:11334
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:10771
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10099
static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based qu...
Definition: hpm_math.h:7950
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10626
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12698
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs an...
Definition: hpm_math.h:10158
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:8739
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12423
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12138
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-ba...
Definition: hpm_math.h:8396
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10391
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12494
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with sy...
Definition: hpm_math.h:9750
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:8984
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantiza...
Definition: hpm_math.h:7781
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11685
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shif...
Definition: hpm_math.h:8574
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:11128
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:11198
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:13199
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *in_tmp_buf)
This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x an...
Definition: hpm_math.h:13142
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:8820
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution with shift-based quantization on the out...
Definition: hpm_math.h:8038
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9510
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric q...
Definition: hpm_math.h:11628
static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t pad_x, const uint16_t stride_x, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, q15_t *in_tmp_buf)
This function performs 1xn kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12747
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11570
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 16-bit integer convolution with shift-based quantization on the outputs...
Definition: hpm_math.h:8220
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10844
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:12282
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9570
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12565
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:11058
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9449
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10988
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:7615
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quanti...
Definition: hpm_math.h:10334
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10699
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions ...
Definition: hpm_math.h:12852
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with shift-based quantizati...
Definition: hpm_math.h:14140
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14490
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14886
static int32_t out_vec
Definition: hpm_math.h:14079
static int32_t in_tmp_buf
Definition: hpm_math.h:14080
static int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14577
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14352
static int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:15157
static int32_t bias
Definition: hpm_math.h:14079
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14261
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14219
static void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast.
Definition: hpm_math.h:15231
static int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:15023
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec, const int8_t *wt_mat, const uint16_t in_vec_col, const uint16_t wt_mat_row, const uint16_t in_vec_group, const int32_t in_offset, const int32_t wt_offset, const int32_t out_scale, const int32_t out_shift, const int32_t out_offset, const int32_t *bias, int8_t *out_vec, const int32_t act_min, const int32_t act_max, q15_t *tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with bias inputs and asymmet...
Definition: hpm_math.h:15273
static int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with symmetric quant...
Definition: hpm_math.h:14535
static void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 8-bit weight data and name...
Definition: hpm_math.h:15188
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14933
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
This function is used to get the needed size, in bytes, by the temporary buffer of riscv_nn_fc_s8_s8_...
Definition: hpm_math.h:15312
static int32_t out_rshift
Definition: hpm_math.h:14079
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14839
static int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with interleaved mul...
Definition: hpm_math.h:14978
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with interleaved multiplicat...
Definition: hpm_math.h:14105
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14444
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf) return riscv_nn_fc_s8_s8_s8_sft_bias(in_vec
This is a fully connected layer function for signed 8-bit integer inputs with shift-based quantizatio...
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14746
static int32_t wt_row_num
Definition: hpm_math.h:14078
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs and...
Definition: hpm_math.h:14306
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs a...
Definition: hpm_math.h:14398
static int32_t bias_lshift
Definition: hpm_math.h:14079
static void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q15_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 16-bit weight data and nam...
Definition: hpm_math.h:15210
static int32_t wt_mat
Definition: hpm_math.h:14078
static int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14703
static int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with symmetric qua...
Definition: hpm_math.h:14619
static int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:15112
static int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with interleaved m...
Definition: hpm_math.h:15067
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14793
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with interleaved multiplica...
Definition: hpm_math.h:14183
static int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14661
static int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y, const int in_tensor_dim_x, const int out_tensor_dim_y, const int out_tensor_dim_x, const int stride_y, const int stride_x, const int ker_dim_y, const int ker_dim_x, const int pad_y, const int pad_x, const int act_min, const int act_max, const int in_tensor_ch, int8_t *in_tensor, int16_t *in_tmp_buf, int8_t *out_tensor)
This is an average pooling function for S8 inputs with any x and y dimension with the activating param...
Definition: hpm_math.h:15588
static void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15670
static int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y, const uint16_t in_tensor_dim_x, const uint16_t out_tensor_dim_y, const uint16_t out_tensor_dim_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t ker_dim_y, const uint16_t ker_dim_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t in_tensor_ch, int8_t *in_tensor, int16_t *tmp_buffer, int8_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs in any x and y dimensions with the act...
Definition: hpm_math.h:15715
static void hpm_nn_avepool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is an average pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15466
static int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
This function is used to obtain the required size, in bytes, for the input temporary buffer of riscv_...
Definition: hpm_math.h:15630
static void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q7_t *in_tmp_buf, q7_t *out_tensor, const uint16_t out_lshift)
This is an average pooling function for signed 8-bit integer inputs in any x and y dimensions.
Definition: hpm_math.h:15531
static void hpm_nn_softmax_s8_hp(const int8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, int8_t *out_tensor)
This is a softmax function for signed 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15917
static void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, uint8_t *out_tensor)
This is a softmax function for unsigned 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15948
static void hpm_nn_softmax_s8_fast(const q7_t *in_vec, const uint16_t size, q7_t *out_vec)
This is a softmax function for signed 8-bit integer input vectors.
Definition: hpm_math.h:15874
static void hpm_nn_softmax_s16_fast(const q15_t *in_vec, const uint16_t size, q15_t *out_vec)
This is a softmax function for signed 16-bit integer input vectors.
Definition: hpm_math.h:15892
static int32_t hpm_nn_top_k_s8(q7_t *in_vec, uint32_t size, uint32_t k, q7_t *val, uint32_t *idx)
This function finds the k largest values and their indices from the signed 8-bit integer input vector...
Definition: hpm_math.h:16095
static void hpm_nn_reshape_s8(const int8_t *in_tensor, int8_t *out_tensor, const uint32_t size)
This function turns the input tensor into another tensor with the same data but in a different shape.
Definition: hpm_math.h:16065
static void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t *instance, riscv_dsp_sort_order order, float32_t *buf)
Definition: hpm_math.h:6547
__STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
Read 4 s8 from s8 pointer and post increment pointer.
Definition: hpm_math.h:6743
static void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Generic sorting function.
Definition: hpm_math.h:6525
#define Q31_MIN
Definition: hpm_math.h:6607
#define RIGHT_SHIFT(_shift)
Definition: hpm_math.h:6605
#define LEFT_SHIFT(_shift)
Definition: hpm_math.h:6604
__STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
Definition: hpm_math.h:6753
__STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
Definition: hpm_math.h:6687
__STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
Rounding divide by power of two.
Definition: hpm_math.h:6668
#define Q31_MAX
Definition: hpm_math.h:6606
static void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Merge sort.
Definition: hpm_math.h:6587
__STATIC_FORCEINLINE const q7_t * read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words
Definition: hpm_math.h:6726
__STATIC_FORCEINLINE const q7_t * read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words with reordering
Definition: hpm_math.h:6712
__STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
Read 4 q7 from q7 pointer and post increment pointer.
Definition: hpm_math.h:6698
static void write_q15x2_ia(q15_t **pQ15, q31_t value)
Definition: hpm_math.h:6609
static void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t *instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
Definition: hpm_math.h:6478
__STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
Saturating doubling high multiply. Result matches NEON instruction VQRDMULH.
Definition: hpm_math.h:6643
__STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
Read 2 q15 elements and post increment pointer.
Definition: hpm_math.h:6625
static float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
Standard deviation of the floating-point vector.
Definition: hpm_math.h:565
static uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t *src, float32_t *buf)
Naive Gaussian Bayesian Estimator.
Definition: hpm_math.h:810
static float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
Variance of the floating-point vector.
Definition: hpm_math.h:656
static q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum value of the q15 vector.
Definition: hpm_math.h:120
static q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
Sum of the squares of the q15 vector.
Definition: hpm_math.h:422
static q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
Mean value of the q7 vector.
Definition: hpm_math.h:361
static q15_t hpm_dsp_absmin_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q15 vector.
Definition: hpm_math.h:908
static uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Max value of the u8 vector.
Definition: hpm_math.h:180
static q7_t hpm_dsp_absmin_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q7 vector.
Definition: hpm_math.h:922
static uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Minimum value of the u8 vector.
Definition: hpm_math.h:275
static q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum value of the q7 vector.
Definition: hpm_math.h:160
static float32_t hpm_dsp_absmin_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the floating-point vector.
Definition: hpm_math.h:880
static q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
RMS of the q31 vector.
Definition: hpm_math.h:545
static q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum value of the q7 vector.
Definition: hpm_math.h:255
static q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
Variance of the q31 vector.
Definition: hpm_math.h:708
static q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum value of the q31 vector.
Definition: hpm_math.h:140
static q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
Mean value of the q31 vector.
Definition: hpm_math.h:337
static q31_t hpm_dsp_absmax_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q31 vector.
Definition: hpm_math.h:852
static float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
Entropy of the floating-point vector.
Definition: hpm_math.h:729
static float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
Mean value of the floating-point vector.
Definition: hpm_math.h:289
static float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
RMS of the floating-point vector.
Definition: hpm_math.h:493
static float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:770
static q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
Sum of the squares of the q7 vector.
Definition: hpm_math.h:473
static q31_t hpm_dsp_absmin_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q31 vector.
Definition: hpm_math.h:894
static float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum value of the floating-point vector.
Definition: hpm_math.h:95
static float32_t hpm_dsp_absmax_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the floating-point vector.
Definition: hpm_math.h:824
static q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum value of the q31 vector.
Definition: hpm_math.h:235
static q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
Standard deviation of the u8 vector.
Definition: hpm_math.h:642
static q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum value of the q15 vector.
Definition: hpm_math.h:215
static q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
RMS of the q15 vector.
Definition: hpm_math.h:519
static float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum value of the floating-point vector.
Definition: hpm_math.h:195
static q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
Sum of the squares of the q31 vector.
Definition: hpm_math.h:448
static float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
Definition: hpm_math.h:107
static q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
Variance of the q15 vector.
Definition: hpm_math.h:682
static q7_t hpm_dsp_absmax_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q7 vector.
Definition: hpm_math.h:866
static q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
Mean value of the q15 vector.
Definition: hpm_math.h:313
static q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
Standard deviation of the q15 vector.
Definition: hpm_math.h:591
static float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
Dot product with Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:790
static q15_t hpm_dsp_absmax_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q15 vector.
Definition: hpm_math.h:838
static uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
Mean value of the u8 vector.
Definition: hpm_math.h:383
static q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
Standard deviation of the q31 vector.
Definition: hpm_math.h:617
static float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Relative Entropy of the floating-point vector.
Definition: hpm_math.h:752
static float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
Sum of the squares of the floating-point vector.
Definition: hpm_math.h:397
static void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
SVM linear prediction.
Definition: hpm_math.h:4818
static void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
SVM rbf prediction.
Definition: hpm_math.h:4846
static void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
SVM polynomial prediction.
Definition: hpm_math.h:4860
static void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
SVM Sigmoid prediction.
Definition: hpm_math.h:4832
static void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
Duplicate the floating-point vector.
Definition: hpm_math.h:6225
static void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
Set the floating-point vector.
Definition: hpm_math.h:6278
static float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
Definition: hpm_math.h:5997
static void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
Convert a Q31 vector to Q15.
Definition: hpm_math.h:6159
static void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
Set the Q15 vector.
Definition: hpm_math.h:6291
static float32_t hpm_dsp_exp_f32(float32_t src)
Calculate exponential value of f32 vector.
Definition: hpm_math.h:6360
static float32_t hpm_dsp_sin_f32(float32_t src)
Definition: hpm_math.h:5945
static float32_t hpm_dsp_sigmoid_f32(float32_t src)
Calculate sigmoid value of f32 vector.
Definition: hpm_math.h:6386
static void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
Convert a Q31 vector to floating-point.
Definition: hpm_math.h:6142
static q15_t hpm_dsp_atan_q15(q15_t src)
Definition: hpm_math.h:5991
static q31_t hpm_dsp_sin_q31(q31_t src)
Definition: hpm_math.h:5965
static void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
Convert a floating-point vector to Q31.
Definition: hpm_math.h:6073
static q31_t hpm_dsp_cos_q31(q31_t src)
Definition: hpm_math.h:5932
static q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
Definition: hpm_math.h:6009
static void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
Duplicate the Q31 vector.
Definition: hpm_math.h:6251
static void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
Convert a Q15 vector to Q7.
Definition: hpm_math.h:6129
static void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
Set the Q31 vector.
Definition: hpm_math.h:6304
static q15_t hpm_dsp_sin_q15(q15_t src)
Definition: hpm_math.h:5971
static void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
Duplicate the Q15 vector.
Definition: hpm_math.h:6238
static void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
Convert a Q15 vector to Q31.
Definition: hpm_math.h:6116
static q15_t hpm_dsp_cos_q15(q15_t src)
Definition: hpm_math.h:5938
static float32_t hpm_dsp_cos_f32(float32_t src)
Definition: hpm_math.h:5926
static void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
Barycenter of the floating-point type.
Definition: hpm_math.h:6348
static q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
Definition: hpm_math.h:6003
static void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
Convert a Q7 vector to Q15.
Definition: hpm_math.h:6198
static void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
Convert a Q31 vector to Q7.
Definition: hpm_math.h:6172
static void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
Convert a floating-point vector to Q15.
Definition: hpm_math.h:6060
static void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
Set the Q7 vector.
Definition: hpm_math.h:6317
static q31_t hpm_dsp_atan_q31(q31_t src)
Definition: hpm_math.h:5985
static void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
Convert a Q7 vector to Q31.
Definition: hpm_math.h:6211
static void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
Convert a Q7 vector to floating-point.
Definition: hpm_math.h:6185
static float32_t hpm_dsp_log_f32(float32_t src)
Calculate the natural logarithm value of f32 vector.
Definition: hpm_math.h:6412
static void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
Convert a Q15 vector to floating-point.
Definition: hpm_math.h:6103
static float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
Weighted Sum of the floating-point vector.
Definition: hpm_math.h:6332
static void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
Convert a floating-point vector to Q7.
Definition: hpm_math.h:6090
static q31_t hpm_dsp_sqrt_q31(q31_t src)
Square root of the q31 input.
Definition: hpm_math.h:6034
static float32_t hpm_dsp_sqrt_f32(float32_t src)
Square root of the floating-point input.
Definition: hpm_math.h:6022
static void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
Duplicate the Q7 vector.
Definition: hpm_math.h:6264
static q15_t hpm_dsp_sqrt_q15(q15_t src)
Square root of the q15 input.
Definition: hpm_math.h:6046
static float32_t hpm_dsp_atan_f32(float32_t src)
Definition: hpm_math.h:5979
uint32_t hpm_math_sw_reverse_bit32_msb_to_lsb(uint32_t msb)
Reverse 32-bit data from MSB to LSB.
uint8_t hpm_math_sw_reverse_bit8_msb_to_lsb(uint8_t msb)
Reverse 8-bit data from MSB to LSB.
uint32_t hpm_math_sw_reverse_bit32_lsb_to_msb(uint32_t lsb)
Reverse 32-bit data from LSB to MSB.
uint8_t hpm_math_sw_reverse_bit8_lsb_to_msb(uint8_t lsb)
Reverse 8-bit data from LSB to MSB.
FFT transform context.
Definition: hpm_ffa_drv.h:75
void * dst
Definition: hpm_ffa_drv.h:81
uint8_t dst_data_type
Definition: hpm_ffa_drv.h:78
uint8_t src_data_type
Definition: hpm_ffa_drv.h:77
const void * src
Definition: hpm_ffa_drv.h:80
uint32_t num_points
Definition: hpm_ffa_drv.h:79
uint16_t is_ifft
Definition: hpm_ffa_drv.h:76