/* Identifier value for the NDS32 hardware DSP backend. HPM_DSP_CORE is
 * compared against this constant in the `#if HPM_DSP_CORE == HPM_DSP_HW_NDS32`
 * guards used throughout this header. */
17 #define HPM_DSP_HW_NDS32 1
19 #ifdef CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
20 #include CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
/* DSP module enable flags.
 *
 * The STATISTICS, BASIC, COMPLEX, CONTROLLER, DISTANCE, FILTERING and MATRIX
 * flags each gate a section of this header through `#ifdef`, so what matters
 * is whether the macro is defined, not its value: defining a flag as 0 still
 * enables its section. To disable a module, leave the macro undefined.
 * NOTE(review): the SVM, TRANSFORM, UTILS and SORT flags are presumably used
 * the same way; their guards are not visible in this excerpt - confirm. */
28 #define HPM_MATH_DSP_STATISTICS 1
29 #define HPM_MATH_DSP_BASIC 1
30 #define HPM_MATH_DSP_COMPLEX 1
31 #define HPM_MATH_DSP_CONTROLLER 1
32 #define HPM_MATH_DSP_DISTANCE 1
33 #define HPM_MATH_DSP_FILTERING 1
34 #define HPM_MATH_DSP_MATRIX 1
35 #define HPM_MATH_DSP_SVM 1
36 #define HPM_MATH_DSP_TRANSFORM 1
37 #define HPM_MATH_DSP_UTILS 1
38 #define HPM_MATH_DSP_SORT 1
/* Neural-network (NN) module enable flags, all defined as 1.
 * NOTE(review): no use of these macros is visible in this excerpt; they
 * presumably gate the NN sections of the header in the same way the
 * HPM_MATH_DSP_* flags gate the DSP sections - confirm whether they are
 * tested with #ifdef (defined-ness) or #if (value). */
40 #define HPM_MATH_NN_ACTIVATION 1
41 #define HPM_MATH_NN_TINYENGINE 1
42 #define HPM_MATH_NN_BASIC 1
43 #define HPM_MATH_NN_CONCATENATION 1
44 #define HPM_MATH_NN_CONVOLUTION 1
45 #define HPM_MATH_NN_CONNECTED 1
46 #define HPM_MATH_NN_POOLING 1
47 #define HPM_MATH_NN_SOFTMAX 1
48 #define HPM_MATH_NN_UTIL 1
/* Selects the DSP backend. Every wrapper body in this header is guarded by
 * `#if HPM_DSP_CORE == HPM_DSP_HW_NDS32`; here the core is set to that
 * backend. */
50 #define HPM_DSP_CORE HPM_DSP_HW_NDS32
/* Pi. The literal has no suffix, so it has type double; the parentheses keep
 * the expansion safe inside larger expressions. */
52 #define HPM_MATH_PI (3.14159265358979323846)
/* Defined with no replacement text, so only its defined-ness can be tested
 * (#ifdef / defined()).
 * NOTE(review): its consumer is not visible in this excerpt - presumably it
 * enables a check in the software FFT code; confirm. */
62 #define HPM_MATH_SW_FFT_CHECKLIST
71 #ifdef HPM_MATH_DSP_STATISTICS
79 #ifdef HPM_EN_MATH_DSP_LIB
85 #include "riscv_dsp_statistics_math.h"
97 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
100 tpt_max_f32(&res, index, src,
size);
103 return riscv_dsp_max_f32(src,
size, index);
109 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
110 return riscv_dsp_max_val_f32(src,
size);
122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
125 tpt_max_q15(&res, index, src,
size);
128 return riscv_dsp_max_q15(src,
size, index);
142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
145 tpt_max_q31(&res, index, src,
size);
148 return riscv_dsp_max_q31(src,
size, index);
162 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
165 tpt_max_q7(&res, index, src,
size);
168 return riscv_dsp_max_q7(src,
size, index);
182 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
183 return riscv_dsp_max_u8(src,
size, index);
197 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
200 tpt_min_f32(&res, index, src,
size);
203 return riscv_dsp_min_f32(src,
size, index);
217 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
220 tpt_min_q15(&res, index, src,
size);
223 return riscv_dsp_min_q15(src,
size, index);
237 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
240 tpt_min_q31(&res, index, src,
size);
243 return riscv_dsp_min_q31(src,
size, index);
257 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
260 tpt_min_q7(&res, index, src,
size);
263 return riscv_dsp_min_q7(src,
size, index);
277 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
278 return riscv_dsp_min_u8(src,
size, index);
291 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
294 tpt_mean_f32(&res, src,
size);
297 return riscv_dsp_mean_f32(src,
size);
315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
318 tpt_mean_q15(&res, src,
size);
321 return riscv_dsp_mean_q15(src,
size);
339 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
342 tpt_mean_q31(&res, src,
size);
345 return riscv_dsp_mean_q31(src,
size);
363 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
366 tpt_mean_q7(&res, src,
size);
369 return riscv_dsp_mean_q7(src,
size);
385 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
386 return riscv_dsp_mean_u8(src,
size);
399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
402 tpt_power_f32(&res, src,
size);
405 return riscv_dsp_pwr_f32(src,
size);
424 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
427 tpt_power_q15(&res, src,
size);
430 return riscv_dsp_pwr_q15(src,
size);
450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
453 tpt_power_q31(&res, src,
size);
456 return riscv_dsp_pwr_q31(src,
size);
475 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
478 tpt_power_q7(&res, src,
size);
481 return riscv_dsp_pwr_q7(src,
size);
495 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
498 tpt_rms_f32(&res, src,
size);
501 return riscv_dsp_rms_f32(src,
size);
521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
524 tpt_rms_q15(&res, src,
size);
527 return riscv_dsp_rms_q15(src,
size);
547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
550 tpt_rms_q31(&res, src,
size);
553 return riscv_dsp_rms_q31(src,
size);
567 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
570 tpt_std_f32(&res, src,
size);
573 return riscv_dsp_std_f32(src,
size);
593 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
596 tpt_std_q15(&res, src,
size);
599 return riscv_dsp_std_q15(src,
size);
619 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
622 tpt_std_q31(&res, src,
size);
625 return riscv_dsp_std_q31(src,
size);
644 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
645 return riscv_dsp_std_u8(src,
size);
658 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
661 tpt_var_f32(&res, src,
size);
664 return riscv_dsp_var_f32(src,
size);
684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
687 tpt_var_q15(&res, src,
size);
690 return riscv_dsp_var_q15(src,
size);
710 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
713 tpt_var_q31(&res, src,
size);
716 return riscv_dsp_var_q31(src,
size);
731 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
733 return tpt_entropy_f32(src,
size);
735 return riscv_dsp_entropy_f32(src,
size);
754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
756 return tpt_relative_entropy_f32(src1, src2,
size);
758 return riscv_dsp_relative_entropy_f32(src1, src2,
size);
772 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
/* NOTE(review): the result of tpt_lse_f32() below is discarded, whereas the
 * riscv_dsp_lse_f32() call that follows is returned, and other tpt_ calls in
 * this header (e.g. tpt_entropy_f32) are written as `return tpt_...`. If the
 * enclosing function is non-void and this call is the selected branch,
 * control reaches the end of the function without returning a value -
 * confirm against the full header and add `return` if so. */
774 tpt_lse_f32(src,
size);
776 return riscv_dsp_lse_f32(src,
size);
792 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
794 return tpt_lse_dprod_f32(src1, src2,
size, buffer);
796 return riscv_dsp_lse_dprod_f32(src1, src2,
size, buffer);
812 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
813 return riscv_dsp_gaussian_naive_bayes_est_f32(instance, src, buf);
826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
827 return riscv_dsp_absmax_f32(src,
size, index);
840 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
841 return riscv_dsp_absmax_q15(src,
size, index);
854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
855 return riscv_dsp_absmax_q31(src,
size, index);
868 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
869 return riscv_dsp_absmax_q7(src,
size, index);
882 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
883 return riscv_dsp_absmin_f32(src,
size, index);
896 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
897 return riscv_dsp_absmin_q31(src,
size, index);
910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
911 return riscv_dsp_absmin_q15(src,
size, index);
924 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
925 return riscv_dsp_absmin_q7(src,
size, index);
937 #ifdef HPM_MATH_DSP_BASIC
944 #ifdef HPM_EN_MATH_DSP_LIB
947 #include "tpt_math.h"
950 #include "riscv_dsp_basic_math.h"
961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
963 tpt_abs_f32(dst, src,
size);
965 riscv_dsp_abs_f32(src, dst,
size);
981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
983 tpt_abs_q31(dst, src,
size);
985 riscv_dsp_abs_q31(src, dst,
size);
1002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1004 tpt_abs_q15(dst, src,
size);
1006 riscv_dsp_abs_q15(src, dst,
size);
1022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1024 tpt_abs_q7(dst, src,
size);
1026 riscv_dsp_abs_q7(src, dst,
size);
1041 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1043 tpt_add_f32(dst, src1, src2,
size);
1045 riscv_dsp_add_f32(src1, src2, dst,
size);
1061 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1063 tpt_add_q31(dst, src1, src2,
size);
1065 riscv_dsp_add_q31(src1, src2, dst,
size);
1081 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1083 tpt_add_q15(dst, src1, src2,
size);
1085 riscv_dsp_add_q15(src1, src2, dst,
size);
1101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1103 tpt_add_q7(dst, src1, src2,
size);
1105 riscv_dsp_add_q7(src1, src2, dst,
size);
1121 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1123 tpt_add_u8_u16(dst, src1, src2,
size);
1125 riscv_dsp_add_u8_u16(src1, src2, dst,
size);
1140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1142 tpt_sub_f32(dst, src1, src2,
size);
1144 riscv_dsp_sub_f32(src1, src2, dst,
size);
1160 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1162 tpt_sub_q31(dst, src1, src2,
size);
1164 riscv_dsp_sub_q31(src1, src2, dst,
size);
1180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1182 tpt_sub_q15(dst, src1, src2,
size);
1184 riscv_dsp_sub_q15(src1, src2, dst,
size);
1200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1202 tpt_sub_q7(dst, src1, src2,
size);
1204 riscv_dsp_sub_q7(src1, src2, dst,
size);
1220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1221 riscv_dsp_sub_u8_q7(src1, src2, dst,
size);
1235 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1237 tpt_mult_f32(dst, src1, src2,
size);
1239 riscv_dsp_mul_f32(src1, src2, dst,
size);
1255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1257 tpt_mult_q31(dst, src1, src2,
size);
1259 riscv_dsp_mul_q31(src1, src2, dst,
size);
1275 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1277 tpt_mult_q15(dst, src1, src2,
size);
1279 riscv_dsp_mul_q15(src1, src2, dst,
size);
1295 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1297 tpt_mult_q7(dst, src1, src2,
size);
1299 riscv_dsp_mul_q7(src1, src2, dst,
size);
1315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1316 riscv_dsp_mul_u8_u16(src1, src2, dst,
size);
1330 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1332 tpt_div_f32(dst, src1, src2,
size);
1334 riscv_dsp_div_f32(src1, src2, dst,
size);
1347 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1349 return tpt_div_q31(src1, src2);
1351 return riscv_dsp_div_q31(src1, src2);
1364 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1366 return tpt_div_s64_u32(src1, src2);
1368 return riscv_dsp_div_s64_u32(src1, src2);
1381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1383 return tpt_div_u64_u32(src1, src2);
1385 return riscv_dsp_div_u64_u32(src1, src2);
1399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1401 tpt_negate_f32(dst, src,
size);
1403 riscv_dsp_neg_f32(src, dst,
size);
1419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1421 tpt_negate_q31(dst, src,
size);
1423 riscv_dsp_neg_q31(src, dst,
size);
1439 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1441 tpt_negate_q15(dst, src,
size);
1443 riscv_dsp_neg_q15(src, dst,
size);
1459 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1461 tpt_negate_q7(dst, src,
size);
1463 riscv_dsp_neg_q7(src, dst,
size);
1478 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1481 tpt_dot_prod_f32(&res, src1, src2,
size);
1484 return riscv_dsp_dprod_f32(src1, src2,
size);
1503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1506 tpt_dot_prod_q31(&res, src1, src2,
size);
1509 return riscv_dsp_dprod_q31(src1, src2,
size);
1526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1529 tpt_dot_prod_q15(&res, src1, src2,
size);
1532 return riscv_dsp_dprod_q15(src1, src2,
size);
1550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1551 return riscv_dsp_dprod_u8xq15(src1, src2,
size);
1568 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1571 tpt_dot_prod_q7(&res, src1, src2,
size);
1574 return riscv_dsp_dprod_q7(src1, src2,
size);
1591 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1592 return riscv_dsp_dprod_q7xq15(src1, src2,
size);
1608 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1609 return riscv_dsp_dprod_u8(src1, src2,
size);
1623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1625 tpt_offset_f32(dst, src, offset,
size);
1627 riscv_dsp_offset_f32(src, offset, dst,
size);
1643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1645 tpt_offset_q31(dst, src, offset,
size);
1647 riscv_dsp_offset_q31(src, offset, dst,
size);
1663 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1665 tpt_offset_q15(dst, src, offset,
size);
1667 riscv_dsp_offset_q15(src, offset, dst,
size);
1683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1685 tpt_offset_q7(dst, src, offset,
size);
1687 riscv_dsp_offset_q7(src, offset, dst,
size);
1703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1704 riscv_dsp_offset_u8(src, offset, dst,
size);
1718 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1720 tpt_scale_f32(dst, src, scale,
size);
1722 riscv_dsp_scale_f32(src, scale, dst,
size);
1741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1743 tpt_scale_q31(dst, src, scalefract, shift,
size);
1745 riscv_dsp_scale_q31(src, scalefract, shift, dst,
size);
1764 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1766 tpt_scale_q15(dst, src, scalefract, shift,
size);
1768 riscv_dsp_scale_q15(src, scalefract, shift, dst,
size);
1787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1789 tpt_scale_q7(dst, src, scalefract, shift,
size);
1791 riscv_dsp_scale_q7(src, scalefract, shift, dst,
size);
1809 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1810 riscv_dsp_scale_u8(src, scalefract, shift, dst,
size);
1827 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1829 tpt_shift_q15(dst, src, shift,
size);
1831 riscv_dsp_shift_q15(src, shift, dst,
size);
1848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1850 tpt_shift_q31(dst, src, shift,
size);
1852 riscv_dsp_shift_q31(src, shift, dst,
size);
1869 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1871 tpt_shift_q7(dst, src, shift,
size);
1873 riscv_dsp_shift_q7(src, shift, dst,
size);
1890 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1891 riscv_dsp_shift_u8(src, shift, dst,
size);
1910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1912 tpt_clip_f32(dst, src, low, high,
size);
1914 riscv_dsp_clip_f32(src, dst, low, high,
size);
1929 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1931 tpt_clip_q31(dst, src, low, high,
size);
1933 riscv_dsp_clip_q31(src, dst, low, high,
size);
1948 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1950 tpt_clip_q15(dst, src, low, high,
size);
1952 riscv_dsp_clip_q15(src, dst, low, high,
size);
1967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1969 tpt_clip_q7(dst, src, low, high,
size);
1971 riscv_dsp_clip_q7(src, dst, low, high,
size);
2000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2002 tpt_and_32bit(dst, src1, src2,
size);
2004 riscv_dsp_and_u32(src1, src2, dst,
size);
2019 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2021 tpt_and_16bit(dst, src1, src2,
size);
2023 riscv_dsp_and_u16(src1, src2, dst,
size);
2038 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2040 tpt_and_8bit(dst, src1, src2,
size);
2042 riscv_dsp_and_u8(src1, src2, dst,
size);
2071 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2073 tpt_or_32bit(dst, src1, src2,
size);
2075 riscv_dsp_or_u32(src1, src2, dst,
size);
2089 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2091 tpt_or_16bit(dst, src1, src2,
size);
2093 riscv_dsp_or_u16(src1, src2, dst,
size);
2107 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2109 tpt_or_8bit(dst, src1, src2,
size);
2111 riscv_dsp_or_u8(src1, src2, dst,
size);
2140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2142 tpt_xor_32bit(dst, src1, src2,
size);
2144 riscv_dsp_xor_u32(src1, src2, dst,
size);
2158 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2160 tpt_xor_16bit(dst, src1, src2,
size);
2162 riscv_dsp_xor_u16(src1, src2, dst,
size);
2176 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2178 tpt_xor_8bit(dst, src1, src2,
size);
2180 riscv_dsp_xor_u8(src1, src2, dst,
size);
2208 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2210 tpt_not_32bit(dst, src,
size);
2212 riscv_dsp_not_u32(src, dst,
size);
2225 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2227 tpt_not_16bit(dst, src,
size);
2229 riscv_dsp_not_u16(src, dst,
size);
2242 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2244 tpt_not_8bit(dst, src,
size);
2246 riscv_dsp_not_u8(src, dst,
size);
2292 #ifdef HPM_MATH_DSP_COMPLEX
2303 #ifdef HPM_EN_MATH_DSP_LIB
2306 #include "tpt_math.h"
2309 #include "riscv_dsp_complex_math.h"
2320 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2322 tpt_cmplx_conj_f32(dst, src,
size);
2324 riscv_dsp_cconj_f32(src, dst,
size);
2340 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2342 tpt_cmplx_conj_q15(dst, src,
size);
2344 riscv_dsp_cconj_q15(src, dst,
size);
2360 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2362 tpt_cmplx_conj_q31(dst, src,
size);
2364 riscv_dsp_cconj_q31(src, dst,
size);
2379 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2380 riscv_dsp_cdprod_f32(src1, src2,
size, dst);
2394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2396 tpt_cmplx_dot_prod_f32(rout, iout, src1, src2,
size);
2398 riscv_dsp_cdprod_typ2_f32(src1, src2,
size, rout, iout);
2415 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2416 riscv_dsp_cdprod_q15(src1, src2,
size, dst);
2433 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2434 riscv_dsp_cdprod_typ2_q15(src1, src2,
size, rout, iout);
2450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2451 riscv_dsp_cdprod_q31(src1, src2,
size, dst);
2469 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2471 tpt_cmplx_dot_prod_q31(rout, iout, src1, src2,
size);
2473 riscv_dsp_cdprod_typ2_q31(src1, src2,
size, rout, iout);
2487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2489 tpt_cmplx_mag_f32(dst, src,
size);
2491 riscv_dsp_cmag_f32(src, dst,
size);
2507 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2509 tpt_cmplx_mag_q15(dst, src,
size);
2511 riscv_dsp_cmag_q15(src, dst,
size);
2527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2529 tpt_cmplx_mag_q31(dst, src,
size);
2531 riscv_dsp_cmag_q31(src, dst,
size);
2546 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2548 tpt_cmplx_mag_squared_f32(dst, src,
size);
2550 riscv_dsp_cmag_sqr_f32(src, dst,
size);
2566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2568 tpt_cmplx_mag_squared_q15(dst, src,
size);
2570 riscv_dsp_cmag_sqr_q15(src, dst,
size);
2586 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2588 tpt_cmplx_mag_squared_q31(dst, src,
size);
2590 riscv_dsp_cmag_sqr_q31(src, dst,
size);
2605 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2607 tpt_cmplx_mult_cmplx_f32(dst, src1, src2,
size);
2609 riscv_dsp_cmul_f32(src1, src2, dst,
size);
2626 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2628 tpt_cmplx_mult_cmplx_q15(dst, src1, src2,
size);
2630 riscv_dsp_cmul_q15(src1, src2, dst,
size);
2647 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2649 tpt_cmplx_mult_cmplx_q31(dst, src1, src2,
size);
2651 riscv_dsp_cmul_q31(src1, src2, dst,
size);
2666 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2668 tpt_cmplx_mult_real_f32(dst, src, real,
size);
2670 riscv_dsp_cmul_real_f32(src, real, dst,
size);
2686 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2688 tpt_cmplx_mult_real_q15(dst, src, real,
size);
2690 riscv_dsp_cmul_real_q15(src, real, dst,
size);
2706 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2708 tpt_cmplx_mult_real_q31(dst, src, real,
size);
2710 riscv_dsp_cmul_real_q31(src, real, dst,
size);
2722 #ifdef HPM_MATH_DSP_CONTROLLER
2729 #ifdef HPM_EN_MATH_DSP_LIB
2731 #include "riscv_dsp_controller_math.h"
2743 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2744 riscv_dsp_clarke_f32(a, b, alpha, beta);
2759 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2760 riscv_dsp_clarke_q31(a, b, alpha, beta);
2774 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2775 riscv_dsp_inv_clarke_f32(alpha, beta, a, b);
2790 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2791 riscv_dsp_inv_clarke_q31(alpha, beta, a, b);
/**
 * @brief Wrapper around riscv_dsp_park_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 all six arguments are forwarded
 * unchanged and in the same order; a and b are the pointer arguments.
 * Nothing is returned.
 *
 * NOTE(review): the parameters are named sin and cos; if <math.h> is in
 * scope these names hide the library functions inside this function.
 */
2805 static inline void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
2807 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2808 riscv_dsp_park_f32(alpha, beta, a, b, sin, cos);
/**
 * @brief Wrapper around riscv_dsp_park_q31().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 all six arguments are forwarded
 * unchanged and in the same order; a and b are the pointer arguments.
 * Nothing is returned.
 *
 * NOTE(review): the parameters are named sin and cos; if <math.h> is in
 * scope these names hide the library functions inside this function.
 */
2824 static inline void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
2826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2827 riscv_dsp_park_q31(alpha, beta, a, b, sin, cos);
/**
 * @brief Wrapper around riscv_dsp_inv_park_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 all six arguments are forwarded
 * unchanged and in the same order; here alpha and beta are the pointer
 * arguments (the reverse of hpm_dsp_park_f32). Nothing is returned.
 *
 * NOTE(review): the parameters are named sin and cos; if <math.h> is in
 * scope these names hide the library functions inside this function.
 */
2841 static inline void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
2843 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2844 riscv_dsp_inv_park_f32(a, b, alpha, beta, sin, cos);
2861 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2862 riscv_dsp_inv_park_q31(a, b, alpha, beta, sin, cos);
2874 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2875 return riscv_dsp_pid_f32(instance, src);
2892 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2893 riscv_dsp_init_pid_f32(instance, set);
2906 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2907 return riscv_dsp_pid_q31(instance, src);
2925 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2926 riscv_dsp_init_pid_q31(instance, set);
2932 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2933 return riscv_dsp_pid_q15(instance, src);
2950 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2951 riscv_dsp_init_pid_q15(instance, set);
2962 #ifdef HPM_MATH_DSP_DISTANCE
2969 #ifdef HPM_EN_MATH_DSP_LIB
2971 #include "tpt_math.h"
2973 #include "riscv_dsp_distance_math.h"
2985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2987 return tpt_braycurtis_distance_f32(src1, src2,
size);
2989 return riscv_dsp_dist_bray_curtis_f32(src1, src2,
size);
3003 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3005 return tpt_canberra_distance_f32(src1, src2,
size);
3007 return riscv_dsp_dist_canberra_f32(src1, src2,
size);
3021 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3023 return tpt_chebyshev_distance_f32(src1, src2,
size);
3025 return riscv_dsp_dist_chebyshev_f32(src1, src2,
size);
3039 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3041 return tpt_cityblock_distance_f32(src1, src2,
size);
3043 return riscv_dsp_dist_city_block_f32(src1, src2,
size);
3057 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3059 return tpt_correlation_distance_f32(src1, src2,
size);
3061 return riscv_dsp_dist_corr_f32(src1, src2,
size);
3075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3077 return tpt_cosine_distance_f32(src1, src2,
size);
3079 return riscv_dsp_dist_cos_f32(src1, src2,
size);
3093 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3095 return tpt_euclidean_distance_f32(src1, src2,
size);
3097 return riscv_dsp_dist_euclidean_f32(src1, src2,
size);
3111 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3113 return tpt_jensenshannon_distance_f32(src1, src2,
size);
3115 return riscv_dsp_dist_jensen_shannon_f32(src1, src2,
size);
3130 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3132 return tpt_minkowski_distance_f32(src1, src2, order,
size);
3134 return riscv_dsp_dist_minkowski_f32(src1, src2, order,
size);
3148 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3150 return tpt_dice_distance(src1, src2, numofbool);
3152 return riscv_dsp_bdist_dice_u32_f32(src1, src2, numofbool);
3166 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3168 return tpt_hamming_distance(src1, src2, numofbool);
3170 return riscv_dsp_bdist_hamming_u32_f32(src1, src2, numofbool);
3184 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3186 return tpt_jaccard_distance(src1, src2, numofbool);
3188 return riscv_dsp_bdist_jaccard_u32_f32(src1, src2, numofbool);
3202 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3204 return tpt_kulsinski_distance(src1, src2, numofbool);
3206 return riscv_dsp_bdist_kulsinski_u32_f32(src1, src2, numofbool);
3220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3222 return tpt_sokalmichener_distance(src1, src2, numofbool);
3224 return riscv_dsp_bdist_sokal_michener_u32_f32(src1, src2, numofbool);
3238 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3240 return tpt_sokalsneath_distance(src1, src2, numofbool);
3242 return riscv_dsp_bdist_sokal_sneath_u32_f32(src1, src2, numofbool);
3256 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3258 return tpt_rogerstanimoto_distance(src1, src2, numofbool);
3260 return riscv_dsp_bdist_rogers_tanimoto_u32_f32(src1, src2, numofbool);
3274 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3276 return tpt_yule_distance(src1, src2, numofbool);
3278 return riscv_dsp_bdist_yule_u32_f32(src1, src2, numofbool);
3292 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3294 return tpt_russellrao_distance(src1, src2, numofbool);
3296 return riscv_dsp_bdist_russell_rao_u32_f32(src1, src2, numofbool);
3308 #ifdef HPM_MATH_DSP_FILTERING
3315 #ifdef HPM_EN_MATH_DSP_LIB
3318 #include "tpt_math.h"
3321 #include "riscv_dsp_filtering_math.h"
/**
 * @brief Wrapper around riscv_dsp_fir_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
3330 static inline void hpm_dsp_fir_f32(
const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
3332 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3333 riscv_dsp_fir_f32(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_fir_q31().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
3351 static inline void hpm_dsp_fir_q31(
const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t
size)
3353 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3354 riscv_dsp_fir_q31(instance, src, dst,
size);
3374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3375 riscv_dsp_fir_fast_q31(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_fir_q15().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
3392 static inline void hpm_dsp_fir_q15(
const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t
size)
3394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3395 riscv_dsp_fir_q15(instance, src, dst,
size);
3413 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3414 riscv_dsp_fir_fast_q15(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_fir_q7().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
3430 static inline void hpm_dsp_fir_q7(
const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t
size)
3432 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3433 riscv_dsp_fir_q7(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_lfir_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
3445 static inline void hpm_dsp_lfir_f32(
const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
3447 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3448 riscv_dsp_lfir_f32(instance, src, dst,
size);
3462 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3463 riscv_dsp_lfir_q15(instance, src, dst,
size);
3481 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3482 riscv_dsp_lfir_q31(instance, src, dst,
size);
3487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3488 riscv_dsp_dcmfir_f32(instance, src, dst,
size);
3493 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3494 riscv_dsp_dcmfir_q15(instance, src, dst,
size);
3499 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3500 riscv_dsp_dcmfir_q31(instance, src, dst,
size);
3505 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3506 riscv_dsp_dcmfir_fast_q31(instance, src, dst,
size);
3511 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3512 riscv_dsp_dcmfir_fast_q15(instance, src, dst,
size);
3517 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3518 riscv_dsp_upsplfir_f32(instance, src, dst,
size);
3523 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3524 riscv_dsp_upsplfir_q15(instance, src, dst,
size);
3529 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3530 riscv_dsp_upsplfir_q31(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_spafir_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, buf, size). The instance pointer is
 * non-const. Nothing is returned.
 */
3533 static inline void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t
size)
3535 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3536 riscv_dsp_spafir_f32(instance, src, dst, buf,
size);
/**
 * @brief Wrapper around riscv_dsp_spafir_q15().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, buf1, buf2, size). Two caller-supplied
 * buffers are taken: buf1 of q15_t and buf2 of q31_t. Nothing is returned.
 */
3539 static inline void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t
size)
3541 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3542 riscv_dsp_spafir_q15(instance, src, dst, buf1, buf2,
size);
3547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3548 riscv_dsp_spafir_q31(instance, src, dst, buf,
size);
/**
 * @brief Wrapper around riscv_dsp_spafir_q7().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, buf1, buf2, size). Two caller-supplied
 * buffers are taken: buf1 of q7_t and buf2 of q31_t. Nothing is returned.
 */
3551 static inline void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t
size)
3553 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3554 riscv_dsp_spafir_q7(instance, src, dst, buf1, buf2,
size);
/**
 * @brief Wrapper around riscv_dsp_lms_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). The instance is taken
 * through a pointer to const. Nothing is returned.
 */
3572 static inline void hpm_dsp_lms_f32(
const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t
size)
3574 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3575 riscv_dsp_lms_f32(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper around riscv_dsp_lms_q31().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). The instance is taken
 * through a pointer to const. Nothing is returned.
 */
3595 static inline void hpm_dsp_lms_q31(
const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t
size)
3597 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3598 riscv_dsp_lms_q31(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper around riscv_dsp_lms_q15().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). The instance is taken
 * through a pointer to const. Nothing is returned.
 */
3618 static inline void hpm_dsp_lms_q15(
const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t
size)
3620 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3621 riscv_dsp_lms_q15(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper around riscv_dsp_nlms_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). Unlike the
 * hpm_dsp_lms_* wrappers, the instance pointer here is non-const.
 * Nothing is returned.
 */
3629 static inline void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t
size)
3631 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3632 riscv_dsp_nlms_f32(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper around riscv_dsp_nlms_q31().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). Unlike the
 * hpm_dsp_lms_* wrappers, the instance pointer here is non-const.
 * Nothing is returned.
 */
3641 static inline void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t
size)
3643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3644 riscv_dsp_nlms_q31(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper around riscv_dsp_nlms_q15().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, ref, dst, err, size). Unlike the
 * hpm_dsp_lms_* wrappers, the instance pointer here is non-const.
 * Nothing is returned.
 */
3649 static inline void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t
size)
3651 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3652 riscv_dsp_nlms_q15(instance, src, ref, dst, err,
size);
/**
 * @brief Wrapper over tpt_conv_f32() / riscv_dsp_conv_f32().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3667 static inline void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3669 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3671 tpt_conv_f32(dst, src1, len1, src2, len2);
3673 riscv_dsp_conv_f32(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_conv_q15() / riscv_dsp_conv_q15().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3693 static inline void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3695 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3697 tpt_conv_q15(dst, src1, len1, src2, len2);
3699 riscv_dsp_conv_q15(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_conv_q31() / riscv_dsp_conv_q31().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3721 static inline void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3723 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3725 tpt_conv_q31(dst, src1, len1, src2, len2);
3727 riscv_dsp_conv_q31(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_conv_q7() / riscv_dsp_conv_q7().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3747 static inline void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3749 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3751 tpt_conv_q7(dst, src1, len1, src2, len2);
3753 riscv_dsp_conv_q7(src1, len1, src2, len2, dst);
3772 static inline int32_t
hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t
size)
3774 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3776 return tpt_conv_partial_f32(dst, src1, len1, src2, len2, startindex,
size);
3778 return riscv_dsp_conv_partial_f32(src1, len1, src2, len2, dst, startindex,
3800 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3802 return tpt_conv_partial_q15(dst, src1, len1, src2, len2, startindex,
size);
3804 return riscv_dsp_conv_partial_q15(src1, len1, src2, len2, dst, startindex,
3826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3828 return tpt_conv_partial_q31(dst, src1, len1, src2, len2, startindex,
size);
3830 return riscv_dsp_conv_partial_q31(src1, len1, src2, len2, dst, startindex,
3852 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3854 return tpt_conv_partial_q7(dst, src1, len1, src2, len2, startindex,
size);
3856 return riscv_dsp_conv_partial_q7(src1, len1, src2, len2, dst, startindex,
/**
 * @brief Wrapper over tpt_correlate_f32() / riscv_dsp_corr_f32().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3873 static inline void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3875 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3877 tpt_correlate_f32(dst, src1, len1, src2, len2);
3879 riscv_dsp_corr_f32(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_correlate_q15() / riscv_dsp_corr_q15().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3899 static inline void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3901 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3903 tpt_correlate_q15(dst, src1, len1, src2, len2);
3905 riscv_dsp_corr_q15(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_correlate_q31() / riscv_dsp_corr_q31().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3929 static inline void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3931 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3933 tpt_correlate_q31(dst, src1, len1, src2, len2);
3935 riscv_dsp_corr_q31(src1, len1, src2, len2, dst);
/**
 * @brief Wrapper over tpt_correlate_q7() / riscv_dsp_corr_q7().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values; the tpt_ call takes dst as its first argument, the riscv_dsp_
 * call takes it last. Nothing is returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
3955 static inline void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3957 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3959 tpt_correlate_q7(dst, src1, len1, src2, len2);
3961 riscv_dsp_corr_q7(src1, len1, src2, len2, dst);
3967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3968 riscv_dsp_bq_df1_f32(instance, src, dst,
size);
3973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3974 riscv_dsp_bq_df1_q15(instance, src, dst,
size);
3979 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3980 riscv_dsp_bq_df1_fast_q15(instance, src, dst,
size);
3985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3986 riscv_dsp_bq_df1_q31(instance, src, dst,
size);
3991 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3992 riscv_dsp_bq_df1_fast_q31(instance, src, dst,
size);
3997 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3998 riscv_dsp_bq_df1_32x64_q31(instance, src, dst,
size);
4003 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4004 riscv_dsp_bq_df2T_f32(instance, src, dst,
size);
4009 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4010 riscv_dsp_bq_df2T_f64(instance, src, dst,
size);
4015 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4016 riscv_dsp_bq_stereo_df2T_f32(instance, src, dst,
size);
/**
 * @brief Wrapper around riscv_dsp_liir_f32().
 *
 * Under HPM_DSP_CORE == HPM_DSP_HW_NDS32 the arguments are forwarded
 * unchanged as (instance, src, dst, size). The instance is taken through a
 * pointer to const. Nothing is returned.
 */
4020 static inline void hpm_dsp_liir_f32(
const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t
size)
4022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4023 riscv_dsp_liir_f32(instance, src, dst,
size);
4028 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4029 riscv_dsp_liir_q31(instance, src, dst,
size);
4034 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4035 riscv_dsp_liir_fast_q31(instance, src, dst,
size);
4040 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4041 riscv_dsp_liir_q15(instance, src, dst,
size);
4046 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4047 riscv_dsp_liir_fast_q15(instance, src, dst,
size);
4058 #ifdef HPM_MATH_DSP_MATRIX
4083 #ifdef HPM_EN_MATH_DSP_LIB
4085 #include "tpt_math.h"
4087 #include "riscv_dsp_matrix_math.h"
/**
 * @brief Wrapper over tpt_mat_add_f32() / riscv_dsp_mat_add_f32().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values (two const inputs, dst, row, col); the tpt_ call takes dst
 * first, the riscv_dsp_ call takes it after the two inputs. Nothing is
 * returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
4098 static inline void hpm_dsp_mat_add_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4100 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4102 tpt_mat_add_f32(dst, src1, src2, row, col);
4104 riscv_dsp_mat_add_f32(src1, src2, dst, row, col);
/**
 * @brief Wrapper over tpt_mat_add_f64() / riscv_dsp_mat_add_f64().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values (two const inputs, dst, row, col); the tpt_ call takes dst
 * first, the riscv_dsp_ call takes it after the two inputs. Nothing is
 * returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
4117 static inline void hpm_dsp_mat_add_f64(
const float64_t *src1,
const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
4119 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4121 tpt_mat_add_f64(dst, src1, src2, row, col);
4123 riscv_dsp_mat_add_f64(src1, src2, dst, row, col);
/**
 * @brief Wrapper over tpt_mat_add_q15() / riscv_dsp_mat_add_q15().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values (two const inputs, dst, row, col); the tpt_ call takes dst
 * first, the riscv_dsp_ call takes it after the two inputs. Nothing is
 * returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
4138 static inline void hpm_dsp_mat_add_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4142 tpt_mat_add_q15(dst, src1, src2, row, col);
4144 riscv_dsp_mat_add_q15(src1, src2, dst, row, col);
/**
 * @brief Wrapper over tpt_mat_add_q31() / riscv_dsp_mat_add_q31().
 *
 * Both calls sit under HPM_DSP_CORE == HPM_DSP_HW_NDS32 and receive the same
 * five values (two const inputs, dst, row, col); the tpt_ call takes dst
 * first, the riscv_dsp_ call takes it after the two inputs. Nothing is
 * returned.
 * NOTE(review): the directive that selects one of the two calls is not
 * visible in this excerpt - presumably they are mutually exclusive; confirm.
 */
4159 static inline void hpm_dsp_mat_add_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4161 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4163 tpt_mat_add_q31(dst, src1, src2, row, col);
4165 riscv_dsp_mat_add_q31(src1, src2, dst, row, col);
4180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4182 return tpt_mat_inverse_f32(dst, src,
size);
4184 return riscv_dsp_mat_inv_f32(src, dst,
size);
4190 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4192 return tpt_mat_inverse_f64(dst, src,
size);
4194 return riscv_dsp_mat_inv_f64(src, dst,
size);
4209 static inline void hpm_dsp_mat_mul_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4211 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4213 return tpt_mat_mult_f32(dst, src1, src2, row, col, col2);
4215 riscv_dsp_mat_mul_f32(src1, src2, dst, row, col, col2);
4220 static inline void hpm_dsp_mat_mul_f64(
const float64_t *src1,
const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4222 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4224 return tpt_mat_mult_f64(dst, src1, src2, row, col, col2);
4226 riscv_dsp_mat_mul_f64(src1, src2, dst, row, col, col2);
4240 static inline void hpm_dsp_cmat_mul_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4242 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4244 return tpt_mat_cmplx_mult_f32(dst, src1, src2, row, col, col2);
4246 riscv_dsp_cmat_mul_f32(src1, src2, dst, row, col, col2);
4267 static inline void hpm_dsp_mat_mul_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4269 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4271 return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4273 riscv_dsp_mat_mul_q15(src1, src2, dst, row, col, col2);
4277 static inline void hpm_dsp_mat_mul_fast_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4279 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4281 return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4283 riscv_dsp_mat_mul_fast_q15(src1, src2, dst, row, col, col2);
4304 static inline void hpm_dsp_cmat_mul_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4306 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4308 return tpt_mat_cmplx_mult_q15(dst, src1, src2, row, col, col2);
4310 riscv_dsp_cmat_mul_q15(src1, src2, dst, row, col, col2);
4331 static inline void hpm_dsp_mat_mul_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4333 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4335 return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4337 riscv_dsp_mat_mul_q31(src1, src2, dst, row, col, col2);
4341 static inline void hpm_dsp_mat_mul_fast_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4343 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4345 return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4347 riscv_dsp_mat_mul_fast_q31(src1, src2, dst, row, col, col2);
4368 static inline void hpm_dsp_cmat_mul_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4370 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4372 return tpt_mat_cmplx_mult_q31(dst, src1, src2, row, col, col2);
4374 riscv_dsp_cmat_mul_q31(src1, src2, dst, row, col, col2);
4395 static inline void hpm_dsp_mat_mul_q7(
const q7_t *src1,
const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4397 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4398 riscv_dsp_mat_mul_q7(src1, src2, dst, row, col, col2);
4419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4421 tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2);
4423 riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2);
4433 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4434 return riscv_dsp_mat_pwr2_cache_f64(src, dst,
size);
4447 static inline void hpm_dsp_mat_scale_f32(
const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
4449 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4451 tpt_mat_scale_f32(dst, src, row, col, scale);
4453 riscv_dsp_mat_scale_f32(src, scale, dst, row, col);
4472 static inline void hpm_dsp_mat_scale_q15(
const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
4474 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4476 tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift);
4478 riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col);
4497 static inline void hpm_dsp_mat_scale_q31(
const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
4499 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4501 tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift);
4503 riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col);
4520 float64_t *dst, uint32_t row, uint32_t col)
4522 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4524 tpt_mat_sub_f64(dst, src1, src2, row, col);
4526 riscv_dsp_mat_sub_f64(src1, src2, dst, row, col);
4539 static inline void hpm_dsp_mat_sub_f32(
const float32_t *src1,
const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4541 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4543 tpt_mat_sub_f32(dst, src1, src2, row, col);
4545 riscv_dsp_mat_sub_f32(src1, src2, dst, row, col);
4560 static inline void hpm_dsp_mat_sub_q15(
const q15_t *src1,
const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4562 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4564 tpt_mat_sub_q15(dst, src1, src2, row, col);
4566 riscv_dsp_mat_sub_q15(src1, src2, dst, row, col);
4581 static inline void hpm_dsp_mat_sub_q31(
const q31_t *src1,
const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4583 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4585 tpt_mat_sub_q31(dst, src1, src2, row, col);
4587 riscv_dsp_mat_sub_q31(src1, src2, dst, row, col);
4604 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4606 tpt_mat_trans_f64(dst, src, row, col);
4608 riscv_dsp_mat_trans_f64(src, dst, row, col);
4622 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4623 riscv_dsp_mat_trans_f32(src, dst, row, col);
4636 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4638 tpt_mat_trans_q15(dst, src, row, col);
4640 riscv_dsp_mat_trans_q15(src, dst, row, col);
4654 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4656 tpt_mat_trans_q31(dst, src, row, col);
4658 riscv_dsp_mat_trans_q31(src, dst, row, col);
4672 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4673 riscv_dsp_mat_trans_u8(src, dst, row, col);
4687 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4688 riscv_dsp_mat_trans_q7(src, dst, row, col);
4724 q31_t * dst, uint32_t size1, uint32_t size2)
4726 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4728 tpt_mat_oprod_q31(dst, src1, src2, size1, size2);
4730 riscv_dsp_mat_oprod_q31(src1, src2, dst, size1, size2);
4758 float32_t *dst, uint32_t row, uint32_t col)
4760 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4762 tpt_mat_mul_mxv_f32(dst, src1, src2, row, col);
4764 riscv_dsp_mat_mul_mxv_f32(src1, src2, dst, row, col);
4778 q15_t *dst, uint32_t row, uint32_t col)
4780 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4782 tpt_mat_mul_mxv_q15(dst, src1, src2, row, col);
4784 riscv_dsp_mat_mul_mxv_q15(src1, src2, dst, row, col);
4798 q31_t *dst, uint32_t row, uint32_t col)
4800 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4802 tpt_mat_mul_mxv_q31(dst, src1, src2, row, col);
4804 riscv_dsp_mat_mul_mxv_q31(src1, src2, dst, row, col);
4818 q7_t *dst, uint32_t row, uint32_t col)
4820 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4822 tpt_mat_mul_mxv_q7(dst, src1, src2, row, col);
4824 riscv_dsp_mat_mul_mxv_q7(src1, src2, dst, row, col);
4837 #ifdef HPM_MATH_DSP_SVM
4845 #ifdef HPM_EN_MATH_DSP_LIB
4847 #include "tpt_math.h"
4849 #include "riscv_dsp_svm_math.h"
4859 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4860 riscv_dsp_svm_linear_est_f32(instance, src, result);
4873 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4874 riscv_dsp_svm_sigmoid_est_f32(instance, src, result);
4887 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4888 riscv_dsp_svm_rbf_est_f32(instance, src, result);
4901 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4902 riscv_dsp_svm_poly_est_f32(instance, src, result);
4914 #ifdef HPM_MATH_DSP_TRANSFORM
4921 #ifdef HPM_EN_MATH_DSP_LIB
4923 #include "tpt_math.h"
4925 #include "riscv_dsp_transform_math.h"
4957 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4959 return tpt_cfft_f32(src, m,
false);
4961 return riscv_dsp_cfft_rd2_f32(src, m);
4975 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4977 return tpt_cfft_f32(src, m,
true);
4979 return riscv_dsp_cifft_rd2_f32(src, m);
5000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5002 return tpt_cfft_q15(src, m,
false);
5004 return riscv_dsp_cfft_rd2_q15(src, m);
5024 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5026 return tpt_cfft_q15(src, m,
true);
5028 return riscv_dsp_cifft_rd2_q15(src, m);
5048 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5050 return tpt_cfft_q31(src, m,
false);
5052 return riscv_dsp_cfft_rd2_q31(src, m);
5073 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5075 return tpt_cfft_q31(src, m,
true);
5077 return riscv_dsp_cifft_rd2_q31(src, m);
5114 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5116 return tpt_cfft_f32(src, m,
false);
5118 return riscv_dsp_cfft_rd4_f32(src, m);
5133 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5135 return tpt_cfft_f32(src, m,
true);
5137 return riscv_dsp_cifft_rd4_f32(src, m);
5157 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5159 return tpt_cfft_q15(src, m,
false);
5161 return riscv_dsp_cfft_rd4_q15(src, m);
5181 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5183 return tpt_cfft_q15(src, m,
true);
5185 return riscv_dsp_cifft_rd4_q15(src, m);
5205 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5207 return tpt_cfft_q31(src, m,
false);
5209 return riscv_dsp_cfft_rd4_q31(src, m);
5229 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5231 return tpt_cfft_q31(src, m,
true);
5233 return riscv_dsp_cifft_rd4_q31(src, m);
5260 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5262 tpt_cfft_f32(src, m,
false);
5264 riscv_dsp_cfft_f32(src, m);
5277 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5279 tpt_cfft_f64(src, m,
false);
5281 riscv_dsp_cfft_f64(src, m);
5294 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5296 tpt_cfft_f32(src, m,
true);
5298 riscv_dsp_cifft_f32(src, m);
5311 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5313 tpt_cfft_f64(src, m,
true);
5315 riscv_dsp_cifft_f64(src, m);
5335 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5337 tpt_cfft_q15(src, m,
false);
5339 riscv_dsp_cfft_q15(src, m);
5358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5360 tpt_cfft_q15(src, m,
true);
5362 riscv_dsp_cifft_q15(src, m);
5381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5383 tpt_cfft_q31(src, m,
false);
5385 riscv_dsp_cfft_q31(src, m);
5404 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5406 tpt_cfft_q31(src, m,
true);
5408 riscv_dsp_cifft_q31(src, m);
5444 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5446 return tpt_rfft_f32(src, src, m,
false);
5448 return riscv_dsp_rfft_f32(src, m);
5462 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5463 return riscv_dsp_rfft_f64(src, m);
5476 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5477 return riscv_dsp_rifft_f32(src, m);
5490 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5491 return riscv_dsp_rifft_f64(src, m);
5510 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5511 return riscv_dsp_rfft_q15(src, m);
5530 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5531 return riscv_dsp_rifft_q15(src, m);
5550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5551 return riscv_dsp_rfft_q31(src, m);
5570 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5571 return riscv_dsp_rifft_q31(src, m);
5595 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5596 riscv_dsp_dct_f32(src, m);
5608 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5609 riscv_dsp_idct_f32(src, m);
5627 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5628 riscv_dsp_dct_q15(src, m);
5646 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5647 riscv_dsp_idct_q15(src, m);
5665 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5666 riscv_dsp_dct_q31(src, m);
5684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5685 riscv_dsp_idct_q31(src, m);
5709 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5710 riscv_dsp_dct4_f32(src, m);
5722 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5723 riscv_dsp_idct4_f32(src, m);
5741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5742 riscv_dsp_dct4_q15(src, m);
5760 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5761 riscv_dsp_idct4_q15(src, m);
5779 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5780 riscv_dsp_dct4_q31(src, m);
5798 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5799 riscv_dsp_idct4_q31(src, m);
5818 #if defined(HPMSOC_HAS_HPMSDK_FFA) && defined(HPM_EN_MATH_DSP_LIB)
5821 #include "hpm_soc.h"
5834 static inline void hpm_ffa_cfft_q15(q15_t *src, uint32_t m)
5853 static inline void hpm_ffa_cfft_q31(q31_t *src, uint32_t m)
5865 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
5866 static inline void hpm_ffa_cfft_f32(
float *src, uint32_t m)
5876 ffa_set_coef_max_index(
HPM_FFA, 0);
5877 ffa_set_output_max_index(
HPM_FFA, 20);
5878 ffa_set_input_max_index(
HPM_FFA, 20 - m);
5890 static inline void hpm_ffa_cifft_q15(q15_t *src, uint32_t m)
5910 static inline void hpm_ffa_cifft_q31(q31_t *src, uint32_t m)
5922 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
5923 static inline void hpm_ffa_cifft_f32(
float *src, uint32_t m)
5933 ffa_set_coef_max_index(
HPM_FFA, 0x0);
5934 ffa_set_output_max_index(
HPM_FFA, 10);
5935 ffa_set_input_max_index(
HPM_FFA, 20);
5949 #ifdef HPM_MATH_DSP_UTILS
5959 #ifdef HPM_EN_MATH_DSP_LIB
5961 #include <tpt_math.h>
5963 #include "riscv_dsp_utils_math.h"
5967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5968 return riscv_dsp_cos_f32(src);
5973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5974 return riscv_dsp_cos_q31(src);
5979 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5980 return riscv_dsp_cos_q15(src);
5986 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5987 return riscv_dsp_sin_f32(src);
5991 #if defined (__riscv_zfh)
5996 static inline float16_t hpm_dsp_sin_f16(float16_t src)
5998 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5999 return riscv_dsp_sin_f16(src);
6006 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6007 return riscv_dsp_sin_q31(src);
6012 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6013 return riscv_dsp_sin_q15(src);
6020 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6021 return riscv_dsp_atan_f32(src);
6026 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6027 return riscv_dsp_atan_q31(src);
6032 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6033 return riscv_dsp_atan_q15(src);
6038 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6039 return riscv_dsp_atan2_f32(srcy, src2);
6044 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6045 return riscv_dsp_atan2_q15(srcy, src2);
6050 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6051 return riscv_dsp_atan2_q31(srcy, src2);
6063 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6064 return riscv_dsp_sqrt_f32(src);
6075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6076 return riscv_dsp_sqrt_q31(src);
6087 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6088 return riscv_dsp_sqrt_q15(src);
6101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6102 riscv_dsp_convert_f32_q15(src, dst,
size);
6114 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6116 tpt_f32_to_q31(dst, src,
size);
6118 riscv_dsp_convert_f32_q31(src, dst,
size);
6131 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6132 riscv_dsp_convert_f32_q7(src, dst,
size);
6144 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6145 riscv_dsp_convert_q15_f32(src, dst,
size);
6157 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6158 riscv_dsp_convert_q15_q31(src, dst,
size);
6170 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6171 riscv_dsp_convert_q15_q7(src, dst,
size);
6183 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6185 tpt_q31_to_f32(dst, src,
size);
6187 riscv_dsp_convert_q31_f32(src, dst,
size);
6200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6201 riscv_dsp_convert_q31_q15(src, dst,
size);
6213 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6214 riscv_dsp_convert_q31_q7(src, dst,
size);
6226 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6227 riscv_dsp_convert_q7_f32(src, dst,
size);
6239 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6240 riscv_dsp_convert_q7_q15(src, dst,
size);
6252 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6253 riscv_dsp_convert_q7_q31(src, dst,
size);
6266 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6267 riscv_dsp_dup_f32(src, dst,
size);
6279 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6280 riscv_dsp_dup_q15(src, dst,
size);
6292 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6293 riscv_dsp_dup_q31(src, dst,
size);
6305 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6306 riscv_dsp_dup_q7(src, dst,
size);
6319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6320 riscv_dsp_set_f32(val, dst,
size);
6332 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6333 riscv_dsp_set_q15(val, dst,
size);
6345 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6346 riscv_dsp_set_q31(val, dst,
size);
6358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6359 riscv_dsp_set_q7(val, dst,
size);
6373 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6374 return riscv_dsp_weighted_sum_f32(src, weight,
size);
6387 static inline void hpm_dsp_barycenter_f32(
const float32_t *src,
const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
6389 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6390 riscv_dsp_barycenter_f32(src, weights, out, numofvec, dimofvec);
6401 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6402 return riscv_dsp_exp_f32(src);
6406 #if defined (__riscv_zfh)
6412 static inline float16_t hpm_dsp_exp_f16(float16_t src)
6414 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6415 return riscv_dsp_exp_f16(src);
6427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6428 return riscv_dsp_sigmoid_f32(src);
6432 #if defined (__riscv_zfh)
6438 static inline float16_t hpm_dsp_sigmoid_f16(float16_t src)
6440 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6441 return riscv_dsp_sigmoid_f16(src);
6453 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6454 return riscv_dsp_log_f32(src);
6458 #if defined (__riscv_zfh)
6464 static inline float16_t hpm_dsp_log_f16(float16_t src)
6466 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6467 return riscv_dsp_log_f16(src);
6481 #ifdef HPM_MATH_DSP_SORT
6493 #ifdef HPM_EN_MATH_DSP_LIB
6494 #include "riscv_dsp_sort_math.h"
6517 static inline void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t * instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
6519 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6520 riscv_dsp_sort_init_f32(instance, alg, order);
6564 static inline void hpm_dsp_sort_f32(
const riscv_dsp_sort_f32_t * instance,float32_t * src, float32_t * dst, uint32_t
size)
6566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6567 riscv_dsp_sort_f32(instance, src, dst,
size);
6588 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6589 riscv_dsp_sort_merge_init_f32(instance, order, buf);
6628 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6629 riscv_dsp_sort_merge_f32(instance, src, dst,
size);
6636 #ifdef HPM_MATH_NN_TINYENGINE
6637 #ifdef HPM_EN_MATH_DSP_LIB
6639 #include "riscv_math_types.h"
6641 #include "riscv_simd_convert.h"
/*
 * Split a signed shift amount into a left part and a right part:
 *   LEFT_SHIFT(s)  -> s  when s is positive, otherwise 0
 *   RIGHT_SHIFT(s) -> -s when s is zero or negative, otherwise 0
 *
 * The argument is fully parenthesised so that expression arguments such as
 * `a - b` or `x & mask` expand with the intended precedence. It is still
 * evaluated more than once, so it must not have side effects.
 */
#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
/* The bit patterns 0x7FFFFFFF and 0x80000000 cast to q31_t. */
#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
#define Q31_MIN ((q31_t)(0x80000000L))
6653 (*pQ15)[0] = (val & 0x0FFFF);
6654 (*pQ15)[1] = (val >> 16) & 0x0FFFF;
6668 val = *(q31_t *)(*in_q15);
6685 q63_t mult = 1 << 30;
6687 if ((m1 < 0) ^ (m2 < 0)) {
6690 mult = mult + (q63_t)m1 * m2;
6691 result = mult / (1UL << 31);
6693 if ((m1 == m2) && (m1 == (int32_t)
Q31_MIN)) {
6711 const q31_t remainder_mask = (1l << exponent) - 1;
6712 int32_t remainder = remainder_mask & dividend;
6714 result = dividend >> exponent;
6715 q31_t threshold = remainder_mask >> 1;
6719 if (remainder > threshold) {
6726 __STATIC_FORCEINLINE q31_t
hpm_nn_requantize(
const q31_t val,
const q31_t multiplier,
const q31_t shift)
6741 val = *(q31_t *)(*in_q7);
6755 *out2 = __SXTB16_ROR(inA, 8);
6756 *out1 = __SXTB16(inA);
6765 __STATIC_FORCEINLINE
const q7_t *
read_and_pad(
const q7_t *source, q31_t *out1, q31_t *out2)
6768 q31_t inAbuf1 = __SXTB16_ROR(inA, 8);
6769 q31_t inAbuf2 = __SXTB16(inA);
6771 *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
6772 *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
6786 val = *(int32_t *)(*in_s8);
6800 int32_t out_q15x2_1;
6801 int32_t out_q15x2_2;
6804 block_cnt = block_size >> 2;
6807 const int32_t offset_q15x2 = __PKHBT(offset, offset, 16);
6808 while (block_cnt > 0) {
6813 in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8));
6814 in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
6816 out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16);
6817 out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16);
6825 block_cnt = block_size % 0x4;
6827 while (block_cnt > 0) {
6828 *dst++ = (int16_t)*src++ + offset;
6838 #ifdef HPM_MATH_NN_ACTIVATION
6839 #ifdef HPM_EN_MATH_NN_LIB
6840 #if defined(__zcc__)
6841 #include "tpt_nn_activation.h"
6843 #include "riscv_nn_activation.h"
6879 riscv_nn_activation_fun act_fun)
6881 #if defined(__zcc__)
6882 tpt_nn_activate_s8(in_out,
size, int_bits, act_fun);
6884 riscv_nn_activate_s8(in_out,
size, int_bits, act_fun);
6906 riscv_nn_activation_fun act_fun)
6908 #if defined(__zcc__)
6909 tpt_nn_activate_s16(in_out,
size, int_bits, act_fun);
6911 riscv_nn_activate_s16(in_out,
size, int_bits, act_fun);
6935 #if defined(__zcc__)
6936 tpt_nn_leaky_relu_q7(in_out, in_out,
size,
slope);
6951 #if defined(__zcc__)
6952 tpt_nn_relu_any_q7(data,
size, max_val);
6954 riscv_nn_relu_any_s8(data,
size, max_val);
6976 #if defined(__zcc__)
6977 tpt_nn_relu_q7(in_out,
size);
6979 riscv_nn_relu_s8(in_out,
size);
6991 #if defined(__zcc__)
6992 tpt_nn_relu_q15(in_out,
size);
6994 riscv_nn_relu_s16(in_out,
size);
7008 static inline int32_t hpm_nn_sigmoid_f16(
const float16_t *in_vec,
7012 #if defined(__zcc__)
7015 return riscv_nn_sigmoid_f16(in_vec,
size,
out_vec);
7027 static inline int32_t hpm_nn_tanh_f16(
const float16_t *in_vec,
7031 #if defined(__zcc__)
7045 #ifdef HPM_MATH_NN_BASIC
7046 #ifdef HPM_EN_MATH_NN_LIB
7047 #if defined(__zcc__)
7048 #include "tpt_nn_basic.h"
7050 #include "riscv_nn_basic.h"
7098 const q7_t *in_tensor2,
7099 const int16_t *scale1,
7100 const int16_t *scale2,
7101 const uint32_t
size,
7102 const uint16_t pre_rshift,
7103 const uint16_t out_scale,
7104 const uint16_t post_rshift,
7107 #if defined(__zcc__)
7108 tpt_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2,
size, pre_rshift,
7109 out_scale, post_rshift, out);
7111 riscv_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2,
size, pre_rshift,
7112 out_scale, post_rshift, out);
7136 const q7_t *in_tensor2,
7137 const uint32_t scale1,
7138 const uint32_t scale2,
7139 const uint32_t
size,
7140 const uint16_t pre_rshift,
7141 const uint16_t out_scale,
7142 const uint16_t post_rshift,
7145 #if defined(__zcc__)
7146 tpt_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2,
size,
7147 pre_rshift, out_scale, post_rshift, out);
7149 riscv_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2,
size,
7150 pre_rshift, out_scale, post_rshift, out);
7206 const int8_t *in_tensor2,
7207 const int32_t in_offset1,
7208 const int32_t in_scale1,
7209 const int32_t in_rshift1,
7210 const int32_t in_offset2,
7211 const int32_t in_scale2,
7212 const int32_t in_rshift2,
7213 const int32_t lshift,
7215 const int32_t out_offset,
7216 const int32_t out_scale,
7218 const int32_t act_min,
7219 const int32_t act_max,
7220 const uint32_t
size)
7222 #if defined(__zcc__)
7223 return tpt_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7224 in_rshift1, in_offset2, in_scale2, in_rshift2,
7225 lshift, out, out_offset, out_scale,
out_rshift,
7226 act_min, act_max,
size);
7228 return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7229 in_rshift1, in_offset2, in_scale2, in_rshift2,
7230 lshift, out, out_offset, out_scale,
out_rshift,
7231 act_min, act_max,
size);
7275 const int8_t *in_tensor2,
7276 const int32_t in_offset1,
7277 const int32_t in_offset2,
7279 const int32_t out_offset,
7280 const int32_t out_scale,
7281 const int32_t out_shift,
7282 const int32_t act_min,
7283 const int32_t act_max,
7284 const uint32_t
size)
7286 #if defined(__zcc__)
7287 return tpt_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7288 out, out_offset, out_scale, out_shift, act_min,
7291 return riscv_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7292 out, out_offset, out_scale, out_shift, act_min,
7303 #ifdef HPM_EN_MATH_NN_RVP32_LIB
7304 #if defined(__zcc__)
7305 #include "tpt_nn_basic.h"
7307 #include "riscv_nn_basic.h"
7363 const int8_t *in_tensor2,
7364 const int32_t in_offset1,
7365 const int32_t in_scale1,
7366 const int32_t in_rshift1,
7367 const int32_t in_offset2,
7368 const int32_t in_scale2,
7369 const int32_t in_rshift2,
7370 const int32_t lshift,
7372 const int32_t out_offset,
7373 const int32_t out_scale,
7375 const int32_t act_min,
7376 const int32_t act_max,
7377 const uint32_t
size)
7379 #if defined(__zcc__)
7380 return tpt_elementwise_add_s8(out, out_offset, out_scale, -
out_rshift, act_min,
7381 act_max, in_tensor1, in_tensor2, in_offset1, in_scale1,
7382 in_rshift1, in_offset2, in_scale2, in_rshift2,
7385 return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7386 in_rshift1, in_offset2, in_scale2, in_rshift2,
7387 lshift, out, out_offset, out_scale,
out_rshift,
7388 act_min, act_max,
size);
7396 #ifdef HPM_MATH_NN_CONCATENATION
7397 #ifdef HPM_EN_MATH_NN_LIB
7398 #if defined(__zcc__)
7399 #include "tpt_nn_concatenation.h"
7401 #include "riscv_nn_concatenation.h"
7430 const uint16_t in_tensor_x,
7431 const uint16_t in_tensor_y,
7432 const uint16_t in_tensor_z,
7433 const uint16_t in_tensor_w,
7435 const uint32_t out_offset_w)
7437 #if defined(__zcc__)
7438 tpt_concatenation_s8_w(out_tensor, in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7439 in_tensor_w, out_offset_w);
7441 riscv_nn_concate_s8_w(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7442 in_tensor_w, out_tensor, out_offset_w);
7465 const uint16_t in_tensor_x,
7466 const uint16_t in_tensor_y,
7467 const uint16_t in_tensor_z,
7468 const uint16_t in_tensor_w,
7470 const uint16_t out_tensor_x,
7471 const uint32_t out_offset_x)
7473 #if defined(__zcc__)
7474 tpt_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7475 in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7477 riscv_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7478 in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7500 const uint16_t in_tensor_x,
7501 const uint16_t in_tensor_y,
7502 const uint16_t in_tensor_z,
7503 const uint16_t in_tensor_w,
7505 const uint16_t out_tensor_y,
7506 const uint32_t out_offset_y)
7508 #if defined(__zcc__)
7509 tpt_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7510 in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7512 riscv_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7513 in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7535 const uint16_t in_tensor_x,
7536 const uint16_t in_tensor_y,
7537 const uint16_t in_tensor_z,
7538 const uint16_t in_tensor_w,
7540 const uint16_t out_tensor_z,
7541 const uint32_t out_offset_z)
7543 #if defined(__zcc__)
7544 tpt_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7545 in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7547 riscv_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7548 in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7559 #ifdef HPM_MATH_NN_CONVOLUTION
7560 #ifdef HPM_EN_MATH_NN_LIB
7561 #if defined(__zcc__)
7562 #include "tpt_nn_convolution.h"
7564 #include "riscv_nn_convolution.h"
7655 const uint16_t in_tensor_dim_x,
7656 const uint16_t in_tensor_dim_y,
7657 const uint16_t in_tensor_ch,
7658 const q7_t *ker_weight,
7659 const uint16_t out_tensor_ch,
7660 const uint16_t ker_dim_x,
7661 const uint16_t ker_dim_y,
7662 const uint16_t pad_x,
7663 const uint16_t pad_y,
7664 const uint16_t stride_x,
7665 const uint16_t stride_y,
7670 const uint16_t out_tensor_dim_x,
7671 const uint16_t out_tensor_dim_y,
7675 #if defined(__zcc__)
7676 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7677 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7678 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7682 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7683 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7684 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7742 const uint16_t in_tensor_dim,
7743 const q7_t *ker_weight,
7744 const uint16_t out_tensor_ch,
7745 const uint16_t ker_dim,
7747 const uint16_t stride,
7752 const uint16_t out_tensor_dim,
7756 #if defined(__zcc__)
7757 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7758 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7762 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7763 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7821 const uint16_t in_tensor_dim,
7822 const q7_t *ker_weight,
7823 const uint16_t out_tensor_ch,
7824 const uint16_t ker_dim,
7826 const uint16_t stride,
7831 const uint16_t out_tensor_dim,
7835 #if defined(__zcc__)
7836 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7837 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7841 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7842 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7900 const uint16_t in_tensor_dim,
7901 const uint16_t in_tensor_ch,
7902 const q7_t *ker_weight,
7903 const uint16_t out_tensor_ch,
7904 const uint16_t ker_dim,
7906 const uint16_t stride,
7911 const uint16_t out_tensor_dim,
7915 #if defined(__zcc__)
7916 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias(
7917 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7921 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias(
7922 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7990 const uint16_t in_tensor_dim_x,
7991 const uint16_t in_tensor_dim_y,
7992 const uint16_t in_tensor_ch,
7993 const q7_t *ker_weight,
7994 const uint16_t out_tensor_ch,
7995 const uint16_t ker_dim_x,
7996 const uint16_t ker_dim_y,
7997 const uint16_t pad_x,
7998 const uint16_t pad_y,
7999 const uint16_t stride_x,
8000 const uint16_t stride_y,
8005 const uint16_t out_tensor_dim_x,
8006 const uint16_t out_tensor_dim_y,
8010 #if defined(__zcc__)
8011 tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any(
8012 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8013 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8017 riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(
8018 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8019 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8078 const uint16_t in_tensor_dim,
8079 const uint16_t in_tensor_ch,
8080 const q7_t *ker_weight,
8081 const uint16_t out_tensor_ch,
8082 const uint16_t ker_dim,
8084 const uint16_t stride,
8089 const uint16_t out_tensor_dim,
8093 #if defined(__zcc__)
8094 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8095 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8099 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8100 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8173 const uint16_t in_tensor_dim_x,
8174 const uint16_t in_tensor_dim_y,
8175 const uint16_t in_tensor_ch,
8176 const q7_t *ker_weight,
8177 const uint16_t out_tensor_ch,
8178 const uint16_t ker_dim_x,
8179 const uint16_t ker_dim_y,
8180 const uint16_t pad_x,
8181 const uint16_t pad_y,
8182 const uint16_t stride_x,
8183 const uint16_t stride_y,
8188 const uint16_t out_tensor_dim_x,
8189 const uint16_t out_tensor_dim_y,
8193 #if defined(__zcc__)
8194 return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8195 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8196 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8200 return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8201 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8202 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8260 const uint16_t in_tensor_dim,
8261 const uint16_t in_tensor_ch,
8262 const q15_t *ker_weight,
8263 const uint16_t out_tensor_ch,
8264 const uint16_t ker_dim,
8266 const uint16_t stride,
8271 const uint16_t out_tensor_dim,
8275 #if defined(__zcc__)
8276 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias(
8277 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8281 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias(
8282 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8341 const uint16_t in_tensor_dim,
8342 const uint16_t in_tensor_ch,
8343 const q15_t *ker_weight,
8344 const uint16_t out_tensor_ch,
8345 const uint16_t ker_dim,
8347 const uint16_t stride,
8352 const uint16_t out_tensor_dim,
8356 #if defined(__zcc__)
8357 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8358 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8362 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8363 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8436 const uint16_t in_tensor_dim_x,
8437 const uint16_t in_tensor_dim_y,
8438 const uint16_t in_tensor_ch,
8439 const q15_t *ker_weight,
8440 const uint16_t out_tensor_ch,
8441 const uint16_t ker_dim_x,
8442 const uint16_t ker_dim_y,
8443 const uint16_t pad_x,
8444 const uint16_t pad_y,
8445 const uint16_t stride_x,
8446 const uint16_t stride_y,
8451 const uint16_t out_tensor_dim_x,
8452 const uint16_t out_tensor_dim_y,
8456 #if defined(__zcc__)
8457 return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8458 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8459 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8463 return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8464 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8465 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8523 const uint16_t in_tensor_dim,
8524 const uint16_t in_tensor_ch,
8525 const q7_t *ker_weight,
8526 const uint16_t out_tensor_ch,
8527 const uint16_t ker_dim,
8529 const uint16_t stride,
8534 const uint16_t out_tensor_dim,
8538 #if defined(__zcc__)
8539 return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8540 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8544 return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8545 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8614 const uint16_t in_tensor_dim_x,
8615 const uint16_t in_tensor_dim_y,
8616 const uint16_t in_tensor_ch,
8617 const q7_t *ker_weight,
8618 const uint16_t out_tensor_ch,
8619 const uint16_t ker_dim_x,
8620 const uint16_t ker_dim_y,
8621 const uint16_t pad_x,
8622 const uint16_t pad_y,
8623 const uint16_t stride_x,
8624 const uint16_t stride_y,
8629 const uint16_t out_tensor_dim_x,
8630 const uint16_t out_tensor_dim_y,
8634 #if defined(__zcc__)
8635 return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8636 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8637 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8641 return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8642 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8643 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8695 const uint16_t in_tensor_dim_x,
8696 const uint16_t in_tensor_dim_y,
8697 const uint16_t in_tensor_ch,
8698 const q7_t *ker_weight,
8699 const uint16_t out_tensor_ch,
8700 const uint16_t ker_dim_x,
8701 const uint16_t ker_dim_y,
8702 const uint16_t pad_x,
8703 const uint16_t pad_y,
8704 const uint16_t stride_x,
8705 const uint16_t stride_y,
8707 const uint16_t pre_rshift,
8708 const uint16_t out_scale,
8709 const uint16_t post_rshift,
8711 const uint16_t out_tensor_dim_x,
8712 const uint16_t out_tensor_dim_y,
8715 #if defined(__zcc__)
8717 tpt_nn_conv_1x1_sym_params S1 = {stride_x, stride_y, pad_x, pad_y, pre_rshift, out_scale, post_rshift};
8718 tpt_nn_1x1_sym_dims S2 = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x, ker_dim_y,
8719 out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
8720 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8721 out_tensor_ch, in_tensor, ker_weight,
bias, &S1, &S2,
in_tmp_buf);
8724 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8725 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8726 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8727 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8779 const uint16_t in_tensor_dim_x,
8780 const uint16_t in_tensor_dim_y,
8781 const uint16_t in_tensor_ch,
8782 const q7_t *ker_weight,
8783 const uint16_t out_tensor_ch,
8784 const uint16_t ker_dim_x,
8785 const uint16_t ker_dim_y,
8786 const uint16_t pad_x,
8787 const uint16_t pad_y,
8788 const uint16_t stride_x,
8789 const uint16_t stride_y,
8791 const uint16_t pre_rshift,
8792 const uint16_t out_scale,
8793 const uint16_t post_rshift,
8795 const uint16_t out_tensor_dim_x,
8796 const uint16_t out_tensor_dim_y,
8799 #if defined(__zcc__)
8800 return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8801 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8802 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8803 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8806 return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8807 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8808 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8809 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8860 const uint16_t in_tensor_dim_x,
8861 const uint16_t in_tensor_dim_y,
8862 const uint16_t in_tensor_ch,
8863 const q7_t *ker_weight,
8864 const uint16_t out_tensor_ch,
8865 const uint16_t ker_dim_x,
8866 const uint16_t ker_dim_y,
8867 const uint16_t pad_x,
8868 const uint16_t pad_y,
8869 const uint16_t stride_x,
8870 const uint16_t stride_y,
8872 const uint16_t pre_rshift,
8873 const uint16_t out_scale,
8874 const uint16_t post_rshift,
8876 const uint16_t out_tensor_dim_x,
8877 const uint16_t out_tensor_dim_y,
8880 #if defined(__zcc__)
8881 return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8882 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8883 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8884 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8887 return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8888 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8889 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8890 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8942 const uint16_t in_tensor_dim_x,
8943 const uint16_t in_tensor_dim_y,
8944 const uint16_t in_tensor_ch,
8945 const q7_t *ker_weight,
8946 const uint16_t out_tensor_ch,
8947 const uint16_t ker_dim_x,
8948 const uint16_t ker_dim_y,
8949 const uint16_t pad_x,
8950 const uint16_t pad_y,
8951 const uint16_t stride_x,
8952 const uint16_t stride_y,
8954 const uint16_t pre_rshift,
8955 const uint16_t out_scale,
8956 const uint16_t post_rshift,
8958 const uint16_t out_tensor_dim_x,
8959 const uint16_t out_tensor_dim_y,
8962 #if defined(__zcc__)
8963 return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8964 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8965 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8966 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8969 return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8970 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8971 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8972 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9024 const uint16_t in_tensor_dim_x,
9025 const uint16_t in_tensor_dim_y,
9026 const uint16_t in_tensor_ch,
9027 const q7_t *ker_weight,
9028 const uint16_t out_tensor_ch,
9029 const uint16_t ker_dim_x,
9030 const uint16_t ker_dim_y,
9031 const uint16_t pad_x,
9032 const uint16_t pad_y,
9033 const uint16_t stride_x,
9034 const uint16_t stride_y,
9036 const uint16_t pre_rshift,
9037 const uint16_t out_scale,
9038 const uint16_t post_rshift,
9040 const uint16_t out_tensor_dim_x,
9041 const uint16_t out_tensor_dim_y,
9044 #if defined(__zcc__)
9045 return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9046 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9047 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9048 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9051 return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9052 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9053 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9054 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9104 const uint16_t in_tensor_dim_x,
9105 const uint16_t in_tensor_dim_y,
9106 const uint16_t in_tensor_ch,
9107 const q7_t *ker_weight,
9108 const uint16_t out_tensor_ch,
9109 const uint16_t ker_dim_x,
9110 const uint16_t ker_dim_y,
9111 const uint16_t pad_x,
9112 const uint16_t pad_y,
9113 const uint16_t stride_x,
9114 const uint16_t stride_y,
9115 const uint16_t pre_rshift,
9116 const uint16_t out_scale,
9117 const uint16_t post_rshift,
9119 const uint16_t out_tensor_dim_x,
9120 const uint16_t out_tensor_dim_y,
9123 #if defined(__zcc__)
9124 return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9125 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9126 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9127 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9130 return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9131 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9132 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9133 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9184 const uint16_t in_tensor_dim_x,
9185 const uint16_t in_tensor_dim_y,
9186 const uint16_t in_tensor_ch,
9187 const q7_t *ker_weight,
9188 const uint16_t out_tensor_ch,
9189 const uint16_t ker_dim_x,
9190 const uint16_t ker_dim_y,
9191 const uint16_t pad_x,
9192 const uint16_t pad_y,
9193 const uint16_t stride_x,
9194 const uint16_t stride_y,
9195 const uint16_t pre_rshift,
9196 const uint16_t out_scale,
9197 const uint16_t post_rshift,
9199 const uint16_t out_tensor_dim_x,
9200 const uint16_t out_tensor_dim_y,
9203 #if defined(__zcc__)
9204 return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9205 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9206 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9207 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9210 return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9211 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9212 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9213 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9263 const uint16_t in_tensor_dim_x,
9264 const uint16_t in_tensor_dim_y,
9265 const uint16_t in_tensor_ch,
9266 const q7_t *ker_weight,
9267 const uint16_t out_tensor_ch,
9268 const uint16_t ker_dim_x,
9269 const uint16_t ker_dim_y,
9270 const uint16_t pad_x,
9271 const uint16_t pad_y,
9272 const uint16_t stride_x,
9273 const uint16_t stride_y,
9274 const uint16_t pre_rshift,
9275 const uint16_t out_scale,
9276 const uint16_t post_rshift,
9278 const uint16_t out_tensor_dim_x,
9279 const uint16_t out_tensor_dim_y,
9282 #if defined(__zcc__)
9283 return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9284 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9285 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9286 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9289 return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9290 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9291 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9292 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9343 const uint16_t in_tensor_dim_x,
9344 const uint16_t in_tensor_dim_y,
9345 const uint16_t in_tensor_ch,
9346 const q7_t *ker_weight,
9347 const uint16_t out_tensor_ch,
9348 const uint16_t ker_dim_x,
9349 const uint16_t ker_dim_y,
9350 const uint16_t pad_x,
9351 const uint16_t pad_y,
9352 const uint16_t stride_x,
9353 const uint16_t stride_y,
9354 const uint16_t pre_rshift,
9355 const uint16_t out_scale,
9356 const uint16_t post_rshift,
9358 const uint16_t out_tensor_dim_x,
9359 const uint16_t out_tensor_dim_y,
9362 #if defined(__zcc__)
9363 return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9364 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9365 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9366 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9369 return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9370 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9371 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9372 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9423 const uint16_t in_tensor_dim_x,
9424 const uint16_t in_tensor_dim_y,
9425 const uint16_t in_tensor_ch,
9426 const q7_t *ker_weight,
9427 const uint16_t out_tensor_ch,
9428 const uint16_t ker_dim_x,
9429 const uint16_t ker_dim_y,
9430 const uint16_t pad_x,
9431 const uint16_t pad_y,
9432 const uint16_t stride_x,
9433 const uint16_t stride_y,
9434 const uint16_t pre_rshift,
9435 const uint16_t out_scale,
9436 const uint16_t post_rshift,
9438 const uint16_t out_tensor_dim_x,
9439 const uint16_t out_tensor_dim_y,
9442 #if defined(__zcc__)
9443 return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9444 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9445 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9446 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9449 return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9450 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9451 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9452 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9489 const uint16_t in_tensor_dim,
9490 const q7_t *ker_weight,
9491 const uint16_t out_tensor_ch,
9492 const uint16_t ker_dim,
9494 const uint16_t stride,
9496 const uint16_t pre_rshift,
9497 const uint16_t out_scale,
9498 const uint16_t post_rshift,
9500 const uint16_t out_tensor_dim,
9504 #if defined(__zcc__)
9505 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9506 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9507 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9510 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9511 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9512 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9550 const uint16_t in_tensor_dim,
9551 const q7_t *ker_weight,
9552 const uint16_t out_tensor_ch,
9553 const uint16_t ker_dim,
9555 const uint16_t stride,
9557 const uint16_t pre_rshift,
9558 const uint16_t out_scale,
9559 const uint16_t post_rshift,
9561 const uint16_t out_tensor_dim,
9565 #if defined(__zcc__)
9566 return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9567 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9568 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9571 return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9572 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9573 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9610 const uint16_t in_tensor_dim,
9611 const q7_t *ker_weight,
9612 const uint16_t out_tensor_ch,
9613 const uint16_t ker_dim,
9615 const uint16_t stride,
9617 const uint16_t pre_rshift,
9618 const uint16_t out_scale,
9619 const uint16_t post_rshift,
9621 const uint16_t out_tensor_dim,
9625 #if defined(__zcc__)
9626 return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9627 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9628 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9631 return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9632 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9633 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9670 const uint16_t in_tensor_dim,
9671 const q7_t *ker_weight,
9672 const uint16_t out_tensor_ch,
9673 const uint16_t ker_dim,
9675 const uint16_t stride,
9677 const uint16_t pre_rshift,
9678 const uint16_t out_scale,
9679 const uint16_t post_rshift,
9681 const uint16_t out_tensor_dim,
9685 #if defined(__zcc__)
9686 return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9687 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9688 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9691 return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9692 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9693 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9731 const uint16_t in_tensor_dim,
9732 const q7_t *ker_weight,
9733 const uint16_t out_tensor_ch,
9734 const uint16_t ker_dim,
9736 const uint16_t stride,
9738 const uint16_t pre_rshift,
9739 const uint16_t out_scale,
9740 const uint16_t post_rshift,
9742 const uint16_t out_tensor_dim,
9746 #if defined(__zcc__)
9747 return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9748 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9749 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9752 return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9753 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9754 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9790 const uint16_t in_tensor_dim,
9791 const q7_t *ker_weight,
9792 const uint16_t out_tensor_ch,
9793 const uint16_t ker_dim,
9795 const uint16_t stride,
9796 const uint16_t pre_rshift,
9797 const uint16_t out_scale,
9798 const uint16_t post_rshift,
9800 const uint16_t out_tensor_dim,
9804 #if defined(__zcc__)
9805 return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9806 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9807 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9810 return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9811 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9812 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9848 const uint16_t in_tensor_dim,
9849 const q7_t *ker_weight,
9850 const uint16_t out_tensor_ch,
9851 const uint16_t ker_dim,
9853 const uint16_t stride,
9854 const uint16_t pre_rshift,
9855 const uint16_t out_scale,
9856 const uint16_t post_rshift,
9858 const uint16_t out_tensor_dim,
9862 #if defined(__zcc__)
9863 return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9864 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9865 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9868 return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9869 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9870 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9906 const uint16_t in_tensor_dim,
9907 const q7_t *ker_weight,
9908 const uint16_t out_tensor_ch,
9909 const uint16_t ker_dim,
9911 const uint16_t stride,
9912 const uint16_t pre_rshift,
9913 const uint16_t out_scale,
9914 const uint16_t post_rshift,
9916 const uint16_t out_tensor_dim,
9920 #if defined(__zcc__)
9921 return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9922 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9923 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9926 return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9927 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9928 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9964 const uint16_t in_tensor_dim,
9965 const q7_t *ker_weight,
9966 const uint16_t out_tensor_ch,
9967 const uint16_t ker_dim,
9969 const uint16_t stride,
9970 const uint16_t pre_rshift,
9971 const uint16_t out_scale,
9972 const uint16_t post_rshift,
9974 const uint16_t out_tensor_dim,
9978 #if defined(__zcc__)
9979 return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9980 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9981 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9984 return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9985 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9986 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10022 const uint16_t in_tensor_dim,
10023 const q7_t *ker_weight,
10024 const uint16_t out_tensor_ch,
10025 const uint16_t ker_dim,
10026 const uint16_t pad,
10027 const uint16_t stride,
10028 const uint16_t pre_rshift,
10029 const uint16_t out_scale,
10030 const uint16_t post_rshift,
10032 const uint16_t out_tensor_dim,
10036 #if defined(__zcc__)
10037 return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10038 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10039 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10042 return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10043 in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10044 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10080 const uint16_t in_tensor_dim,
10081 const uint16_t in_tensor_ch,
10082 const q7_t *ker_weight,
10083 const uint16_t out_tensor_ch,
10084 const uint16_t ker_dim,
10085 const uint16_t pad,
10086 const uint16_t stride,
10088 const uint16_t pre_rshift,
10089 const uint16_t out_scale,
10090 const uint16_t post_rshift,
10092 const uint16_t out_tensor_dim,
10095 #if defined(__zcc__)
10096 return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10097 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10098 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10101 return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10102 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10103 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10139 const uint16_t in_tensor_dim,
10140 const uint16_t in_tensor_ch,
10141 const q7_t *ker_weight,
10142 const uint16_t out_tensor_ch,
10143 const uint16_t ker_dim,
10144 const uint16_t pad,
10145 const uint16_t stride,
10147 const uint16_t pre_rshift,
10148 const uint16_t out_scale,
10149 const uint16_t post_rshift,
10151 const uint16_t out_tensor_dim,
10154 #if defined(__zcc__)
10155 return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10156 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10157 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10160 return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10161 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10162 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10198 const uint16_t in_tensor_dim,
10199 const uint16_t in_tensor_ch,
10200 const q7_t *ker_weight,
10201 const uint16_t out_tensor_ch,
10202 const uint16_t ker_dim,
10203 const uint16_t pad,
10204 const uint16_t stride,
10206 const uint16_t pre_rshift,
10207 const uint16_t out_scale,
10208 const uint16_t post_rshift,
10210 const uint16_t out_tensor_dim,
10213 #if defined(__zcc__)
10214 return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10215 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10216 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10219 return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10220 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10221 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10257 const uint16_t in_tensor_dim,
10258 const uint16_t in_tensor_ch,
10259 const q7_t *ker_weight,
10260 const uint16_t out_tensor_ch,
10261 const uint16_t ker_dim,
10262 const uint16_t pad,
10263 const uint16_t stride,
10265 const uint16_t pre_rshift,
10266 const uint16_t out_scale,
10267 const uint16_t post_rshift,
10269 const uint16_t out_tensor_dim,
10272 #if defined(__zcc__)
10273 return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10274 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10275 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10278 return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10279 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10280 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10316 const uint16_t in_tensor_dim,
10317 const uint16_t in_tensor_ch,
10318 const q7_t *ker_weight,
10319 const uint16_t out_tensor_ch,
10320 const uint16_t ker_dim,
10321 const uint16_t pad,
10322 const uint16_t stride,
10324 const uint16_t pre_rshift,
10325 const uint16_t out_scale,
10326 const uint16_t post_rshift,
10328 const uint16_t out_tensor_dim,
10331 #if defined(__zcc__)
10332 return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10333 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10334 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10337 return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10338 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10339 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
10374 const uint16_t in_tensor_dim,
10375 const uint16_t in_tensor_ch,
10376 const q7_t *ker_weight,
10377 const uint16_t out_tensor_ch,
10378 const uint16_t ker_dim,
10379 const uint16_t pad,
10380 const uint16_t stride,
10381 const uint16_t pre_rshift,
10382 const uint16_t out_scale,
10383 const uint16_t post_rshift,
10385 const uint16_t out_tensor_dim,
10388 #if defined(__zcc__)
10389 return tpt_nn_conv_HWC_s8_s8_s8_sym_fast(
10390 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10391 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10394 return riscv_nn_conv_HWC_s8_s8_s8_sym_fast(
10395 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10396 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10431 const uint16_t in_tensor_dim,
10432 const uint16_t in_tensor_ch,
10433 const q7_t *ker_weight,
10434 const uint16_t out_tensor_ch,
10435 const uint16_t ker_dim,
10436 const uint16_t pad,
10437 const uint16_t stride,
10438 const uint16_t pre_rshift,
10439 const uint16_t out_scale,
10440 const uint16_t post_rshift,
10442 const uint16_t out_tensor_dim,
10445 #if defined(__zcc__)
10446 return tpt_nn_conv_HWC_s8_s16_s8_sym_fast(
10447 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10448 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10451 return riscv_nn_conv_HWC_s8_s16_s8_sym_fast(
10452 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10453 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10488 const uint16_t in_tensor_dim,
10489 const uint16_t in_tensor_ch,
10490 const q7_t *ker_weight,
10491 const uint16_t out_tensor_ch,
10492 const uint16_t ker_dim,
10493 const uint16_t pad,
10494 const uint16_t stride,
10495 const uint16_t pre_rshift,
10496 const uint16_t out_scale,
10497 const uint16_t post_rshift,
10499 const uint16_t out_tensor_dim,
10502 #if defined(__zcc__)
10503 return tpt_nn_conv_HWC_u8_u8_s8_sym_fast(
10504 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10505 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10508 return riscv_nn_conv_HWC_u8_u8_s8_sym_fast(
10509 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10510 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10545 const uint16_t in_tensor_dim,
10546 const uint16_t in_tensor_ch,
10547 const q7_t *ker_weight,
10548 const uint16_t out_tensor_ch,
10549 const uint16_t ker_dim,
10550 const uint16_t pad,
10551 const uint16_t stride,
10552 const uint16_t pre_rshift,
10553 const uint16_t out_scale,
10554 const uint16_t post_rshift,
10556 const uint16_t out_tensor_dim,
10559 #if defined(__zcc__)
10560 return tpt_nn_conv_HWC_u8_s8_s8_sym_fast(
10561 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10562 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10565 return riscv_nn_conv_HWC_u8_s8_s8_sym_fast(
10566 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10567 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10602 const uint16_t in_tensor_dim,
10603 const uint16_t in_tensor_ch,
10604 const q7_t *ker_weight,
10605 const uint16_t out_tensor_ch,
10606 const uint16_t ker_dim,
10607 const uint16_t pad,
10608 const uint16_t stride,
10609 const uint16_t pre_rshift,
10610 const uint16_t out_scale,
10611 const uint16_t post_rshift,
10613 const uint16_t out_tensor_dim,
10616 #if defined(__zcc__)
10617 return tpt_nn_conv_HWC_u8_s16_s8_sym_fast(
10618 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10619 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10622 return riscv_nn_conv_HWC_u8_s16_s8_sym_fast(
10623 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10624 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10666 const uint16_t in_tensor_dim_x,
10667 const uint16_t in_tensor_dim_y,
10668 const uint16_t in_tensor_ch,
10669 const q7_t *ker_weight,
10670 const uint16_t out_tensor_ch,
10671 const uint16_t ker_dim_x,
10672 const uint16_t ker_dim_y,
10673 const uint16_t pad_x,
10674 const uint16_t pad_y,
10675 const uint16_t stride_x,
10676 const uint16_t stride_y,
10678 const uint16_t pre_rshift,
10679 const uint16_t out_scale,
10680 const uint16_t post_rshift,
10682 const uint16_t out_tensor_dim_x,
10683 const uint16_t out_tensor_dim_y,
10686 #if defined(__zcc__)
10687 return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10688 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10689 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10690 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10693 return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10694 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10695 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10696 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10739 const uint16_t in_tensor_dim_x,
10740 const uint16_t in_tensor_dim_y,
10741 const uint16_t in_tensor_ch,
10742 const q7_t *ker_weight,
10743 const uint16_t out_tensor_ch,
10744 const uint16_t ker_dim_x,
10745 const uint16_t ker_dim_y,
10746 const uint16_t pad_x,
10747 const uint16_t pad_y,
10748 const uint16_t stride_x,
10749 const uint16_t stride_y,
10751 const uint16_t pre_rshift,
10752 const uint16_t out_scale,
10753 const uint16_t post_rshift,
10755 const uint16_t out_tensor_dim_x,
10756 const uint16_t out_tensor_dim_y,
10759 #if defined(__zcc__)
10760 return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10761 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10762 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10763 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10766 return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10767 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10768 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10769 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10811 const uint16_t in_tensor_dim_x,
10812 const uint16_t in_tensor_dim_y,
10813 const uint16_t in_tensor_ch,
10814 const q7_t *ker_weight,
10815 const uint16_t out_tensor_ch,
10816 const uint16_t ker_dim_x,
10817 const uint16_t ker_dim_y,
10818 const uint16_t pad_x,
10819 const uint16_t pad_y,
10820 const uint16_t stride_x,
10821 const uint16_t stride_y,
10823 const uint16_t pre_rshift,
10824 const uint16_t out_scale,
10825 const uint16_t post_rshift,
10827 const uint16_t out_tensor_dim_x,
10828 const uint16_t out_tensor_dim_y,
10831 #if defined(__zcc__)
10832 return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10833 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10834 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10835 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10838 return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10839 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10840 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10841 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10884 const uint16_t in_tensor_dim_x,
10885 const uint16_t in_tensor_dim_y,
10886 const uint16_t in_tensor_ch,
10887 const q7_t *ker_weight,
10888 const uint16_t out_tensor_ch,
10889 const uint16_t ker_dim_x,
10890 const uint16_t ker_dim_y,
10891 const uint16_t pad_x,
10892 const uint16_t pad_y,
10893 const uint16_t stride_x,
10894 const uint16_t stride_y,
10896 const uint16_t pre_rshift,
10897 const uint16_t out_scale,
10898 const uint16_t post_rshift,
10900 const uint16_t out_tensor_dim_x,
10901 const uint16_t out_tensor_dim_y,
10904 #if defined(__zcc__)
10905 return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10906 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10907 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10908 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10911 return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10912 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10913 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10914 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10957 const uint16_t in_tensor_dim_x,
10958 const uint16_t in_tensor_dim_y,
10959 const uint16_t in_tensor_ch,
10960 const q7_t *ker_weight,
10961 const uint16_t out_tensor_ch,
10962 const uint16_t ker_dim_x,
10963 const uint16_t ker_dim_y,
10964 const uint16_t pad_x,
10965 const uint16_t pad_y,
10966 const uint16_t stride_x,
10967 const uint16_t stride_y,
10969 const uint16_t pre_rshift,
10970 const uint16_t out_scale,
10971 const uint16_t post_rshift,
10973 const uint16_t out_tensor_dim_x,
10974 const uint16_t out_tensor_dim_y,
10977 #if defined(__zcc__)
10978 return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10979 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10980 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10981 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10984 return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10985 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10986 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10987 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11028 const uint16_t in_tensor_dim_x,
11029 const uint16_t in_tensor_dim_y,
11030 const uint16_t in_tensor_ch,
11031 const q7_t *ker_weight,
11032 const uint16_t out_tensor_ch,
11033 const uint16_t ker_dim_x,
11034 const uint16_t ker_dim_y,
11035 const uint16_t pad_x,
11036 const uint16_t pad_y,
11037 const uint16_t stride_x,
11038 const uint16_t stride_y,
11039 const uint16_t pre_rshift,
11040 const uint16_t out_scale,
11041 const uint16_t post_rshift,
11043 const uint16_t out_tensor_dim_x,
11044 const uint16_t out_tensor_dim_y,
11047 #if defined(__zcc__)
11048 return tpt_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11049 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11050 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11051 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11054 return riscv_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11055 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11056 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11057 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11098 const uint16_t in_tensor_dim_x,
11099 const uint16_t in_tensor_dim_y,
11100 const uint16_t in_tensor_ch,
11101 const q7_t *ker_weight,
11102 const uint16_t out_tensor_ch,
11103 const uint16_t ker_dim_x,
11104 const uint16_t ker_dim_y,
11105 const uint16_t pad_x,
11106 const uint16_t pad_y,
11107 const uint16_t stride_x,
11108 const uint16_t stride_y,
11109 const uint16_t pre_rshift,
11110 const uint16_t out_scale,
11111 const uint16_t post_rshift,
11113 const uint16_t out_tensor_dim_x,
11114 const uint16_t out_tensor_dim_y,
11117 #if defined(__zcc__)
11118 return tpt_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11119 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11120 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11121 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11124 return riscv_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11125 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11126 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11127 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11168 const uint16_t in_tensor_dim_x,
11169 const uint16_t in_tensor_dim_y,
11170 const uint16_t in_tensor_ch,
11171 const q7_t *ker_weight,
11172 const uint16_t out_tensor_ch,
11173 const uint16_t ker_dim_x,
11174 const uint16_t ker_dim_y,
11175 const uint16_t pad_x,
11176 const uint16_t pad_y,
11177 const uint16_t stride_x,
11178 const uint16_t stride_y,
11179 const uint16_t pre_rshift,
11180 const uint16_t out_scale,
11181 const uint16_t post_rshift,
11183 const uint16_t out_tensor_dim_x,
11184 const uint16_t out_tensor_dim_y,
11187 #if defined(__zcc__)
11188 return tpt_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11189 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11190 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11191 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11194 return riscv_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11195 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11196 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11197 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11238 const uint16_t in_tensor_dim_x,
11239 const uint16_t in_tensor_dim_y,
11240 const uint16_t in_tensor_ch,
11241 const q7_t *ker_weight,
11242 const uint16_t out_tensor_ch,
11243 const uint16_t ker_dim_x,
11244 const uint16_t ker_dim_y,
11245 const uint16_t pad_x,
11246 const uint16_t pad_y,
11247 const uint16_t stride_x,
11248 const uint16_t stride_y,
11249 const uint16_t pre_rshift,
11250 const uint16_t out_scale,
11251 const uint16_t post_rshift,
11253 const uint16_t out_tensor_dim_x,
11254 const uint16_t out_tensor_dim_y,
11257 #if defined(__zcc__)
11258 return tpt_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11259 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11260 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11261 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11264 return riscv_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11265 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11266 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11267 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11308 const uint16_t in_tensor_dim_x,
11309 const uint16_t in_tensor_dim_y,
11310 const uint16_t in_tensor_ch,
11311 const q7_t *ker_weight,
11312 const uint16_t out_tensor_ch,
11313 const uint16_t ker_dim_x,
11314 const uint16_t ker_dim_y,
11315 const uint16_t pad_x,
11316 const uint16_t pad_y,
11317 const uint16_t stride_x,
11318 const uint16_t stride_y,
11319 const uint16_t pre_rshift,
11320 const uint16_t out_scale,
11321 const uint16_t post_rshift,
11323 const uint16_t out_tensor_dim_x,
11324 const uint16_t out_tensor_dim_y,
11327 #if defined(__zcc__)
11328 return tpt_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11329 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11330 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11331 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11334 return riscv_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11335 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11336 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11337 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11374 const uint16_t in_tensor_dim,
11375 const uint16_t in_tensor_ch,
11376 const q7_t *ker_weight,
11377 const uint16_t out_tensor_ch,
11378 const uint16_t ker_dim,
11379 const uint16_t pad,
11380 const uint16_t stride,
11382 const uint16_t pre_rshift,
11383 const uint16_t out_scale,
11384 const uint16_t post_rshift,
11386 const uint16_t out_tensor_dim,
11389 #if defined(__zcc__)
11390 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11391 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11392 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11395 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11396 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11397 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11433 const uint16_t in_tensor_dim,
11434 const uint16_t in_tensor_ch,
11435 const q7_t *ker_weight,
11436 const uint16_t out_tensor_ch,
11437 const uint16_t ker_dim,
11438 const uint16_t pad,
11439 const uint16_t stride,
11441 const uint16_t pre_rshift,
11442 const uint16_t out_scale,
11443 const uint16_t post_rshift,
11445 const uint16_t out_tensor_dim,
11448 #if defined(__zcc__)
11449 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11450 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11451 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11454 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11455 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11456 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11492 const uint16_t in_tensor_dim,
11493 const uint16_t in_tensor_ch,
11494 const q7_t *ker_weight,
11495 const uint16_t out_tensor_ch,
11496 const uint16_t ker_dim,
11497 const uint16_t pad,
11498 const uint16_t stride,
11500 const uint16_t pre_rshift,
11501 const uint16_t out_scale,
11502 const uint16_t post_rshift,
11504 const uint16_t out_tensor_dim,
11507 #if defined(__zcc__)
11508 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11509 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11510 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11513 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11514 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11515 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11551 const uint16_t in_tensor_dim,
11552 const uint16_t in_tensor_ch,
11553 const q7_t *ker_weight,
11554 const uint16_t out_tensor_ch,
11555 const uint16_t ker_dim,
11556 const uint16_t pad,
11557 const uint16_t stride,
11559 const uint16_t pre_rshift,
11560 const uint16_t out_scale,
11561 const uint16_t post_rshift,
11563 const uint16_t out_tensor_dim,
11566 #if defined(__zcc__)
11567 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11568 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11569 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11572 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11573 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11574 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11610 const uint16_t in_tensor_dim,
11611 const uint16_t in_tensor_ch,
11612 const q7_t *ker_weight,
11613 const uint16_t out_tensor_ch,
11614 const uint16_t ker_dim,
11615 const uint16_t pad,
11616 const uint16_t stride,
11618 const uint16_t pre_rshift,
11619 const uint16_t out_scale,
11620 const uint16_t post_rshift,
11622 const uint16_t out_tensor_dim,
11625 #if defined(__zcc__)
11626 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11627 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11628 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11631 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11632 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11633 ker_dim, pad, stride,
bias, pre_rshift, out_scale, post_rshift,
11668 const uint16_t in_tensor_dim,
11669 const uint16_t in_tensor_ch,
11670 const q7_t *ker_weight,
11671 const uint16_t out_tensor_ch,
11672 const uint16_t ker_dim,
11673 const uint16_t pad,
11674 const uint16_t stride,
11675 const uint16_t pre_rshift,
11676 const uint16_t out_scale,
11677 const uint16_t post_rshift,
11679 const uint16_t out_tensor_dim,
11682 #if defined(__zcc__)
11683 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym(
11684 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11685 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11688 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym(
11689 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11690 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11725 const uint16_t in_tensor_dim,
11726 const uint16_t in_tensor_ch,
11727 const q7_t *ker_weight,
11728 const uint16_t out_tensor_ch,
11729 const uint16_t ker_dim,
11730 const uint16_t pad,
11731 const uint16_t stride,
11732 const uint16_t pre_rshift,
11733 const uint16_t out_scale,
11734 const uint16_t post_rshift,
11736 const uint16_t out_tensor_dim,
11739 #if defined(__zcc__)
11740 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym(
11741 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11742 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11745 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym(
11746 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11747 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11782 const uint16_t in_tensor_dim,
11783 const uint16_t in_tensor_ch,
11784 const q7_t *ker_weight,
11785 const uint16_t out_tensor_ch,
11786 const uint16_t ker_dim,
11787 const uint16_t pad,
11788 const uint16_t stride,
11789 const uint16_t pre_rshift,
11790 const uint16_t out_scale,
11791 const uint16_t post_rshift,
11793 const uint16_t out_tensor_dim,
11796 #if defined(__zcc__)
11797 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym(
11798 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11799 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11802 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym(
11803 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11804 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11839 const uint16_t in_tensor_dim,
11840 const uint16_t in_tensor_ch,
11841 const q7_t *ker_weight,
11842 const uint16_t out_tensor_ch,
11843 const uint16_t ker_dim,
11844 const uint16_t pad,
11845 const uint16_t stride,
11846 const uint16_t pre_rshift,
11847 const uint16_t out_scale,
11848 const uint16_t post_rshift,
11850 const uint16_t out_tensor_dim,
11853 #if defined(__zcc__)
11854 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym(
11855 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11856 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11859 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym(
11860 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11861 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11896 const uint16_t in_tensor_dim,
11897 const uint16_t in_tensor_ch,
11898 const q7_t *ker_weight,
11899 const uint16_t out_tensor_ch,
11900 const uint16_t ker_dim,
11901 const uint16_t pad,
11902 const uint16_t stride,
11903 const uint16_t pre_rshift,
11904 const uint16_t out_scale,
11905 const uint16_t post_rshift,
11907 const uint16_t out_tensor_dim,
11910 #if defined(__zcc__)
11911 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym(
11912 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11913 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11916 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym(
11917 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11918 ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11960 const uint16_t in_tensor_dim_x,
11961 const uint16_t in_tensor_dim_y,
11962 const uint16_t in_tensor_ch,
11963 const q7_t *ker_weight,
11964 const uint16_t out_tensor_ch,
11965 const uint16_t ker_dim_x,
11966 const uint16_t ker_dim_y,
11967 const uint16_t pad_x,
11968 const uint16_t pad_y,
11969 const uint16_t stride_x,
11970 const uint16_t stride_y,
11972 const uint16_t pre_rshift,
11973 const uint16_t out_scale,
11974 const uint16_t post_rshift,
11976 const uint16_t out_tensor_dim_x,
11977 const uint16_t out_tensor_dim_y,
11980 #if defined(__zcc__)
11981 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11982 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11983 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11984 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11987 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11988 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11989 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11990 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12033 const uint16_t in_tensor_dim_x,
12034 const uint16_t in_tensor_dim_y,
12035 const uint16_t in_tensor_ch,
12036 const q7_t *ker_weight,
12037 const uint16_t out_tensor_ch,
12038 const uint16_t ker_dim_x,
12039 const uint16_t ker_dim_y,
12040 const uint16_t pad_x,
12041 const uint16_t pad_y,
12042 const uint16_t stride_x,
12043 const uint16_t stride_y,
12045 const uint16_t pre_rshift,
12046 const uint16_t out_scale,
12047 const uint16_t post_rshift,
12049 const uint16_t out_tensor_dim_x,
12050 const uint16_t out_tensor_dim_y,
12053 #if defined(__zcc__)
12054 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12055 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12056 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12057 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12060 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12061 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12062 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12063 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12105 const uint16_t in_tensor_dim_x,
12106 const uint16_t in_tensor_dim_y,
12107 const uint16_t in_tensor_ch,
12108 const q7_t *ker_weight,
12109 const uint16_t out_tensor_ch,
12110 const uint16_t ker_dim_x,
12111 const uint16_t ker_dim_y,
12112 const uint16_t pad_x,
12113 const uint16_t pad_y,
12114 const uint16_t stride_x,
12115 const uint16_t stride_y,
12117 const uint16_t pre_rshift,
12118 const uint16_t out_scale,
12119 const uint16_t post_rshift,
12121 const uint16_t out_tensor_dim_x,
12122 const uint16_t out_tensor_dim_y,
12125 #if defined(__zcc__)
12126 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12127 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12128 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12129 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12132 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12133 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12134 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12135 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12178 const uint16_t in_tensor_dim_x,
12179 const uint16_t in_tensor_dim_y,
12180 const uint16_t in_tensor_ch,
12181 const q7_t *ker_weight,
12182 const uint16_t out_tensor_ch,
12183 const uint16_t ker_dim_x,
12184 const uint16_t ker_dim_y,
12185 const uint16_t pad_x,
12186 const uint16_t pad_y,
12187 const uint16_t stride_x,
12188 const uint16_t stride_y,
12190 const uint16_t pre_rshift,
12191 const uint16_t out_scale,
12192 const uint16_t post_rshift,
12194 const uint16_t out_tensor_dim_x,
12195 const uint16_t out_tensor_dim_y,
12198 #if defined(__zcc__)
12199 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12200 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12201 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12202 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12205 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12206 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12207 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12208 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12251 const uint16_t in_tensor_dim_x,
12252 const uint16_t in_tensor_dim_y,
12253 const uint16_t in_tensor_ch,
12254 const q7_t *ker_weight,
12255 const uint16_t out_tensor_ch,
12256 const uint16_t ker_dim_x,
12257 const uint16_t ker_dim_y,
12258 const uint16_t pad_x,
12259 const uint16_t pad_y,
12260 const uint16_t stride_x,
12261 const uint16_t stride_y,
12263 const uint16_t pre_rshift,
12264 const uint16_t out_scale,
12265 const uint16_t post_rshift,
12267 const uint16_t out_tensor_dim_x,
12268 const uint16_t out_tensor_dim_y,
12271 #if defined(__zcc__)
12272 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12273 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12274 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12275 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12278 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12279 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12280 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12281 bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12322 const uint16_t in_tensor_dim_x,
12323 const uint16_t in_tensor_dim_y,
12324 const uint16_t in_tensor_ch,
12325 const q7_t *ker_weight,
12326 const uint16_t out_tensor_ch,
12327 const uint16_t ker_dim_x,
12328 const uint16_t ker_dim_y,
12329 const uint16_t pad_x,
12330 const uint16_t pad_y,
12331 const uint16_t stride_x,
12332 const uint16_t stride_y,
12333 const uint16_t pre_rshift,
12334 const uint16_t out_scale,
12335 const uint16_t post_rshift,
12337 const uint16_t out_tensor_dim_x,
12338 const uint16_t out_tensor_dim_y,
12341 #if defined(__zcc__)
12342 return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12343 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12344 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12345 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12348 return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12349 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12350 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12351 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12393 const uint16_t in_tensor_dim_x,
12394 const uint16_t in_tensor_dim_y,
12395 const uint16_t in_tensor_ch,
12396 const q7_t *ker_weight,
12397 const uint16_t out_tensor_ch,
12398 const uint16_t ker_dim_x,
12399 const uint16_t ker_dim_y,
12400 const uint16_t pad_x,
12401 const uint16_t pad_y,
12402 const uint16_t stride_x,
12403 const uint16_t stride_y,
12404 const uint16_t pre_rshift,
12405 const uint16_t out_scale,
12406 const uint16_t post_rshift,
12408 const uint16_t out_tensor_dim_x,
12409 const uint16_t out_tensor_dim_y,
12412 #if defined(__zcc__)
12413 return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12414 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12415 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12416 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12419 return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12420 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12421 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12422 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12463 const uint16_t in_tensor_dim_x,
12464 const uint16_t in_tensor_dim_y,
12465 const uint16_t in_tensor_ch,
12466 const q7_t *ker_weight,
12467 const uint16_t out_tensor_ch,
12468 const uint16_t ker_dim_x,
12469 const uint16_t ker_dim_y,
12470 const uint16_t pad_x,
12471 const uint16_t pad_y,
12472 const uint16_t stride_x,
12473 const uint16_t stride_y,
12474 const uint16_t pre_rshift,
12475 const uint16_t out_scale,
12476 const uint16_t post_rshift,
12478 const uint16_t out_tensor_dim_x,
12479 const uint16_t out_tensor_dim_y,
12482 #if defined(__zcc__)
12483 return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12484 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12485 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12486 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12489 return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12490 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12491 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12492 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12534 const uint16_t in_tensor_dim_x,
12535 const uint16_t in_tensor_dim_y,
12536 const uint16_t in_tensor_ch,
12537 const q7_t *ker_weight,
12538 const uint16_t out_tensor_ch,
12539 const uint16_t ker_dim_x,
12540 const uint16_t ker_dim_y,
12541 const uint16_t pad_x,
12542 const uint16_t pad_y,
12543 const uint16_t stride_x,
12544 const uint16_t stride_y,
12545 const uint16_t pre_rshift,
12546 const uint16_t out_scale,
12547 const uint16_t post_rshift,
12549 const uint16_t out_tensor_dim_x,
12550 const uint16_t out_tensor_dim_y,
12553 #if defined(__zcc__)
12554 return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12555 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12556 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12557 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12560 return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12561 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12562 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12563 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12605 const uint16_t in_tensor_dim_x,
12606 const uint16_t in_tensor_dim_y,
12607 const uint16_t in_tensor_ch,
12608 const q7_t *ker_weight,
12609 const uint16_t out_tensor_ch,
12610 const uint16_t ker_dim_x,
12611 const uint16_t ker_dim_y,
12612 const uint16_t pad_x,
12613 const uint16_t pad_y,
12614 const uint16_t stride_x,
12615 const uint16_t stride_y,
12616 const uint16_t pre_rshift,
12617 const uint16_t out_scale,
12618 const uint16_t post_rshift,
12620 const uint16_t out_tensor_dim_x,
12621 const uint16_t out_tensor_dim_y,
12624 #if defined(__zcc__)
12625 return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12626 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12627 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12628 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12631 return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12632 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12633 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12634 pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12686 const uint16_t in_tensor_dim_x,
12687 const uint16_t in_tensor_dim_y,
12688 const uint16_t in_tensor_ch,
12689 const uint16_t in_tensor_group,
12690 const q7_t *ker_weight,
12691 const uint16_t out_tensor_ch,
12692 const uint16_t pad_x,
12693 const uint16_t pad_y,
12694 const uint16_t stride_x,
12695 const uint16_t stride_y,
12696 const int32_t *
bias,
12698 const int32_t *out_shift,
12699 const int32_t *out_scale,
12700 const int32_t out_offset,
12701 const int32_t in_offset,
12702 const int32_t act_min,
12703 const int32_t act_max,
12704 const uint16_t out_tensor_dim_x,
12705 const uint16_t out_tensor_dim_y,
12708 #if defined(__zcc__)
12710 tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
12711 stride_y, pad_x, pad_y, act_min, act_max};
12713 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12715 tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12716 in_tensor_group, out_tensor_ch};
12718 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12719 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
12722 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
12723 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12724 in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
12725 stride_y,
bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
12726 act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
12736 static inline int32_t
12738 const uint16_t in_tensor_ch) {
12739 #if defined(__zcc__)convol
12740 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
12743 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
12787 const uint16_t in_tensor_dim_x,
12788 const uint16_t in_tensor_ch,
12789 const uint16_t in_tensor_group,
12790 const q7_t *ker_weight,
12791 const uint16_t out_tensor_ch,
12792 const uint16_t ker_dim_x,
12793 const uint16_t pad_x,
12794 const uint16_t stride_x,
12795 const int32_t *
bias,
12797 const int32_t *out_shift,
12798 const int32_t *out_scale,
12799 const int32_t out_offset,
12800 const int32_t in_offset,
12801 const int32_t act_min,
12802 const int32_t act_max,
12803 const uint16_t out_tensor_dim_x,
12806 #if defined(__zcc__)
12808 tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
12811 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12813 tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
12814 ker_dim_x, out_tensor_dim_x, out_tensor_ch};
12816 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12820 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
12821 in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
12822 out_tensor_ch, ker_dim_x, pad_x, stride_x,
bias, out_tensor, out_shift,
12823 out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12838 const uint16_t ker_dim_x,
12839 const uint16_t ker_dim_y)
12841 #if defined(__zcc__)
12842 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
12843 in_tensor_ch, ker_dim_x, ker_dim_y);
12845 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12846 in_tensor_ch, ker_dim_x, ker_dim_y);
12892 const uint16_t in_tensor_dim_x,
12893 const uint16_t in_tensor_dim_y,
12894 const uint16_t in_tensor_ch,
12895 const uint16_t in_tensor_group,
12896 const q7_t *ker_weight,
12897 const uint16_t out_tensor_ch,
12898 const uint16_t ker_dim_x,
12899 const uint16_t ker_dim_y,
12900 const uint16_t pad_x,
12901 const uint16_t pad_y,
12902 const uint16_t stride_x,
12903 const uint16_t stride_y,
12904 const int32_t *
bias,
12906 const int32_t *out_shift,
12907 const int32_t *out_scale,
12908 const int32_t out_offset,
12909 const int32_t in_offset,
12910 const int32_t act_min,
12911 const int32_t act_max,
12912 const uint16_t out_tensor_dim_x,
12913 const uint16_t out_tensor_dim_y,
12916 #if defined(__zcc__)
12918 tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
12919 in_offset, out_offset, act_min, act_max};
12921 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12923 tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12924 in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
12927 return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12931 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
12932 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12933 in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
12934 pad_y, stride_x, stride_y,
bias, out_tensor, out_shift, out_scale,
12935 out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12949 const uint16_t ker_dim_x,
12950 const uint16_t ker_dim_y)
12952 #if defined(__zcc__)
12953 return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12954 in_tensor_ch, ker_dim_x, ker_dim_y);
12956 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
12957 in_tensor_ch, ker_dim_x, ker_dim_y);
13001 const int32_t in_tensor_dim_x,
13002 const int32_t in_tensor_dim_y,
13003 const int32_t in_tensor_ch,
13004 const int8_t *ker_weight,
13005 const int32_t out_tensor_ch,
13006 const int32_t pad_x,
13007 const int32_t pad_y,
13008 const int32_t stride_x,
13009 const int32_t stride_y,
13010 const int32_t *
bias,
13011 int8_t *out_tensor,
13012 const int32_t *out_shift,
13013 const int32_t *out_scale,
13014 const int32_t out_tensor_dim_x,
13015 const int32_t out_tensor_dim_y,
13016 const int32_t out_offset,
13017 const int32_t in_offset,
13018 const int32_t act_min,
13019 const int32_t act_max,
13020 const int32_t dilation_x,
13021 const int32_t dilation_y,
13024 #if defined(__zcc__)
13025 return tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
13026 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13027 out_tensor_ch, pad_x, pad_y, stride_x, stride_y,
bias, out_tensor,
13028 out_shift, out_scale, out_tensor_dim_x, out_tensor_dim_y, out_offset,
13029 in_offset, act_min, act_max, dilation_x, dilation_y, tmp_buf);
13031 return riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
13032 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13033 out_tensor_ch, pad_x, pad_y, stride_x, stride_y,
bias, out_tensor,
13034 out_shift, out_scale, out_tensor_dim_x, out_tensor_dim_y, out_offset,
13035 in_offset, act_min, act_max, dilation_x, dilation_y, tmp_buf);
13087 const uint16_t in_tensor_dim_x,
13088 const uint16_t in_tensor_dim_y,
13089 const uint16_t in_tensor_ch,
13090 const q7_t *ker_weight,
13091 const uint16_t out_tensor_ch,
13092 const uint16_t ch_mult,
13093 const uint16_t ker_dim_x,
13094 const uint16_t ker_dim_y,
13095 const uint16_t pad_x,
13096 const uint16_t pad_y,
13097 const uint16_t stride_x,
13098 const uint16_t stride_y,
13099 const int32_t *
bias,
13101 const int32_t *out_shift,
13102 const int32_t *out_scale,
13103 const uint16_t out_tensor_dim_x,
13104 const uint16_t out_tensor_dim_y,
13105 const int32_t out_offset,
13106 const int32_t in_offset,
13107 const int32_t act_min,
13108 const int32_t act_max,
13109 const uint16_t dilation_x,
13110 const uint16_t dilation_y,
13113 #if defined(__zcc__)
13115 tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13116 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13118 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13120 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13121 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13123 return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13124 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13128 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13129 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13130 out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13131 stride_y,
bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13132 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13133 dilation_y, tmp_buf);
13182 const uint16_t in_tensor_dim_x,
13183 const uint16_t in_tensor_dim_y,
13184 const uint16_t in_tensor_ch,
13185 const q7_t *ker_weight,
13186 const uint16_t out_tensor_ch,
13187 const uint16_t ker_dim_x,
13188 const uint16_t ker_dim_y,
13189 const uint16_t pad_x,
13190 const uint16_t pad_y,
13191 const uint16_t stride_x,
13192 const uint16_t stride_y,
13193 const int32_t *
bias,
13195 const int32_t *out_shift,
13196 const int32_t *out_scale,
13197 const uint16_t out_tensor_dim_x,
13198 const uint16_t out_tensor_dim_y,
13199 const int32_t out_offset,
13200 const int32_t in_offset,
13201 const int32_t act_min,
13202 const int32_t act_max,
13203 const uint16_t dilation_x,
13204 const uint16_t dilation_y,
13207 #if defined(__zcc__)
13209 tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13210 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13212 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13214 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13215 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13217 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13221 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13222 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13223 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13224 bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13225 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13239 const uint16_t ker_dim_x,
13240 const uint16_t ker_dim_y)
13242 #if defined(__zcc__)
13243 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13244 in_tensor_ch, ker_dim_x, ker_dim_y);
13246 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13247 in_tensor_ch, ker_dim_x, ker_dim_y);
13292 const uint16_t in_tensor_dim_x,
13293 const uint16_t in_tensor_dim_y,
13294 const uint16_t in_tensor_ch,
13295 const uint8_t *ker_weight,
13296 const uint16_t ker_dim_x,
13297 const uint16_t ker_dim_y,
13298 const int16_t ch_mult,
13299 const int16_t pad_x,
13300 const int16_t pad_y,
13301 const int16_t stride_x,
13302 const int16_t stride_y,
13303 const int16_t dilation_x,
13304 const int16_t dilation_y,
13305 const int32_t *
bias,
13306 const int32_t in_offset,
13307 const int32_t ker_offset,
13308 const int32_t out_offset,
13309 uint8_t *out_tensor,
13310 const uint16_t out_tensor_dim_x,
13311 const uint16_t out_tensor_dim_y,
13312 const int32_t act_min,
13313 const int32_t act_max,
13314 const int32_t out_shift,
13315 const int32_t out_scale)
13317 #if defined(__zcc__)
13318 return tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
13319 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13320 ker_dim_x, ker_dim_y, ch_mult, pad_x, pad_y, stride_x, stride_y,
13321 dilation_x, dilation_y,
bias, in_offset, ker_offset, out_offset,
13322 out_tensor, out_tensor_dim_x, out_tensor_dim_y, act_min, act_max,
13323 out_shift, out_scale);
13325 return riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
13326 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13327 ker_dim_x, ker_dim_y, ch_mult, pad_x, pad_y, stride_x, stride_y,
13328 dilation_x, dilation_y,
bias, in_offset, ker_offset, out_offset,
13329 out_tensor, out_tensor_dim_x, out_tensor_dim_y, act_min, act_max,
13330 out_shift, out_scale);
13370 static inline int32_t hpm_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
const float16_t *in_tensor,
13371 const uint16_t in_tensor_dim_x,
13372 const uint16_t in_tensor_dim_y,
13373 const uint16_t in_tensor_ch,
13374 const float16_t *ker_weight,
13375 const uint16_t out_tensor_ch,
13376 const uint16_t ker_dim_x,
13377 const uint16_t ker_dim_y,
13378 const uint16_t pad_x,
13379 const uint16_t pad_y,
13380 const uint16_t stride_x,
13381 const uint16_t stride_y,
13382 const float16_t *
bias,
13383 float16_t *out_tensor,
13384 const uint16_t out_tensor_dim_x,
13385 const uint16_t out_tensor_dim_y,
13387 float16_t *tmp_buf)
13389 #if defined(__zcc__)
13390 return tpt_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13391 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13392 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13393 bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y,
in_tmp_buf,
13396 return riscv_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13397 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13398 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13399 bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y,
in_tmp_buf,
13425 static inline int32_t hpm_nn_conv_HWC_f16_f16_f16_bias(
const float16_t *in_tensor,
13426 const uint16_t in_tensor_dim,
13427 const uint16_t in_tensor_ch,
13428 const float16_t *ker_weight,
13429 const uint16_t out_tensor_ch,
13430 const uint16_t ker_dim,
13431 const uint16_t pad,
13432 const uint16_t stride,
13433 const float16_t *
bias,
13434 float16_t *out_tensor,
13435 const uint16_t out_tensor_dim,
13437 float16_t *tmp_buf)
13439 #if defined(__zcc__)
13440 return tpt_nn_conv_HWC_f16_f16_f16_bias(
13441 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13442 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13445 return riscv_nn_conv_HWC_f16_f16_f16_bias(
13446 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13447 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13473 static inline int32_t hpm_nn_conv_dw_HWC_f16_f16_f16_bias(
const float16_t *in_tensor,
13474 const uint16_t in_tensor_dim,
13475 const uint16_t in_tensor_ch,
13476 const float16_t *ker_weight,
13477 const uint16_t out_tensor_ch,
13478 const uint16_t ker_dim,
13479 const uint16_t pad,
13480 const uint16_t stride,
13481 const float16_t *
bias,
13482 float16_t *out_tensor,
13483 const uint16_t out_tensor_dim,
13485 float16_t *tmp_buf)
13487 #if defined(__zcc__)
13488 return tpt_nn_conv_dw_HWC_f16_f16_f16_bias(
13489 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13490 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13493 return riscv_nn_conv_dw_HWC_f16_f16_f16_bias(
13494 in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13495 ker_dim, pad, stride,
bias, out_tensor, out_tensor_dim,
in_tmp_buf,
13507 #ifdef HPM_EN_MATH_NN_RVP32_LIB
13508 #if defined(__zcc__)
13509 #include "tpt_nn_convolution.h"
13511 #include "riscv_nn_convolution.h"
13556 const uint16_t in_tensor_dim_x,
13557 const uint16_t in_tensor_dim_y,
13558 const uint16_t in_tensor_ch,
13559 const uint16_t in_tensor_group,
13560 const q7_t *ker_weight,
13561 const uint16_t out_tensor_ch,
13562 const uint16_t ker_dim_x,
13563 const uint16_t ker_dim_y,
13564 const uint16_t pad_x,
13565 const uint16_t pad_y,
13566 const uint16_t stride_x,
13567 const uint16_t stride_y,
13568 const int32_t *
bias,
13570 const int32_t *out_shift,
13571 const int32_t *out_scale,
13572 const int32_t out_offset,
13573 const int32_t in_offset,
13574 const int32_t act_min,
13575 const int32_t act_max,
13576 const uint16_t out_tensor_dim_x,
13577 const uint16_t out_tensor_dim_y,
13580 #if defined(__zcc__)
13582 tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
13583 in_offset, out_offset, act_min, act_max};
13585 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13587 tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13588 in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
13591 return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13595 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
13596 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13597 in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
13598 pad_y, stride_x, stride_y,
bias, out_tensor, out_shift, out_scale,
13599 out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13651 const uint16_t in_tensor_dim_x,
13652 const uint16_t in_tensor_dim_y,
13653 const uint16_t in_tensor_ch,
13654 const uint16_t in_tensor_group,
13655 const q7_t *ker_weight,
13656 const uint16_t out_tensor_ch,
13657 const uint16_t pad_x,
13658 const uint16_t pad_y,
13659 const uint16_t stride_x,
13660 const uint16_t stride_y,
13661 const int32_t *
bias,
13663 const int32_t *out_shift,
13664 const int32_t *out_scale,
13665 const int32_t out_offset,
13666 const int32_t in_offset,
13667 const int32_t act_min,
13668 const int32_t act_max,
13669 const uint16_t out_tensor_dim_x,
13670 const uint16_t out_tensor_dim_y,
13673 #if defined(__zcc__)
13675 tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
13676 stride_y, pad_x, pad_y, act_min, act_max};
13678 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13680 tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13681 in_tensor_group, out_tensor_ch};
13683 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13684 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13687 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
13688 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13689 in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
13690 stride_y,
bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
13691 act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
13743 const uint16_t in_tensor_dim_x,
13744 const uint16_t in_tensor_dim_y,
13745 const uint16_t in_tensor_ch,
13746 const q7_t *ker_weight,
13747 const uint16_t out_tensor_ch,
13748 const uint16_t ch_mult,
13749 const uint16_t ker_dim_x,
13750 const uint16_t ker_dim_y,
13751 const uint16_t pad_x,
13752 const uint16_t pad_y,
13753 const uint16_t stride_x,
13754 const uint16_t stride_y,
13755 const int32_t *
bias,
13757 const int32_t *out_shift,
13758 const int32_t *out_scale,
13759 const uint16_t out_tensor_dim_x,
13760 const uint16_t out_tensor_dim_y,
13761 const int32_t out_offset,
13762 const int32_t in_offset,
13763 const int32_t act_min,
13764 const int32_t act_max,
13765 const uint16_t dilation_x,
13766 const uint16_t dilation_y,
13769 #if defined(__zcc__)
13771 tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13772 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13774 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13776 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13777 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13779 return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13780 bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13783 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13784 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13785 out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13786 stride_y,
bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13787 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13788 dilation_y, tmp_buf);
13832 const uint16_t in_tensor_dim_x,
13833 const uint16_t in_tensor_ch,
13834 const uint16_t in_tensor_group,
13835 const q7_t *ker_weight,
13836 const uint16_t out_tensor_ch,
13837 const uint16_t ker_dim_x,
13838 const uint16_t pad_x,
13839 const uint16_t stride_x,
13840 const int32_t *
bias,
13842 const int32_t *out_shift,
13843 const int32_t *out_scale,
13844 const int32_t out_offset,
13845 const int32_t in_offset,
13846 const int32_t act_min,
13847 const int32_t act_max,
13848 const uint16_t out_tensor_dim_x,
13851 #if defined(__zcc__)
13853 tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
13856 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13858 tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
13859 ker_dim_x, out_tensor_dim_x, out_tensor_ch};
13861 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13865 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
13866 in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
13867 out_tensor_ch, ker_dim_x, pad_x, stride_x,
bias, out_tensor, out_shift,
13868 out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13918 const uint16_t in_tensor_dim_x,
13919 const uint16_t in_tensor_dim_y,
13920 const uint16_t in_tensor_ch,
13921 const q7_t *ker_weight,
13922 const uint16_t out_tensor_ch,
13923 const uint16_t ker_dim_x,
13924 const uint16_t ker_dim_y,
13925 const uint16_t pad_x,
13926 const uint16_t pad_y,
13927 const uint16_t stride_x,
13928 const uint16_t stride_y,
13929 const int32_t *
bias,
13931 const int32_t *out_shift,
13932 const int32_t *out_scale,
13933 const uint16_t out_tensor_dim_x,
13934 const uint16_t out_tensor_dim_y,
13935 const int32_t out_offset,
13936 const int32_t in_offset,
13937 const int32_t act_min,
13938 const int32_t act_max,
13939 const uint16_t dilation_x,
13940 const uint16_t dilation_y,
13943 #if defined(__zcc__)
13945 tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13946 stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13948 tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13950 tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13951 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13953 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13957 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13958 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13959 out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13960 bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13961 out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13974 #if defined(__zcc__)
13975 return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
13978 return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13992 const uint16_t ker_dim_x,
13993 const uint16_t ker_dim_y)
13995 #if defined(__zcc__)
13996 return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
13997 in_tensor_ch, ker_dim_x, ker_dim_y);
13999 return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
14000 in_tensor_ch, ker_dim_x, ker_dim_y);
14014 const uint16_t ker_dim_x,
14015 const uint16_t ker_dim_y)
14017 #if defined(__zcc__)
14018 return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
14019 in_tensor_ch, ker_dim_x, ker_dim_y);
14021 return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
14022 in_tensor_ch, ker_dim_x, ker_dim_y);
14035 const uint16_t ker_dim_x,
14036 const uint16_t ker_dim_y)
14039 #if defined(__zcc__)
14040 return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
14041 in_tensor_ch, ker_dim_x, ker_dim_y);
14043 return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
14044 in_tensor_ch, ker_dim_x, ker_dim_y);
14052 #ifdef HPM_MATH_NN_CONNECTED
14053 #ifdef HPM_EN_MATH_NN_LIB
14054 #if defined(__zcc__)
14055 #include "tpt_nn_fully_connected.h"
14057 #include "riscv_nn_fully_connected.h"
14105 const uint16_t
size,
14112 #if defined(__zcc__)
14146 const uint16_t
size,
14154 #if defined(__zcc__)
14181 const uint16_t
size,
14189 #if defined(__zcc__)
14224 const uint16_t
size,
14232 #if defined(__zcc__)
14260 const uint16_t
size,
14268 #if defined(__zcc__)
14273 return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias(
14302 const uint16_t
size,
14310 #if defined(__zcc__)
14311 return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14315 return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14347 const uint16_t
size,
14349 const uint16_t pre_rshift,
14350 const uint16_t out_scale,
14351 const uint16_t post_rshift,
14356 #if defined(__zcc__)
14358 pre_rshift, out_scale, post_rshift,
bias,
14362 pre_rshift, out_scale, post_rshift,
bias,
14393 const uint16_t
size,
14395 const uint16_t pre_rshift,
14396 const uint16_t out_scale,
14397 const uint16_t post_rshift,
14402 #if defined(__zcc__)
14404 pre_rshift, out_scale, post_rshift,
bias,
14408 pre_rshift, out_scale, post_rshift,
14439 const uint16_t
size,
14441 const uint16_t pre_rshift,
14442 const uint16_t out_scale,
14443 const uint16_t post_rshift,
14448 #if defined(__zcc__)
14450 pre_rshift, out_scale, post_rshift,
bias,
14454 pre_rshift, out_scale, post_rshift,
bias,
14485 const uint16_t
size,
14487 const uint16_t pre_rshift,
14488 const uint16_t out_scale,
14489 const uint16_t post_rshift,
14494 #if defined(__zcc__)
14496 pre_rshift, out_scale, post_rshift,
bias,
14500 pre_rshift, out_scale, post_rshift,
bias,
14531 const uint16_t
size,
14533 const uint16_t pre_rshift,
14534 const uint16_t out_scale,
14535 const uint16_t post_rshift,
14540 #if defined(__zcc__)
14542 pre_rshift, out_scale, post_rshift,
bias,
14546 pre_rshift, out_scale, post_rshift,
14576 const uint16_t
size,
14578 const uint16_t pre_rshift,
14579 const uint16_t out_scale,
14580 const uint16_t post_rshift,
14584 #if defined(__zcc__)
14618 const uint16_t
size,
14620 const uint16_t pre_rshift,
14621 const uint16_t out_scale,
14622 const uint16_t post_rshift,
14626 #if defined(__zcc__)
14660 const uint16_t
size,
14662 const uint16_t pre_rshift,
14663 const uint16_t out_scale,
14664 const uint16_t post_rshift,
14668 #if defined(__zcc__)
14702 const uint16_t
size,
14704 const uint16_t pre_rshift,
14705 const uint16_t out_scale,
14706 const uint16_t post_rshift,
14710 #if defined(__zcc__)
14744 const uint16_t
size,
14746 const uint16_t pre_rshift,
14747 const uint16_t out_scale,
14748 const uint16_t post_rshift,
14752 #if defined(__zcc__)
14787 const uint16_t
size,
14789 const uint16_t pre_rshift,
14790 const uint16_t out_scale,
14791 const uint16_t post_rshift,
14796 #if defined(__zcc__)
14798 pre_rshift, out_scale, post_rshift,
14802 pre_rshift, out_scale, post_rshift,
14834 const uint16_t
size,
14836 const uint16_t pre_rshift,
14837 const uint16_t out_scale,
14838 const uint16_t post_rshift,
14843 #if defined(__zcc__)
14845 pre_rshift, out_scale, post_rshift,
14849 pre_rshift, out_scale, post_rshift,
14880 const uint16_t
size,
14882 const uint16_t pre_rshift,
14883 const uint16_t out_scale,
14884 const uint16_t post_rshift,
14889 #if defined(__zcc__)
14891 pre_rshift, out_scale, post_rshift,
14895 pre_rshift, out_scale, post_rshift,
14927 const uint16_t
size,
14929 const uint16_t pre_rshift,
14930 const uint16_t out_scale,
14931 const uint16_t post_rshift,
14936 #if defined(__zcc__)
14938 pre_rshift, out_scale, post_rshift,
14942 pre_rshift, out_scale, post_rshift,
14974 const uint16_t
size,
14976 const uint16_t pre_rshift,
14977 const uint16_t out_scale,
14978 const uint16_t post_rshift,
14983 #if defined(__zcc__)
14985 pre_rshift, out_scale, post_rshift,
14989 pre_rshift, out_scale, post_rshift,
15019 const uint16_t
size,
15021 const uint16_t pre_rshift,
15022 const uint16_t out_scale,
15023 const uint16_t post_rshift,
15027 #if defined(__zcc__)
15029 pre_rshift, out_scale, post_rshift,
15033 pre_rshift, out_scale, post_rshift,
15064 const uint16_t
size,
15066 const uint16_t pre_rshift,
15067 const uint16_t out_scale,
15068 const uint16_t post_rshift,
15072 #if defined(__zcc__)
15074 pre_rshift, out_scale, post_rshift,
15078 pre_rshift, out_scale, post_rshift,
15108 const uint16_t
size,
15110 const uint16_t pre_rshift,
15111 const uint16_t out_scale,
15112 const uint16_t post_rshift,
15116 #if defined(__zcc__)
15118 pre_rshift, out_scale, post_rshift,
15122 pre_rshift, out_scale, post_rshift,
15153 const uint16_t
size,
15155 const uint16_t pre_rshift,
15156 const uint16_t out_scale,
15157 const uint16_t post_rshift,
15161 #if defined(__zcc__)
15163 pre_rshift, out_scale, post_rshift,
15167 pre_rshift, out_scale, post_rshift,
15198 const uint16_t
size,
15200 const uint16_t pre_rshift,
15201 const uint16_t out_scale,
15202 const uint16_t post_rshift,
15206 #if defined(__zcc__)
15208 pre_rshift, out_scale, post_rshift,
15212 pre_rshift, out_scale, post_rshift,
15228 const uint32_t
size,
15232 #if defined(__zcc__)
15250 const uint32_t
size,
15254 #if defined(__zcc__)
15271 const uint32_t
size,
15275 #if defined(__zcc__)
15314 const uint16_t in_vec_col,
15315 const uint16_t wt_mat_row,
15316 const uint16_t in_vec_group,
15317 const int32_t in_offset,
15318 const int32_t wt_offset,
15319 const int32_t out_scale,
15320 const int32_t out_shift,
15321 const int32_t out_offset,
15322 const int32_t *
bias,
15324 const int32_t act_min,
15325 const int32_t act_max,
15328 #if defined(__zcc__)
15330 tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15331 out_shift, act_min, act_max};
15332 tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15335 &aFC_dims, tmp_buf);
15337 return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec,
wt_mat, in_vec_col, wt_mat_row,
15338 in_vec_group, in_offset, wt_offset,
15339 out_scale, out_shift, out_offset,
bias,
15340 out_vec, act_min, act_max, tmp_buf);
15353 #if defined(__zcc__)
15354 return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15356 return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15366 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15367 #if defined(__zcc__)
15368 #include "tpt_nn_fully_connected.h"
15370 #include "riscv_nn_fully_connected.h"
15405 const uint16_t in_vec_col,
15406 const uint16_t wt_mat_row,
15407 const uint16_t in_vec_group,
15408 const int32_t in_offset,
15409 const int32_t wt_offset,
15410 const int32_t out_scale,
15411 const int32_t out_shift,
15412 const int32_t out_offset,
15413 const int32_t *
bias,
15415 const int32_t act_min,
15416 const int32_t act_max,
15419 #if defined(__zcc__)
15421 tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15422 out_shift, act_min, act_max};
15423 tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15426 &aFC_dims, tmp_buf);
15428 return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec,
wt_mat, in_vec_col, wt_mat_row,
15429 in_vec_group, in_offset, wt_offset,
15430 out_scale, out_shift, out_offset,
bias,
15431 out_vec, act_min, act_max, tmp_buf);
15444 #if defined(__zcc__)
15445 return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15447 return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
15455 #ifdef HPM_MATH_NN_POOLING
15456 #ifdef HPM_EN_MATH_NN_LIB
15457 #if defined(__zcc__)
15458 #include "tpt_nn_pooling.h"
15460 #include "riscv_nn_pooling.h"
15506 const uint16_t in_tensor_dim,
15507 const uint16_t in_tensor_ch,
15508 const uint16_t ker_dim,
15509 const uint16_t pad,
15510 const uint16_t stride,
15511 const uint16_t out_tensor_dim,
15515 #if defined(__zcc__)
15516 tpt_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15517 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15519 riscv_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15520 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15571 const uint16_t in_tensor_dim_x,
15572 const uint16_t in_tensor_dim_y,
15573 const uint16_t in_tensor_ch,
15574 const uint16_t ker_dim_x,
15575 const uint16_t ker_dim_y,
15576 const uint16_t pad_x,
15577 const uint16_t pad_y,
15578 const uint16_t stride_x,
15579 const uint16_t stride_y,
15580 const uint16_t out_tensor_dim_x,
15581 const uint16_t out_tensor_dim_y,
15584 const uint16_t out_lshift)
15586 #if defined(__zcc__)
15587 tpt_nn_avepool_HWC_s8_any(
15588 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15589 ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15590 out_tensor_dim_y,
in_tmp_buf, out_tensor, out_lshift);
15592 riscv_nn_avepool_HWC_s8_any(
15593 in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15594 ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15595 out_tensor_dim_y,
in_tmp_buf, out_tensor, out_lshift);
15628 const int in_tensor_dim_x,
15629 const int out_tensor_dim_y,
15630 const int out_tensor_dim_x,
15631 const int stride_y,
15632 const int stride_x,
15633 const int ker_dim_y,
15634 const int ker_dim_x,
15639 const int in_tensor_ch,
15642 int8_t *out_tensor)
15644 #if defined(__zcc__)
15646 tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15648 tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15649 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15651 return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims,
in_tmp_buf);
15654 return riscv_nn_avepool_HWC_s8_any_act(
15655 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15656 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15657 in_tensor_ch, in_tensor,
in_tmp_buf, out_tensor);
15671 #if defined(__zcc__)
15672 return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15675 return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15710 const uint16_t in_tensor_dim,
15711 const uint16_t in_tensor_ch,
15712 const uint16_t ker_dim,
15713 const uint16_t pad,
15714 const uint16_t stride,
15715 const uint16_t out_tensor_dim,
15719 #if defined(__zcc__)
15720 tpt_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15721 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15723 riscv_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15724 stride, out_tensor_dim,
in_tmp_buf, out_tensor);
15755 const uint16_t in_tensor_dim_x,
15756 const uint16_t out_tensor_dim_y,
15757 const uint16_t out_tensor_dim_x,
15758 const uint16_t stride_y,
15759 const uint16_t stride_x,
15760 const uint16_t ker_dim_y,
15761 const uint16_t ker_dim_x,
15762 const uint16_t pad_y,
15763 const uint16_t pad_x,
15764 const int8_t act_min,
15765 const int8_t act_max,
15766 const uint16_t in_tensor_ch,
15768 int16_t *tmp_buffer,
15769 int8_t *out_tensor)
15771 #if defined(__zcc__)
15772 return tpt_nn_maxpool_HWC_s8_any_act(
15773 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15774 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15775 in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15777 return riscv_nn_maxpool_HWC_s8_any_act(
15778 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15779 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15780 in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15790 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15791 #if defined(__zcc__)
15792 #include "tpt_nn_pooling.h"
15794 #include "riscv_nn_pooling.h"
15826 const int in_tensor_dim_x,
15827 const int out_tensor_dim_y,
15828 const int out_tensor_dim_x,
15829 const int stride_y,
15830 const int stride_x,
15831 const int ker_dim_y,
15832 const int ker_dim_x,
15837 const int in_tensor_ch,
15840 int8_t *out_tensor)
15842 #if defined(__zcc__)
15844 tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15846 tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15847 ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15849 return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims,
in_tmp_buf);
15852 return riscv_nn_avepool_HWC_s8_any_act(
15853 in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15854 stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15855 in_tensor_ch, in_tensor,
in_tmp_buf, out_tensor);
15869 #if defined(__zcc__)
15870 return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15873 return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15881 #ifdef HPM_MATH_NN_SOFTMAX
15882 #ifdef HPM_EN_MATH_NN_LIB
15883 #if defined(__zcc__)
15884 #include "tpt_nn_softmax.h"
15886 #include "riscv_nn_softmax.h"
15914 const uint16_t
size,
15917 #if defined(__zcc__)
15932 const uint16_t
size,
15935 #if defined(__zcc__)
15938 riscv_nn_softmax_s16_fast(in_vec,
size,
out_vec);
15957 const int32_t in_tensor_row,
15958 const int32_t in_tensor_col,
15959 const int32_t scale,
15960 const int32_t lshift,
15961 const int32_t diff_min,
15962 int8_t *out_tensor)
15964 #if defined(__zcc__)
15965 tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15968 riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15969 diff_min, out_tensor);
15988 const int32_t in_tensor_row,
15989 const int32_t in_tensor_col,
15990 const int32_t scale,
15991 const int32_t lshift,
15992 const int32_t diff_min,
15993 uint8_t *out_tensor)
15995 #if defined(__zcc__)
15996 tpt_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15997 diff_min, out_tensor);
15999 riscv_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16000 diff_min, out_tensor);
16010 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16011 #if defined(__zcc__)
16012 #include "tpt_nn_softmax.h"
16014 #include "riscv_nn_softmax.h"
16032 const int32_t in_tensor_row,
16033 const int32_t in_tensor_col,
16034 const int32_t scale,
16035 const int32_t lshift,
16036 const int32_t diff_min,
16037 int8_t *out_tensor)
16039 #if defined(__zcc__)
16040 tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16043 riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16044 diff_min, out_tensor);
16051 #ifdef HPM_MATH_NN_UTIL
16052 #ifdef HPM_EN_MATH_NN_LIB
16053 #if defined(__zcc__)
16054 #include "tpt_nn_util.h"
16056 #include "riscv_nn_util.h"
16076 static inline int32_t hpm_nn_exp_f16(
const float16_t *in_vec,
16077 const uint32_t
size,
16080 #if defined(__zcc__)
16105 int8_t *out_tensor,
16106 const uint32_t
size)
16108 #if defined(__zcc__)
16109 tpt_reshape_s8(out_tensor, in_tensor,
size);
16111 riscv_nn_reshape_s8(in_tensor, out_tensor,
size);
16140 #if defined(__zcc__)
16141 return tpt_nn_top_k_s8(in_vec,
size, k, val, idx);
16143 return riscv_nn_top_k_s8(in_vec,
size, k, val, idx);
16167 static inline int32_t hpm_nn_top_k_f16(float16_t *in_vec,
16173 #if defined(__zcc__)
16174 return tpt_nn_top_k_f16(in_vec,
size, k, val, idx);
16176 return riscv_nn_top_k_f16(in_vec,
size, k, val, idx);
16187 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16188 #if defined(__zcc__)
16189 #include "tpt_nn_util.h"
16191 #include "riscv_nn_util.h"
16211 int8_t *out_tensor,
16212 const uint32_t
size)
16214 #if defined(__zcc__)
16215 tpt_reshape_s8(out_tensor, in_tensor,
size);
16217 riscv_nn_reshape_s8(in_tensor, out_tensor,
size);
#define HPM_FFA
Definition: hpm_soc.h:392
static void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise AND of two u32 vectors.
Definition: hpm_math.h:1998
static void hpm_dsp_and_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise AND of two u16 vectors.
Definition: hpm_math.h:2017
static void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise AND of two u8 vectors.
Definition: hpm_math.h:2036
static void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
Elementwise clipping of q31 function.
Definition: hpm_math.h:1927
static void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
Elementwise clipping of f32 function.
Definition: hpm_math.h:1908
static void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
Elementwise clipping of q7 function.
Definition: hpm_math.h:1965
static void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
Elementwise clipping of q15 function.
Definition: hpm_math.h:1946
static void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
Compute the logical bitwise NOT of u16 vector.
Definition: hpm_math.h:2223
static void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
Compute the logical bitwise NOT of u32 vector.
Definition: hpm_math.h:2206
static void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
Compute the logical bitwise NOT of u8 vector.
Definition: hpm_math.h:2240
static void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise OR of two u8 vectors.
Definition: hpm_math.h:2105
static void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise OR of two u16 vectors.
Definition: hpm_math.h:2087
static void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise OR of two u32 vectors.
Definition: hpm_math.h:2069
static void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u32 vectors.
Definition: hpm_math.h:2138
static void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u16 vectors.
Definition: hpm_math.h:2156
static void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u8 vectors.
Definition: hpm_math.h:2174
static void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Addition of U8 vectors.
Definition: hpm_math.h:1119
static void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
Subtraction of u8 vectors.
Definition: hpm_math.h:1218
static q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
Division of q31 inputs.
Definition: hpm_math.h:1345
static void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Multiplication of q15 vectors.
Definition: hpm_math.h:1273
static uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
Dot product of U8 vectors.
Definition: hpm_math.h:1606
static void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
Multiply a q31 vector by a q31 scale.
Definition: hpm_math.h:1739
static void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Addition of q15 vectors.
Definition: hpm_math.h:1079
static void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
The offset of q7 vectors.
Definition: hpm_math.h:1681
static void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Subtraction of q15 vectors.
Definition: hpm_math.h:1178
static q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
Division of q63 inputs divided by a positive 32 bits.
Definition: hpm_math.h:1362
static void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Multiplication of q31 vectors.
Definition: hpm_math.h:1253
static q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
Division of positive 64-bits inputs divided by a positive 32-bits.
Definition: hpm_math.h:1379
static void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
Absolute value of q7 vectors.
Definition: hpm_math.h:1020
static void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
Negation of q15 vectors.
Definition: hpm_math.h:1437
static q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
Dot product of q31 vectors.
Definition: hpm_math.h:1501
static void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
The offset of floating-point vectors.
Definition: hpm_math.h:1621
static void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
Multiply a q7 vector by a q7 scale.
Definition: hpm_math.h:1785
static void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Addition of q7 vectors.
Definition: hpm_math.h:1099
static void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
The offset of U8 vectors.
Definition: hpm_math.h:1701
static void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Subtraction of q7 vectors.
Definition: hpm_math.h:1198
static void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
Shifts a q31 vector with a specified shift number.
Definition: hpm_math.h:1846
static void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
Multiply a q15 vector by a q15 scale.
Definition: hpm_math.h:1762
static void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Addition of q31 vectors.
Definition: hpm_math.h:1059
static void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
The offset of q15 vectors.
Definition: hpm_math.h:1661
static void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
Multiply a floating-point vector by a floating-point scale.
Definition: hpm_math.h:1716
static void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
The offset of q31 vectors.
Definition: hpm_math.h:1641
static void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Addition of floating-point vectors.
Definition: hpm_math.h:1039
static void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Subtraction of floating-point vectors.
Definition: hpm_math.h:1138
static void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
Negation of floating-point vectors.
Definition: hpm_math.h:1397
static void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
Negation of q31 vectors.
Definition: hpm_math.h:1417
static void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
Negation of q7 vectors.
Definition: hpm_math.h:1457
static void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
Shifts a q7 vector with a specified shift number.
Definition: hpm_math.h:1867
static q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
Dot product of q7 vectors.
Definition: hpm_math.h:1566
static q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
Dot product of q15 vectors.
Definition: hpm_math.h:1524
static q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
Dot product of q7 * q15 vectors.
Definition: hpm_math.h:1589
static float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
Dot product of floating-point vectors.
Definition: hpm_math.h:1476
static void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
Multiply a u8 vector by a q7 scale.
Definition: hpm_math.h:1807
static void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Subtraction of q31 vectors.
Definition: hpm_math.h:1158
static q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
Dot product of u8 * q15 vectors.
Definition: hpm_math.h:1548
static void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
Absolute value of q31 vectors.
Definition: hpm_math.h:979
static void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Multiplication of u8 vectors.
Definition: hpm_math.h:1313
static void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
Shifts a q15 vector with a specified shift number.
Definition: hpm_math.h:1825
static void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Division of floating-point vectors.
Definition: hpm_math.h:1328
static void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
Absolute value of floating-point vectors.
Definition: hpm_math.h:959
static void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Multiplication of floating-point vectors.
Definition: hpm_math.h:1233
static void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
Absolute value of q15 vectors.
Definition: hpm_math.h:1000
static void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
Shifts a u8 vector for a specified shift number.
Definition: hpm_math.h:1888
static void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Multiplication of q7 vectors.
Definition: hpm_math.h:1293
static void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
Multiply two floating-point complex vectors.
Definition: hpm_math.h:2603
static void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
Conjugate the q15 complex vector.
Definition: hpm_math.h:2338
static void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
Multiply the floating-point complex vector by a real vector.
Definition: hpm_math.h:2664
static void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
Compute the dot product of the q31 complex vector.
Definition: hpm_math.h:2448
static void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
Conjugate the floating-point complex vector.
Definition: hpm_math.h:2318
static void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
Multiply two q15 complex vector.
Definition: hpm_math.h:2624
static void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude of the floating-point complex vector.
Definition: hpm_math.h:2485
static void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude of the q15 complex vector.
Definition: hpm_math.h:2505
static void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude squared of the floating-point complex vector.
Definition: hpm_math.h:2544
static void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude of the q31 complex vector.
Definition: hpm_math.h:2525
static void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
Multiply the q31 complex vector by a real vector.
Definition: hpm_math.h:2704
static void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
Conjugate the q31 complex vector.
Definition: hpm_math.h:2358
static void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
Compute the dot product of the floating-point complex vector.
Definition: hpm_math.h:2377
static void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
Compute the dot product of the q15 complex vector.
Definition: hpm_math.h:2413
static void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
Compute the dot product type2 of the q31 complex vector.
Definition: hpm_math.h:2467
static void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude squared of the q15 complex vector.
Definition: hpm_math.h:2564
static void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
Multiply two q31 complex vector.
Definition: hpm_math.h:2645
static void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude squared of the q31 complex vector.
Definition: hpm_math.h:2584
static void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
Compute the dot product type2 of the floating-point complex vector.
Definition: hpm_math.h:2392
static void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
Compute the dot product type2 of the q15 complex vector.
Definition: hpm_math.h:2431
static void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
Multiply the q15 complex vector by a real vector.
Definition: hpm_math.h:2684
static q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
Definition: hpm_math.h:2930
static void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
PID initialization control function of Q15 formats.
Definition: hpm_math.h:2948
static void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
Inverse Park transform of q31 input.
Definition: hpm_math.h:2859
static void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
Park transform of q31 input.
Definition: hpm_math.h:2824
static void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
PID initialization control function of floating-point formats.
Definition: hpm_math.h:2890
static void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
Park transform of floating-point input.
Definition: hpm_math.h:2805
static void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
Inverse Clarke transform of q31 input.
Definition: hpm_math.h:2788
static void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
PID initialization control function of Q31 formats.
Definition: hpm_math.h:2923
static void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
Inverse Clarke transform of floating-point input.
Definition: hpm_math.h:2772
static q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
PID control of Q31 input.
Definition: hpm_math.h:2904
static void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
Clarke transform of floating-point input.
Definition: hpm_math.h:2741
static void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
Inverse Park transform of floating-point input.
Definition: hpm_math.h:2841
static float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
PID control of floating-point input.
Definition: hpm_math.h:2872
static void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
Clarke transform of q31 input.
Definition: hpm_math.h:2757
static float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Euclidean distance between two vectors.
Definition: hpm_math.h:3091
static float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cityblock (Manhattan) distance between two vectors.
Definition: hpm_math.h:3037
static float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Sneath distance between two vectors.
Definition: hpm_math.h:3236
static float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Bray-Curtis distance between two vectors.
Definition: hpm_math.h:2983
static float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Kulsinski distance between two vectors.
Definition: hpm_math.h:3200
static float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Jaccard distance between two vectors.
Definition: hpm_math.h:3182
static float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Canberra distance between two vectors.
Definition: hpm_math.h:3001
static float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Hamming distance between two vectors.
Definition: hpm_math.h:3164
static float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cosine distance between two vectors.
Definition: hpm_math.h:3073
static float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Correlation distance between two vectors.
Definition: hpm_math.h:3055
static float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Chebyshev distance between two vectors.
Definition: hpm_math.h:3019
static float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Rogers-Tanimoto distance between two vectors.
Definition: hpm_math.h:3254
static float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Yule distance between two vectors.
Definition: hpm_math.h:3272
static float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Russell-Rao distance between two vectors.
Definition: hpm_math.h:3290
static float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Dice distance between two vectors.
Definition: hpm_math.h:3146
static float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Michener distance between two vectors.
Definition: hpm_math.h:3218
static float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
Minkowski distance between two vectors.
Definition: hpm_math.h:3128
static float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Jensen-Shannon distance between two vectors.
Definition: hpm_math.h:3109
#define FFA_DATA_TYPE_COMPLEX_Q31
Definition: hpm_ffa_drv.h:39
hpm_stat_t ffa_calculate_fft_blocking(FFA_Type *ptr, fft_xfer_t *fft_xfer)
Perform FFT transformation in blocking mode.
Definition: hpm_ffa_drv.c:118
#define FFA_DATA_TYPE_COMPLEX_Q15
Definition: hpm_ffa_drv.h:40
static void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Correlation of the q31 vectors.
Definition: hpm_math.h:3929
static void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3995
static void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point lattice FIR filter.
Definition: hpm_math.h:3445
static void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3539
static void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Correlation of the q7 vectors.
Definition: hpm_math.h:3955
static void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3497
static void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Definition: hpm_math.h:3649
static void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4038
static void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3965
static void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
Definition: hpm_math.h:3533
static void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Convolution of the floating-point vectors.
Definition: hpm_math.h:3667
static void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3551
static void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3411
static void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4007
static void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3977
static void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3509
static void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4032
static void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 LMS filter.
Definition: hpm_math.h:3595
static void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point FIR filter.
Definition: hpm_math.h:3330
static void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Convolution of the q31 vectors.
Definition: hpm_math.h:3721
static void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4001
static void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the floating-point standard LMS filter.
Definition: hpm_math.h:3572
static void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Correlation of the floating-point vectors.
Definition: hpm_math.h:3873
static void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3392
static void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 lattice FIR filter.
Definition: hpm_math.h:3460
static int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q15 vectors.
Definition: hpm_math.h:3798
static void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3485
static void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the f32 normalized LMS filter.
Definition: hpm_math.h:3629
static void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3521
static void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Convolution of the q15 vectors.
Definition: hpm_math.h:3693
static void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3527
static void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3491
static void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3503
static void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
Definition: hpm_math.h:3545
static void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3351
static void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 lattice FIR filter.
Definition: hpm_math.h:3479
static void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3515
static int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q31 vectors.
Definition: hpm_math.h:3824
static void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4026
static int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the floating-point vectors.
Definition: hpm_math.h:3772
static void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3372
static void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4020
static void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Convolution of the q7 vectors.
Definition: hpm_math.h:3747
static void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3989
static void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3971
static void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Correlation of the q15 vectors.
Definition: hpm_math.h:3899
static void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4044
static int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q7 vectors.
Definition: hpm_math.h:3850
static void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3983
static void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Function for the q15 LMS filter.
Definition: hpm_math.h:3618
static void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4013
static void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 normalized LMS filter.
Definition: hpm_math.h:3641
static void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
Function for the q7 FIR filter.
Definition: hpm_math.h:3430
static void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Subtraction of two floating-point matrices.
Definition: hpm_math.h:4539
static void hpm_dsp_mat_oprod_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size1, uint32_t size2)
Outer product of two q31 matrices.
Definition: hpm_math.h:4723
static void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
Transpose the q15 matrix.
Definition: hpm_math.h:4634
static void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for f32 formats.
Definition: hpm_math.h:4757
static void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point matrices.
Definition: hpm_math.h:4209
static void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point complex matrices.
Definition: hpm_math.h:4240
static int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
Compute the inverse matrix of the floating-point matrix.
Definition: hpm_math.h:4178
static void hpm_dsp_mat_mul_vxm_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t col, uint32_t col2)
Multiplication of a q7 vector by a matrix.
Definition: hpm_math.h:4417
static void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Addition of two q31 matrices.
Definition: hpm_math.h:4159
static void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4220
static void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
Transpose the u8 matrix.
Definition: hpm_math.h:4670
static void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
Multiply a q15 matrix by a scale value.
Definition: hpm_math.h:4472
static void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4341
static void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q15 formats.
Definition: hpm_math.h:4777
static void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 matrices.
Definition: hpm_math.h:4331
static void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
Transpose the double-precision floating-point matrix.
Definition: hpm_math.h:4602
static void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 complex matrices.
Definition: hpm_math.h:4304
static void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 complex matrices.
Definition: hpm_math.h:4368
static void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
Transpose the floating-point matrix.
Definition: hpm_math.h:4620
static void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q31 formats.
Definition: hpm_math.h:4797
static void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4277
static void hpm_dsp_mat_add_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Addition of two double-precision floating-point matrices.
Definition: hpm_math.h:4117
static void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
Transpose the q31 matrices.
Definition: hpm_math.h:4652
static void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Subtraction of two double-precision floating-point matrices.
Definition: hpm_math.h:4519
static void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Addition of two floating-point matrices.
Definition: hpm_math.h:4098
static void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Addition of two q15 matrices.
Definition: hpm_math.h:4138
static void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
Multiply a floating-point matrix by a scale value.
Definition: hpm_math.h:4447
static void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q7 formats.
Definition: hpm_math.h:4817
static void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q7 matrices.
Definition: hpm_math.h:4395
static int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4431
static void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 matrices.
Definition: hpm_math.h:4267
static void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
Multiply a q31 matrix by a scale value.
Definition: hpm_math.h:4497
static void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
Transpose the q7 matrices.
Definition: hpm_math.h:4685
static void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Subtraction of two q31 matrices.
Definition: hpm_math.h:4581
static int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4188
static void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Subtraction of two q15 matrices.
Definition: hpm_math.h:4560
static void hpm_nn_activate_s16(q15_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses sigmoid or tanh function to perform activation for signed 16-bit integer input vec...
Definition: hpm_math.h:6903
static void size
Definition: hpm_math.h:6938
static void hpm_nn_leaky_relu_s8(q7_t *in_out, uint32_t size, q15_t slope)
This function uses the leaky ReLU function to perform activation for signed 8-bit integer input vecto...
static void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 16-bit integer input vectors.
Definition: hpm_math.h:6989
static void hpm_nn_activate_s8(q7_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses the sigmoid or tanh function to perform activation for signed 8-bit integer input ...
Definition: hpm_math.h:6876
static void slope
Definition: hpm_math.h:6938
static void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6949
static void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6974
static void hpm_nn_add_s8_sym(const q7_t *in_tensor1, const q7_t *in_tensor2, const int16_t *scale1, const int16_t *scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7097
static int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_scale1, const int32_t in_rshift1, const int32_t in_offset2, const int32_t in_scale2, const int32_t in_rshift2, const int32_t lshift, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_rshift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise addition for signed 8-bit integer input vectors.
Definition: hpm_math.h:7205
static int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_offset2, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_shift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise multiplication for signed 8-bit integer input vectors.
Definition: hpm_math.h:7274
static void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1, const q7_t *in_tensor2, const uint32_t scale1, const uint32_t scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7135
static void hpm_nn_concate_s8_z(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_z, const uint32_t out_offset_z)
This function concatenates the int8_t/uint8_t input tensor along the z-axis with the output tensor.
Definition: hpm_math.h:7534
static void hpm_nn_concate_s8_x(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_x, const uint32_t out_offset_x)
This function concatenates the int8_t/uint8_t input tensor along the x-axis with the output tensor.
Definition: hpm_math.h:7464
static void hpm_nn_concate_s8_y(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_y, const uint32_t out_offset_y)
This function concatenates the int8_t/uint8_t input tensor along the y-axis with the output tensor.
Definition: hpm_math.h:7499
static void hpm_nn_concate_s8_w(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint32_t out_offset_w)
This function concatenates the int8_t/uint8_t input tensor along the w-axis with the output tensor.
Definition: hpm_math.h:7429
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9669
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12104
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12032
static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(const uint8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint8_t *ker_weight, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t in_offset, const int32_t ker_offset, const int32_t out_offset, uint8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t act_min, const int32_t act_max, const int32_t out_shift, const int32_t out_scale)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:13291
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and ...
Definition: hpm_math.h:10079
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric...
Definition: hpm_math.h:11781
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:9103
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:9342
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10315
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution with shift-based quantization on th...
Definition: hpm_math.h:8522
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11895
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:9262
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10956
static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12837
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:8941
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and ...
Definition: hpm_math.h:12685
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12250
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:8694
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution with shift-based quantization on the ou...
Definition: hpm_math.h:8340
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9422
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-...
Definition: hpm_math.h:9963
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:10021
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11550
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11432
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9730
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:9183
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:11959
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution with shift-based quantization on the outputs.
Definition: hpm_math.h:7899
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quan...
Definition: hpm_math.h:10487
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ch_mult, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y ...
Definition: hpm_math.h:13086
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution for RGB images with shift-based quantization ...
Definition: hpm_math.h:7741
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9905
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10544
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12392
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10601
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-bas...
Definition: hpm_math.h:8172
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9847
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:11307
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10256
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11838
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inpu...
Definition: hpm_math.h:11491
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any.
Definition: hpm_math.h:12948
static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(const int8_t *in_tensor, const int32_t in_tensor_dim_x, const int32_t in_tensor_dim_y, const int32_t in_tensor_ch, const int8_t *ker_weight, const int32_t out_tensor_ch, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_tensor_dim_x, const int32_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const int32_t dilation_x, const int32_t dilation_y, int16_t *tmp_buf)
This function performs depthwise 3x3 kernels convolution for signed 8-bit integer inputs/outputs in a...
Definition: hpm_math.h:13000
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs...
Definition: hpm_math.h:11373
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:10810
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10138
static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based qu...
Definition: hpm_math.h:7989
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10665
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any.
Definition: hpm_math.h:12737
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs an...
Definition: hpm_math.h:10197
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:8778
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12462
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12177
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-ba...
Definition: hpm_math.h:8435
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10430
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12533
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with sy...
Definition: hpm_math.h:9789
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9023
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantiza...
Definition: hpm_math.h:7820
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11724
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shif...
Definition: hpm_math.h:8613
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:11167
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:11237
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any.
Definition: hpm_math.h:13238
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *in_tmp_buf)
This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x an...
Definition: hpm_math.h:13181
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:8859
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution with shift-based quantization on the out...
Definition: hpm_math.h:8077
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9549
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric q...
Definition: hpm_math.h:11667
static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t pad_x, const uint16_t stride_x, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, q15_t *in_tmp_buf)
This function performs 1xn kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12786
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11609
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 16-bit integer convolution with shift-based quantization on the outputs...
Definition: hpm_math.h:8259
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10883
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:12321
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9609
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12604
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:11097
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9488
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:11027
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:7654
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quanti...
Definition: hpm_math.h:10373
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10738
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions ...
Definition: hpm_math.h:12891
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with shift-based quantizati...
Definition: hpm_math.h:14179
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14529
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14925
static int32_t out_vec
Definition: hpm_math.h:14118
static int32_t in_tmp_buf
Definition: hpm_math.h:14119
static int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14616
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14391
static int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:15196
static int32_t bias
Definition: hpm_math.h:14118
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14300
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14258
static void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast.
Definition: hpm_math.h:15270
static int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:15062
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec, const int8_t *wt_mat, const uint16_t in_vec_col, const uint16_t wt_mat_row, const uint16_t in_vec_group, const int32_t in_offset, const int32_t wt_offset, const int32_t out_scale, const int32_t out_shift, const int32_t out_offset, const int32_t *bias, int8_t *out_vec, const int32_t act_min, const int32_t act_max, q15_t *tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with bias inputs and asymmet...
Definition: hpm_math.h:15312
static int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with symmetric quant...
Definition: hpm_math.h:14574
static void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 8-bit weight data and name...
Definition: hpm_math.h:15227
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14972
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
This function is used to get the needed size, in bytes, by the temporary buffer of riscv_nn_fc_s8_s8_...
Definition: hpm_math.h:15351
static int32_t out_rshift
Definition: hpm_math.h:14118
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14878
static int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with interleaved mul...
Definition: hpm_math.h:15017
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with interleaved multiplicat...
Definition: hpm_math.h:14144
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14483
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf) return riscv_nn_fc_s8_s8_s8_sft_bias(in_vec
This is a fully connected layer function for signed 8-bit integer inputs with shift-based quantizatio...
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14785
static int32_t wt_row_num
Definition: hpm_math.h:14117
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs and...
Definition: hpm_math.h:14345
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs a...
Definition: hpm_math.h:14437
static int32_t bias_lshift
Definition: hpm_math.h:14118
static void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q15_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 16-bit weight data and nam...
Definition: hpm_math.h:15249
static int32_t wt_mat
Definition: hpm_math.h:14117
static int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14742
static int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with symmetric qua...
Definition: hpm_math.h:14658
static int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:15151
static int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with interleaved m...
Definition: hpm_math.h:15106
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14832
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with interleaved multiplica...
Definition: hpm_math.h:14222
static int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14700
static int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y, const int in_tensor_dim_x, const int out_tensor_dim_y, const int out_tensor_dim_x, const int stride_y, const int stride_x, const int ker_dim_y, const int ker_dim_x, const int pad_y, const int pad_x, const int act_min, const int act_max, const int in_tensor_ch, int8_t *in_tensor, int16_t *in_tmp_buf, int8_t *out_tensor)
This is an average pooling function for S8 inputs with any x and y dimension with the activating param...
Definition: hpm_math.h:15627
static void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15709
static int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y, const uint16_t in_tensor_dim_x, const uint16_t out_tensor_dim_y, const uint16_t out_tensor_dim_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t ker_dim_y, const uint16_t ker_dim_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t in_tensor_ch, int8_t *in_tensor, int16_t *tmp_buffer, int8_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs in any x and y dimensions with the act...
Definition: hpm_math.h:15754
static void hpm_nn_avepool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is an average pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15505
static int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
This function is used to obtain the required size, in bytes, for the input temporary buffer of riscv_...
Definition: hpm_math.h:15669
static void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q7_t *in_tmp_buf, q7_t *out_tensor, const uint16_t out_lshift)
This is an average pooling function for signed 8-bit integer inputs in any x and y dimensions.
Definition: hpm_math.h:15570
static void hpm_nn_softmax_s8_hp(const int8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, int8_t *out_tensor)
This is a softmax function for signed 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15956
static void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, uint8_t *out_tensor)
This is a softmax function for unsigned 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15987
static void hpm_nn_softmax_s8_fast(const q7_t *in_vec, const uint16_t size, q7_t *out_vec)
This is a softmax function for signed 8-bit integer input vectors.
Definition: hpm_math.h:15913
static void hpm_nn_softmax_s16_fast(const q15_t *in_vec, const uint16_t size, q15_t *out_vec)
This is a softmax function for signed 16-bit integer input vectors.
Definition: hpm_math.h:15931
static int32_t hpm_nn_top_k_s8(q7_t *in_vec, uint32_t size, uint32_t k, q7_t *val, uint32_t *idx)
This function finds the k largest values and their indices from the signed 8-bit integer input vector...
Definition: hpm_math.h:16134
static void hpm_nn_reshape_s8(const int8_t *in_tensor, int8_t *out_tensor, const uint32_t size)
This function turns the input tensor into another tensor with the same data but in a different shape.
Definition: hpm_math.h:16104
static void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t *instance, riscv_dsp_sort_order order, float32_t *buf)
Definition: hpm_math.h:6586
__STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
Read 4 s8 from s8 pointer and post increment pointer.
Definition: hpm_math.h:6782
static void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Generic sorting function.
Definition: hpm_math.h:6564
#define Q31_MIN
Definition: hpm_math.h:6646
#define RIGHT_SHIFT(_shift)
Definition: hpm_math.h:6644
#define LEFT_SHIFT(_shift)
Definition: hpm_math.h:6643
__STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
Definition: hpm_math.h:6792
__STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
Definition: hpm_math.h:6726
__STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
Rounding divide by power of two.
Definition: hpm_math.h:6707
#define Q31_MAX
Definition: hpm_math.h:6645
static void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Merge sort.
Definition: hpm_math.h:6626
__STATIC_FORCEINLINE const q7_t * read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words
Definition: hpm_math.h:6765
__STATIC_FORCEINLINE const q7_t * read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words with reordering
Definition: hpm_math.h:6751
__STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
Read 4 q7 from q7 pointer and post increment pointer.
Definition: hpm_math.h:6737
static void write_q15x2_ia(q15_t **pQ15, q31_t value)
Definition: hpm_math.h:6648
static void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t *instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
Definition: hpm_math.h:6517
__STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
Saturating doubling high multiply. Result matches NEON instruction VQRDMULH.
Definition: hpm_math.h:6682
__STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
Read 2 q15 elements and post increment pointer.
Definition: hpm_math.h:6664
static float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
Standard deviation of the floating-point vector.
Definition: hpm_math.h:565
static uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t *src, float32_t *buf)
Naive Gaussian Bayesian Estimator.
Definition: hpm_math.h:810
static float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
Variance of the floating-point vector.
Definition: hpm_math.h:656
static q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum value of the q15 vector.
Definition: hpm_math.h:120
static q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
Sum of the squares of the q15 vector.
Definition: hpm_math.h:422
static q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
Mean value of the q7 vector.
Definition: hpm_math.h:361
static q15_t hpm_dsp_absmin_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q15 vector.
Definition: hpm_math.h:908
static uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Max value of the u8 vector.
Definition: hpm_math.h:180
static q7_t hpm_dsp_absmin_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q7 vector.
Definition: hpm_math.h:922
static uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Minimum value of the u8 vector.
Definition: hpm_math.h:275
static q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum value of the q7 vector.
Definition: hpm_math.h:160
static float32_t hpm_dsp_absmin_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the floating-point vector.
Definition: hpm_math.h:880
static q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
RMS of the q31 vector.
Definition: hpm_math.h:545
static q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum value of the q7 vector.
Definition: hpm_math.h:255
static q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
Variance of the q31 vector.
Definition: hpm_math.h:708
static q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum value of the q31 vector.
Definition: hpm_math.h:140
static q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
Mean value of the q31 vector.
Definition: hpm_math.h:337
static q31_t hpm_dsp_absmax_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q31 vector.
Definition: hpm_math.h:852
static float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
Entropy of the floating-point vector.
Definition: hpm_math.h:729
static float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
Mean value of the floating-point vector.
Definition: hpm_math.h:289
static float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
RMS of the floating-point vector.
Definition: hpm_math.h:493
static float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:770
static q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
Sum of the squares of the q7 vector.
Definition: hpm_math.h:473
static q31_t hpm_dsp_absmin_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q31 vector.
Definition: hpm_math.h:894
static float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum value of the floating-point vector.
Definition: hpm_math.h:95
static float32_t hpm_dsp_absmax_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the floating-point vector.
Definition: hpm_math.h:824
static q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum value of the q31 vector.
Definition: hpm_math.h:235
static q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
Standard deviation of the u8 vector.
Definition: hpm_math.h:642
static q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum value of the q15 vector.
Definition: hpm_math.h:215
static q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
RMS of the q15 vector.
Definition: hpm_math.h:519
static float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum value of the floating-point vector.
Definition: hpm_math.h:195
static q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
Sum of the squares of the q31 vector.
Definition: hpm_math.h:448
static float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
Definition: hpm_math.h:107
static q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
Variance of the q15 vector.
Definition: hpm_math.h:682
static q7_t hpm_dsp_absmax_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q7 vector.
Definition: hpm_math.h:866
static q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
Mean value of the q15 vector.
Definition: hpm_math.h:313
static q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
Standard deviation of the q15 vector.
Definition: hpm_math.h:591
static float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
Dot product with Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:790
static q15_t hpm_dsp_absmax_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q15 vector.
Definition: hpm_math.h:838
static uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
Mean value of the u8 vector.
Definition: hpm_math.h:383
static q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
Standard deviation of the q31 vector.
Definition: hpm_math.h:617
static float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Relative Entropy of the floating-point vector.
Definition: hpm_math.h:752
static float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
Sum of the squares of the floating-point vector.
Definition: hpm_math.h:397
static void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
SVM linear prediction.
Definition: hpm_math.h:4857
static void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
SVM rbf prediction.
Definition: hpm_math.h:4885
static void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
SVM polynomial prediction.
Definition: hpm_math.h:4899
static void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
SVM Sigmoid prediction.
Definition: hpm_math.h:4871
static void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
Duplicate the floating vector.
Definition: hpm_math.h:6264
static void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
Set the floating-point vector.
Definition: hpm_math.h:6317
static float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
Definition: hpm_math.h:6036
static void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
Convert a Q31 vector to Q15.
Definition: hpm_math.h:6198
static void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
Set the Q15 vector.
Definition: hpm_math.h:6330
static float32_t hpm_dsp_exp_f32(float32_t src)
Calculate exponential value of f32 vector.
Definition: hpm_math.h:6399
static float32_t hpm_dsp_sin_f32(float32_t src)
Definition: hpm_math.h:5984
static float32_t hpm_dsp_sigmoid_f32(float32_t src)
Calculate sigmoid value of f32 vector.
Definition: hpm_math.h:6425
static void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
Convert a Q31 vector to floating.
Definition: hpm_math.h:6181
static q15_t hpm_dsp_atan_q15(q15_t src)
Definition: hpm_math.h:6030
static q31_t hpm_dsp_sin_q31(q31_t src)
Definition: hpm_math.h:6004
static void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
Convert a floating-point vector to Q31.
Definition: hpm_math.h:6112
static q31_t hpm_dsp_cos_q31(q31_t src)
Definition: hpm_math.h:5971
static q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
Definition: hpm_math.h:6048
static void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
Duplicate the Q31 vector.
Definition: hpm_math.h:6290
static void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
Convert a Q15 vector to Q7.
Definition: hpm_math.h:6168
static void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
Set the Q31 vector.
Definition: hpm_math.h:6343
static q15_t hpm_dsp_sin_q15(q15_t src)
Definition: hpm_math.h:6010
static void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
Duplicate the Q15 vector.
Definition: hpm_math.h:6277
static void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
Convert a Q15 vector to Q31.
Definition: hpm_math.h:6155
static q15_t hpm_dsp_cos_q15(q15_t src)
Definition: hpm_math.h:5977
static float32_t hpm_dsp_cos_f32(float32_t src)
Definition: hpm_math.h:5965
static void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
Barycenter of the floating-point type.
Definition: hpm_math.h:6387
static q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
Definition: hpm_math.h:6042
static void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
Convert a Q7 vector to Q15.
Definition: hpm_math.h:6237
static void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
Convert a Q31 vector to Q7.
Definition: hpm_math.h:6211
static void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
Convert a floating-point vector to Q15.
Definition: hpm_math.h:6099
static void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
Set the Q7 vector.
Definition: hpm_math.h:6356
static q31_t hpm_dsp_atan_q31(q31_t src)
Definition: hpm_math.h:6024
static void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
Convert a Q7 vector to Q31.
Definition: hpm_math.h:6250
static void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
Convert a Q7 vector to floating.
Definition: hpm_math.h:6224
static float32_t hpm_dsp_log_f32(float32_t src)
Calculate the natural logarithm value of f32 vector.
Definition: hpm_math.h:6451
static void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
Convert a Q15 vector to floating.
Definition: hpm_math.h:6142
static float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
Weighted Sum of the floating-point vector.
Definition: hpm_math.h:6371
static void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
Convert a floating-point vector to Q7.
Definition: hpm_math.h:6129
static q31_t hpm_dsp_sqrt_q31(q31_t src)
Square root of the q31 input.
Definition: hpm_math.h:6073
static float32_t hpm_dsp_sqrt_f32(float32_t src)
Square root of the floating-point input.
Definition: hpm_math.h:6061
static void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
Duplicate the Q7 vector.
Definition: hpm_math.h:6303
static q15_t hpm_dsp_sqrt_q15(q15_t src)
Square root of the q15 input.
Definition: hpm_math.h:6085
static float32_t hpm_dsp_atan_f32(float32_t src)
Definition: hpm_math.h:6018
uint32_t hpm_math_sw_reverse_bit32_msb_to_lsb(uint32_t msb)
Reverse 32bit data msb to lsb.
uint8_t hpm_math_sw_reverse_bit8_msb_to_lsb(uint8_t msb)
Reverse 8bit data msb to lsb.
uint32_t hpm_math_sw_reverse_bit32_lsb_to_msb(uint32_t lsb)
Reverse 32bit data lsb to msb.
uint8_t hpm_math_sw_reverse_bit8_lsb_to_msb(uint8_t lsb)
Reverse 8bit data lsb to msb.
FFT transform context.
Definition: hpm_ffa_drv.h:75
void * dst
Definition: hpm_ffa_drv.h:81
uint8_t dst_data_type
Definition: hpm_ffa_drv.h:78
uint8_t src_data_type
Definition: hpm_ffa_drv.h:77
const void * src
Definition: hpm_ffa_drv.h:80
uint32_t num_points
Definition: hpm_ffa_drv.h:79
uint16_t is_ifft
Definition: hpm_ffa_drv.h:76